diff --git a/scripts/mw2html_audacity/mw2html.py b/scripts/mw2html_audacity/mw2html.py
index f97c0d927..15e6222be 100644
--- a/scripts/mw2html_audacity/mw2html.py
+++ b/scripts/mw2html_audacity/mw2html.py
@@ -51,6 +51,7 @@ config = None
MOVE_HREF = 'movehref'
MADE_BY_COMMENT = ''
INDEX_HTML = 'index.html'
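+# Assumed-purpose note: filename used in the static dump for the wiki's
+# Quick_Help page (see url_to_filename below).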
+QHELP_HTML = 'quick_help.html'
url_filename_cache = {}
redir_cache = {}
wrote_file_set = set()
@@ -235,7 +236,6 @@ def pre_html_transform(doc, url):
modified HTML document.
"""
global config
- new_urls = []
if config.hack_skin:
if config.skin == MONOBOOK_SKIN:
@@ -292,7 +292,7 @@ def pos_html_transform(doc, url):
f.close()
# add static dump time
- footer_html = footer_text.replace('%DATE%', strftime("%Y-%m-%d %H:%M:%S"))
+ footer_html = footer_text.replace('%DATE%', strftime("%Y-%m-%d"))
# add online url
footer_html = footer_html.replace('%ONLINEURL%', url)
@@ -618,7 +618,9 @@ def url_to_filename(url):
return url_filename_cache[nurl]
#ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', params='', query='', fragment='')
- L = list(urlparse.urlparse(nurl))
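+    # Rewrite wiki CGI links as static paths before splitting the URL:
+    # e.g. (hypothetical) '.../m/index.php?title=Main_Page' -> '.../man/Main_Page'.
+    # Also drop everything after '.css&' in stylesheet URLs.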
+    turl = re.sub(r'm/index\.php\?title=', r'man/', nurl)
+    turl = re.sub(r'\.css&[\S\s]+', r'.css', turl)
+ L = list(urlparse.urlparse(turl))
#this way the url will not create a folder outside of the maindomain
droot = get_domain(config.rooturl)
@@ -639,6 +641,10 @@ def url_to_filename(url):
if lpath[-1]=='man':
L[2] = INDEX_HTML
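+    # Collapse any Quick_Help page onto a single static quick_help.html
+    # and clear the parameters field of the parsed URL.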
+    if lpath[-1].lower().startswith('quick_help'):
+        L[2] = QHELP_HTML
+        L[3] = ''
+
L[2] = L[2].strip('/')
@@ -824,6 +830,10 @@ def should_follow(url):
print url, 'is a file outside of scope with unknown extension'
return False
+    # JKC: we do allow CSS from 'strange' places, i.e. outside of scope.
+ if '.css' in L[-1]:
+ return True
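+    # e.g. (hypothetical) a stylesheet at
+    # 'http://static.example.org/skins/monobook/main.css' is still fetched
+    # even though it is outside of scope.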
+
forbidden_parents = ['.php', '.html', '.htm']
for fp in forbidden_parents:
if fp in L[-1]:
@@ -852,7 +862,11 @@ def parse_html(doc, url):
doc = doc.replace('-->', END_COMMENT_REPLACE)
L = htmldata.urlextract(doc, url, 'text/html')
-
+
+    # In this loop we change each absolute url in L into a
+    # relative one. We also kick off zillions of subthreads
+    # to collect more pages.
for item in L:
u = item.url
follow = should_follow(u)
@@ -868,7 +882,7 @@ def parse_html(doc, url):
# item.url = ''
if config.debug:
print 'NOT INCLUDED - ', u
-
+
newdoc = htmldata.urljoin(doc, L)
newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '')