mirror of
https://github.com/cookiengineer/audacity
synced 2025-04-30 15:49:41 +02:00
Fixes to local-manual script Part 2.
This reinstates the logo and the 303.css stylesheet. The script now mostly works for most of the pages.
This commit is contained in:
parent
e036c59f15
commit
74c2af918c
2027
scripts/mw2html_audacity/303.css
Normal file
2027
scripts/mw2html_audacity/303.css
Normal file
File diff suppressed because it is too large
Load Diff
BIN
scripts/mw2html_audacity/AudacityLogo.png
Normal file
BIN
scripts/mw2html_audacity/AudacityLogo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
@ -18,7 +18,7 @@ Minor tweaks (for Audacity) By James Crook, Nov 2009.
|
|||||||
...
|
...
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = '0.1.0.0'
|
__version__ = '0.1.0.1'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
@ -34,6 +34,7 @@ import hashlib
|
|||||||
import httplib
|
import httplib
|
||||||
#import pdb
|
#import pdb
|
||||||
from time import strftime
|
from time import strftime
|
||||||
|
from shutil import copyfile
|
||||||
|
|
||||||
try:
|
try:
|
||||||
set
|
set
|
||||||
@ -47,6 +48,8 @@ except:
|
|||||||
print ' http://www.connellybarnes.com/code/htmldata/'
|
print ' http://www.connellybarnes.com/code/htmldata/'
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
config = None
|
config = None
|
||||||
MOVE_HREF = 'movehref'
|
MOVE_HREF = 'movehref'
|
||||||
MADE_BY_COMMENT = '<!-- Content generated by Mediawiki and mw2html -->'
|
MADE_BY_COMMENT = '<!-- Content generated by Mediawiki and mw2html -->'
|
||||||
@ -121,6 +124,10 @@ def normalize_url(url, lower=True):
|
|||||||
|
|
||||||
if url.startswith('http://'):
|
if url.startswith('http://'):
|
||||||
url = url[len('http://'):]
|
url = url[len('http://'):]
|
||||||
|
|
||||||
|
# if url.startswith('https://'):
|
||||||
|
# url = url[len('https://'):]
|
||||||
|
|
||||||
if url.startswith('www.'):
|
if url.startswith('www.'):
|
||||||
url = url[len('www.'):]
|
url = url[len('www.'):]
|
||||||
|
|
||||||
@ -187,17 +194,22 @@ def monobook_fix_html(doc, page_url):
|
|||||||
if config.made_by:
|
if config.made_by:
|
||||||
doc = doc.replace('<html xmlns=', MADE_BY_COMMENT + '\n<html xmlns=')
|
doc = doc.replace('<html xmlns=', MADE_BY_COMMENT + '\n<html xmlns=')
|
||||||
|
|
||||||
doc = remove_tag(doc, '<div class="portlet" id="p-personal"', '</div>', '<div')
|
# Obsolete substitutions.
|
||||||
doc = remove_tag(doc, '<div id="p-search" class="portlet"', '</div>', '<div')
|
# doc = remove_tag(doc, '<div class="portlet" id="p-editors">', '</div>', '<div')
|
||||||
doc = remove_tag(doc, '<div class="portlet" id="p-editors">', '</div>', '<div')
|
# doc = remove_tag(doc, '<div id=\'catlinks\' class=\'catlinks catlinks-allhidden\'>', '</div>', '<div')
|
||||||
doc = remove_tag(doc, '<div id=\'catlinks\' class=\'catlinks catlinks-allhidden\'>', '</div>', '<div')
|
|
||||||
#James also remove the page/discussion/source/history/ div.
|
#James also remove the page/discussion/source/history/ div.
|
||||||
doc = remove_tag(doc, '<li id="ca-', '</li>', '<li')
|
doc = remove_tag(doc, '<li id="ca-', '</li>', '<li')
|
||||||
|
doc = remove_tag(doc, '<div id="p-search" class="portlet"', '</div>', '<div')
|
||||||
|
doc = remove_tag(doc, '<div class="portlet" id="p-personal"', '</div>', '<div')
|
||||||
doc = remove_tag(doc, '<div class="editornote2"', '</div>', '<div')
|
doc = remove_tag(doc, '<div class="editornote2"', '</div>', '<div')
|
||||||
doc = remove_tag(doc, '<div id="p-cactions"', '</div>', '<div')
|
doc = remove_tag(doc, '<div id="p-cactions"', '</div>', '<div')
|
||||||
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-For_Editors"', '</div>', '<div')
|
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-For_Editors"', '</div>', '<div')
|
||||||
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-ToDo"', '</div>', '<div')
|
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-ToDo"', '</div>', '<div')
|
||||||
doc = remove_tag(doc, '<div class="portlet" id="p-tb"', '</div>', '<div')
|
doc = remove_tag(doc, '<div class="portlet" id="p-tb"', '</div>', '<div')
|
||||||
|
#remove javascript.
|
||||||
|
doc = remove_tag(doc, '<script', '</script>', '<script')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#andre special mode
|
#andre special mode
|
||||||
if config.special_mode:
|
if config.special_mode:
|
||||||
@ -262,7 +274,7 @@ def pre_html_transform(doc, url):
|
|||||||
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def pos_html_transform(doc, url):
|
def pos_html_transform(doc, url,filename):
|
||||||
global footer_text, config, sidebar_html
|
global footer_text, config, sidebar_html
|
||||||
url = normalize_url(url, False)
|
url = normalize_url(url, False)
|
||||||
|
|
||||||
@ -272,7 +284,7 @@ def pos_html_transform(doc, url):
|
|||||||
sidebar_html = f.read()
|
sidebar_html = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
doc = re.sub(r'(<!-- end of the left \(by default at least\) column -->)', sidebar_html + r'\1', doc)
|
# doc = re.sub(r'(<!-- end of the left \(by default at least\) column -->)', sidebar_html + r'\1', doc)
|
||||||
|
|
||||||
# Remove empty links
|
# Remove empty links
|
||||||
doc = clean_tag(doc, 'href=""', '</a>', '<a ');
|
doc = clean_tag(doc, 'href=""', '</a>', '<a ');
|
||||||
@ -284,9 +296,15 @@ def pos_html_transform(doc, url):
|
|||||||
# Remove external javascript
|
# Remove external javascript
|
||||||
doc = re.sub(r'<script type="text/javascript" src="http://[\s\S]+?</script>', r'', doc)
|
doc = re.sub(r'<script type="text/javascript" src="http://[\s\S]+?</script>', r'', doc)
|
||||||
|
|
||||||
# Add back relevant stylesheet.
|
|
||||||
doc = re.sub(r'</head>', '<link rel="stylesheet" href="../m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc, flags=re.DOTALL)
|
|
||||||
|
|
||||||
|
# Add back relevant stylesheet.
|
||||||
|
top_level_dir = config.outdir
|
||||||
|
if( os.path.dirname(os.path.dirname( filename )) == config.outdir ):
|
||||||
|
doc = re.sub(r'</head>', '<link rel="stylesheet" href="m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc, flags=re.DOTALL)
|
||||||
|
else:
|
||||||
|
doc = re.sub(r'</head>',
|
||||||
|
'<link rel="stylesheet" href="../m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc,
|
||||||
|
flags=re.DOTALL)
|
||||||
|
|
||||||
# Replace remaining text with footer, if available (this needs to be done after parse_html to avoid rewriting of URLs)
|
# Replace remaining text with footer, if available (this needs to be done after parse_html to avoid rewriting of URLs)
|
||||||
if config.footer is not None:
|
if config.footer is not None:
|
||||||
@ -357,8 +375,8 @@ def html_remove_translation_links(doc):
|
|||||||
The second version deals with links like /pt_PT and /zh_CN
|
The second version deals with links like /pt_PT and /zh_CN
|
||||||
We are case sensitive, so as not to treat FAQ as a language code.
|
We are case sensitive, so as not to treat FAQ as a language code.
|
||||||
"""
|
"""
|
||||||
doc = re.sub(r'<a href="[^"]+/[a-z]{2,3}[/"][\s\S]+?</a>', r'<!--Removed Translation Flag-->', doc)
|
doc = re.sub(r'<a href="[^"]+/[a-z]{2,3}[/"][\s\S]+?</a>', r'', doc)
|
||||||
doc = re.sub(r'<a href="[^"]+/[a-z]{2}_[A-Z]{2}[/"][\s\S]+?</a>', r'<!--Removed Translation Flag2-->', doc)
|
doc = re.sub(r'<a href="[^"]+/[a-z]{2}_[A-Z]{2}[/"][\s\S]+?</a>', r'', doc)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def monobook_hack_skin_html(doc):
|
def monobook_hack_skin_html(doc):
|
||||||
@ -859,12 +877,13 @@ def should_follow(url):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def parse_html(doc, url):
|
def parse_html(doc, url, filename):
|
||||||
"""
|
"""
|
||||||
Returns (modified_doc, new_urls), where new_urls are absolute URLs for
|
Returns (modified_doc, new_urls), where new_urls are absolute URLs for
|
||||||
all links we want to spider in the HTML.
|
all links we want to spider in the HTML.
|
||||||
"""
|
"""
|
||||||
global config
|
global config
|
||||||
|
global counter
|
||||||
|
|
||||||
BEGIN_COMMENT_REPLACE = '<BEGINCOMMENT-' + str(random.random()) + '>'
|
BEGIN_COMMENT_REPLACE = '<BEGINCOMMENT-' + str(random.random()) + '>'
|
||||||
END_COMMENT_REPLACE = '<ENDCOMMENT-' + str(random.random()) + '>'
|
END_COMMENT_REPLACE = '<ENDCOMMENT-' + str(random.random()) + '>'
|
||||||
@ -886,7 +905,7 @@ def parse_html(doc, url):
|
|||||||
# more pages.
|
# more pages.
|
||||||
for item in L:
|
for item in L:
|
||||||
u = item.url
|
u = item.url
|
||||||
follow = should_follow(u)
|
follow = should_follow(u) # and (counter < 10)
|
||||||
if follow:
|
if follow:
|
||||||
if config.debug:
|
if config.debug:
|
||||||
print 'ACCEPTED - ', u
|
print 'ACCEPTED - ', u
|
||||||
@ -904,11 +923,10 @@ def parse_html(doc, url):
|
|||||||
newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '<!--')
|
newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '<!--')
|
||||||
newdoc = newdoc.replace(END_COMMENT_REPLACE, '-->')
|
newdoc = newdoc.replace(END_COMMENT_REPLACE, '-->')
|
||||||
|
|
||||||
newdoc = pos_html_transform(newdoc, url)
|
newdoc = pos_html_transform(newdoc, url,filename)
|
||||||
|
|
||||||
return (newdoc, new_urls)
|
return (newdoc, new_urls)
|
||||||
|
|
||||||
|
|
||||||
def run(out=sys.stdout):
|
def run(out=sys.stdout):
|
||||||
"""
|
"""
|
||||||
Code interface.
|
Code interface.
|
||||||
@ -976,7 +994,7 @@ def run(out=sys.stdout):
|
|||||||
new_urls = []
|
new_urls = []
|
||||||
|
|
||||||
if filename.endswith('.html'):
|
if filename.endswith('.html'):
|
||||||
(doc, new_urls) = parse_html(doc, url)
|
(doc, new_urls) = parse_html(doc, url, filename)
|
||||||
elif filename.endswith('.css'):
|
elif filename.endswith('.css'):
|
||||||
(doc, new_urls) = parse_css(doc, url)
|
(doc, new_urls) = parse_css(doc, url)
|
||||||
|
|
||||||
@ -1013,6 +1031,26 @@ def run(out=sys.stdout):
|
|||||||
print errors, "errors not recovered"
|
print errors, "errors not recovered"
|
||||||
|
|
||||||
|
|
||||||
|
src_dir = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
src = os.path.join(src_dir, "AudacityLogo.png")
|
||||||
|
subfile = r"alphamanual.audacityteam.org\m\resources\assets\AudacityLogo.png"
|
||||||
|
dest = os.path.join(config.outdir, subfile)
|
||||||
|
print "copying from", src, "to", dest
|
||||||
|
directory = os.path.dirname(dest)
|
||||||
|
if not os.path.exists(directory):
|
||||||
|
os.makedirs(directory)
|
||||||
|
copyfile(src,dest)
|
||||||
|
src = os.path.join(src_dir, "303.css")
|
||||||
|
subfile = r"alphamanual.audacityteam.org\m\skins\monobook\main.css\303.css"
|
||||||
|
dest = os.path.join(config.outdir, subfile)
|
||||||
|
print "copying from", src, "to", dest
|
||||||
|
directory = os.path.dirname(dest)
|
||||||
|
if not os.path.exists(directory):
|
||||||
|
os.makedirs(directory)
|
||||||
|
copyfile(src,dest)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def usage():
|
def usage():
|
||||||
"""
|
"""
|
||||||
Print command line options.
|
Print command line options.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user