1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-04-30 15:49:41 +02:00

Fixes to local-manual script Part 2.

This reinstates the logo and the 303.css stylesheet.  The script now works for most of the pages.
This commit is contained in:
James Crook 2017-08-22 22:57:46 +01:00
parent e036c59f15
commit 74c2af918c
3 changed files with 2081 additions and 16 deletions

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -18,7 +18,7 @@ Minor tweaks (for Audacity) By James Crook, Nov 2009.
... ...
""" """
__version__ = '0.1.0.0' __version__ = '0.1.0.1'
import re import re
import sys import sys
@ -34,6 +34,7 @@ import hashlib
import httplib import httplib
#import pdb #import pdb
from time import strftime from time import strftime
from shutil import copyfile
try: try:
set set
@ -47,6 +48,8 @@ except:
print ' http://www.connellybarnes.com/code/htmldata/' print ' http://www.connellybarnes.com/code/htmldata/'
sys.exit() sys.exit()
config = None config = None
MOVE_HREF = 'movehref' MOVE_HREF = 'movehref'
MADE_BY_COMMENT = '<!-- Content generated by Mediawiki and mw2html -->' MADE_BY_COMMENT = '<!-- Content generated by Mediawiki and mw2html -->'
@ -121,6 +124,10 @@ def normalize_url(url, lower=True):
if url.startswith('http://'): if url.startswith('http://'):
url = url[len('http://'):] url = url[len('http://'):]
# if url.startswith('https://'):
# url = url[len('https://'):]
if url.startswith('www.'): if url.startswith('www.'):
url = url[len('www.'):] url = url[len('www.'):]
@ -187,17 +194,22 @@ def monobook_fix_html(doc, page_url):
if config.made_by: if config.made_by:
doc = doc.replace('<html xmlns=', MADE_BY_COMMENT + '\n<html xmlns=') doc = doc.replace('<html xmlns=', MADE_BY_COMMENT + '\n<html xmlns=')
doc = remove_tag(doc, '<div class="portlet" id="p-personal"', '</div>', '<div') # Obsolete substitutions.
doc = remove_tag(doc, '<div id="p-search" class="portlet"', '</div>', '<div') # doc = remove_tag(doc, '<div class="portlet" id="p-editors">', '</div>', '<div')
doc = remove_tag(doc, '<div class="portlet" id="p-editors">', '</div>', '<div') # doc = remove_tag(doc, '<div id=\'catlinks\' class=\'catlinks catlinks-allhidden\'>', '</div>', '<div')
doc = remove_tag(doc, '<div id=\'catlinks\' class=\'catlinks catlinks-allhidden\'>', '</div>', '<div')
#James also remove the page/discussion/source/history/ div. #James also remove the page/discussion/source/history/ div.
doc = remove_tag(doc, '<li id="ca-', '</li>', '<li') doc = remove_tag(doc, '<li id="ca-', '</li>', '<li')
doc = remove_tag(doc, '<div id="p-search" class="portlet"', '</div>', '<div')
doc = remove_tag(doc, '<div class="portlet" id="p-personal"', '</div>', '<div')
doc = remove_tag(doc, '<div class="editornote2"', '</div>', '<div') doc = remove_tag(doc, '<div class="editornote2"', '</div>', '<div')
doc = remove_tag(doc, '<div id="p-cactions"', '</div>', '<div') doc = remove_tag(doc, '<div id="p-cactions"', '</div>', '<div')
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-For_Editors"', '</div>', '<div') doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-For_Editors"', '</div>', '<div')
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-ToDo"', '</div>', '<div') doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-ToDo"', '</div>', '<div')
doc = remove_tag(doc, '<div class="portlet" id="p-tb"', '</div>', '<div') doc = remove_tag(doc, '<div class="portlet" id="p-tb"', '</div>', '<div')
#remove javascript.
doc = remove_tag(doc, '<script', '</script>', '<script')
#andre special mode #andre special mode
if config.special_mode: if config.special_mode:
@ -262,7 +274,7 @@ def pre_html_transform(doc, url):
return doc return doc
def pos_html_transform(doc, url): def pos_html_transform(doc, url,filename):
global footer_text, config, sidebar_html global footer_text, config, sidebar_html
url = normalize_url(url, False) url = normalize_url(url, False)
@ -272,7 +284,7 @@ def pos_html_transform(doc, url):
sidebar_html = f.read() sidebar_html = f.read()
f.close() f.close()
doc = re.sub(r'(<!-- end of the left \(by default at least\) column -->)', sidebar_html + r'\1', doc) # doc = re.sub(r'(<!-- end of the left \(by default at least\) column -->)', sidebar_html + r'\1', doc)
# Remove empty links # Remove empty links
doc = clean_tag(doc, 'href=""', '</a>', '<a '); doc = clean_tag(doc, 'href=""', '</a>', '<a ');
@ -284,9 +296,15 @@ def pos_html_transform(doc, url):
# Remove external javascript # Remove external javascript
doc = re.sub(r'<script type="text/javascript" src="http://[\s\S]+?</script>', r'', doc) doc = re.sub(r'<script type="text/javascript" src="http://[\s\S]+?</script>', r'', doc)
# Add back relevant stylesheet.
doc = re.sub(r'</head>', '<link rel="stylesheet" href="../m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc, flags=re.DOTALL)
# Add back relevant stylesheet.
top_level_dir = config.outdir
if( os.path.dirname(os.path.dirname( filename )) == config.outdir ):
doc = re.sub(r'</head>', '<link rel="stylesheet" href="m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc, flags=re.DOTALL)
else:
doc = re.sub(r'</head>',
'<link rel="stylesheet" href="../m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc,
flags=re.DOTALL)
# Replace remaining text with footer, if available (this needs to be done after parse_html to avoid rewriting of urls # Replace remaining text with footer, if available (this needs to be done after parse_html to avoid rewriting of urls
if config.footer is not None: if config.footer is not None:
@ -357,8 +375,8 @@ def html_remove_translation_links(doc):
The second version deals with links like /pt_PT and /zh_CN The second version deals with links like /pt_PT and /zh_CN
We are case sensitive, so as not to treat FAQ as a language code. We are case sensitive, so as not to treat FAQ as a language code.
""" """
doc = re.sub(r'<a href="[^"]+/[a-z]{2,3}[/"][\s\S]+?</a>', r'<!--Removed Translation Flag-->', doc) doc = re.sub(r'<a href="[^"]+/[a-z]{2,3}[/"][\s\S]+?</a>', r'', doc)
doc = re.sub(r'<a href="[^"]+/[a-z]{2}_[A-Z]{2}[/"][\s\S]+?</a>', r'<!--Removed Translation Flag2-->', doc) doc = re.sub(r'<a href="[^"]+/[a-z]{2}_[A-Z]{2}[/"][\s\S]+?</a>', r'', doc)
return doc return doc
def monobook_hack_skin_html(doc): def monobook_hack_skin_html(doc):
@ -859,12 +877,13 @@ def should_follow(url):
return True return True
def parse_html(doc, url): def parse_html(doc, url, filename):
""" """
Returns (modified_doc, new_urls), where new_urls are absolute URLs for Returns (modified_doc, new_urls), where new_urls are absolute URLs for
all links we want to spider in the HTML. all links we want to spider in the HTML.
""" """
global config global config
global counter
BEGIN_COMMENT_REPLACE = '<BEGINCOMMENT-' + str(random.random()) + '>' BEGIN_COMMENT_REPLACE = '<BEGINCOMMENT-' + str(random.random()) + '>'
END_COMMENT_REPLACE = '<ENDCOMMENT-' + str(random.random()) + '>' END_COMMENT_REPLACE = '<ENDCOMMENT-' + str(random.random()) + '>'
@ -886,7 +905,7 @@ def parse_html(doc, url):
# more pages. # more pages.
for item in L: for item in L:
u = item.url u = item.url
follow = should_follow(u) follow = should_follow(u) # and (counter < 10)
if follow: if follow:
if config.debug: if config.debug:
print 'ACCEPTED - ', u print 'ACCEPTED - ', u
@ -904,11 +923,10 @@ def parse_html(doc, url):
newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '<!--') newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '<!--')
newdoc = newdoc.replace(END_COMMENT_REPLACE, '-->') newdoc = newdoc.replace(END_COMMENT_REPLACE, '-->')
newdoc = pos_html_transform(newdoc, url) newdoc = pos_html_transform(newdoc, url,filename)
return (newdoc, new_urls) return (newdoc, new_urls)
def run(out=sys.stdout): def run(out=sys.stdout):
""" """
Code interface. Code interface.
@ -976,7 +994,7 @@ def run(out=sys.stdout):
new_urls = [] new_urls = []
if filename.endswith('.html'): if filename.endswith('.html'):
(doc, new_urls) = parse_html(doc, url) (doc, new_urls) = parse_html(doc, url, filename)
elif filename.endswith('.css'): elif filename.endswith('.css'):
(doc, new_urls) = parse_css(doc, url) (doc, new_urls) = parse_css(doc, url)
@ -1013,6 +1031,26 @@ def run(out=sys.stdout):
print errors, "errors not recovered" print errors, "errors not recovered"
src_dir = os.path.dirname(os.path.realpath(__file__))
src = os.path.join(src_dir, "AudacityLogo.png")
subfile = r"alphamanual.audacityteam.org\m\resources\assets\AudacityLogo.png"
dest = os.path.join(config.outdir, subfile)
print "copying from", src, "to", dest
directory = os.path.dirname(dest)
if not os.path.exists(directory):
os.makedirs(directory)
copyfile(src,dest)
src = os.path.join(src_dir, "303.css")
subfile = r"alphamanual.audacityteam.org\m\skins\monobook\main.css\303.css"
dest = os.path.join(config.outdir, subfile)
print "copying from", src, "to", dest
directory = os.path.dirname(dest)
if not os.path.exists(directory):
os.makedirs(directory)
copyfile(src,dest)
def usage(): def usage():
""" """
Print command line options. Print command line options.