1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-04-30 07:39:42 +02:00

Fixes to local-manual script Part 2.

This reinstates the logo and the 303.css stylesheet. The script now mostly works for most of the pages.
This commit is contained in:
James Crook 2017-08-22 22:57:46 +01:00
parent e036c59f15
commit 74c2af918c
3 changed files with 2081 additions and 16 deletions

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -18,7 +18,7 @@ Minor tweaks (for Audacity) By James Crook, Nov 2009.
...
"""
__version__ = '0.1.0.0'
__version__ = '0.1.0.1'
import re
import sys
@ -34,6 +34,7 @@ import hashlib
import httplib
#import pdb
from time import strftime
from shutil import copyfile
try:
set
@ -47,6 +48,8 @@ except:
print ' http://www.connellybarnes.com/code/htmldata/'
sys.exit()
config = None
MOVE_HREF = 'movehref'
MADE_BY_COMMENT = '<!-- Content generated by Mediawiki and mw2html -->'
@ -121,6 +124,10 @@ def normalize_url(url, lower=True):
if url.startswith('http://'):
url = url[len('http://'):]
# if url.startswith('https://'):
# url = url[len('https://'):]
if url.startswith('www.'):
url = url[len('www.'):]
@ -187,17 +194,22 @@ def monobook_fix_html(doc, page_url):
if config.made_by:
doc = doc.replace('<html xmlns=', MADE_BY_COMMENT + '\n<html xmlns=')
doc = remove_tag(doc, '<div class="portlet" id="p-personal"', '</div>', '<div')
doc = remove_tag(doc, '<div id="p-search" class="portlet"', '</div>', '<div')
doc = remove_tag(doc, '<div class="portlet" id="p-editors">', '</div>', '<div')
doc = remove_tag(doc, '<div id=\'catlinks\' class=\'catlinks catlinks-allhidden\'>', '</div>', '<div')
# Obsolete substitutions.
# doc = remove_tag(doc, '<div class="portlet" id="p-editors">', '</div>', '<div')
# doc = remove_tag(doc, '<div id=\'catlinks\' class=\'catlinks catlinks-allhidden\'>', '</div>', '<div')
#James also remove the page/discussion/source/history/ div.
doc = remove_tag(doc, '<li id="ca-', '</li>', '<li')
doc = remove_tag(doc, '<div id="p-search" class="portlet"', '</div>', '<div')
doc = remove_tag(doc, '<div class="portlet" id="p-personal"', '</div>', '<div')
doc = remove_tag(doc, '<div class="editornote2"', '</div>', '<div')
doc = remove_tag(doc, '<div id="p-cactions"', '</div>', '<div')
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-For_Editors"', '</div>', '<div')
doc = remove_tag(doc, '<div class="generated-sidebar portlet" id="p-ToDo"', '</div>', '<div')
doc = remove_tag(doc, '<div class="portlet" id="p-tb"', '</div>', '<div')
#remove javascript.
doc = remove_tag(doc, '<script', '</script>', '<script')
#andre special mode
if config.special_mode:
@ -262,7 +274,7 @@ def pre_html_transform(doc, url):
return doc
def pos_html_transform(doc, url):
def pos_html_transform(doc, url,filename):
global footer_text, config, sidebar_html
url = normalize_url(url, False)
@ -272,7 +284,7 @@ def pos_html_transform(doc, url):
sidebar_html = f.read()
f.close()
doc = re.sub(r'(<!-- end of the left \(by default at least\) column -->)', sidebar_html + r'\1', doc)
# doc = re.sub(r'(<!-- end of the left \(by default at least\) column -->)', sidebar_html + r'\1', doc)
# Remove empty links
doc = clean_tag(doc, 'href=""', '</a>', '<a ');
@ -284,9 +296,15 @@ def pos_html_transform(doc, url):
# Remove external javascript
doc = re.sub(r'<script type="text/javascript" src="http://[\s\S]+?</script>', r'', doc)
# Add back relevant stylesheet.
doc = re.sub(r'</head>', '<link rel="stylesheet" href="../m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc, flags=re.DOTALL)
# Add back relevant stylesheet.
top_level_dir = config.outdir
if( os.path.dirname(os.path.dirname( filename )) == config.outdir ):
doc = re.sub(r'</head>', '<link rel="stylesheet" href="m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc, flags=re.DOTALL)
else:
doc = re.sub(r'</head>',
'<link rel="stylesheet" href="../m/skins/monobook/main.css/303.css" media="screen" />\n</head>', doc,
flags=re.DOTALL)
# Replace remaining text with footer, if available (this needs to be done after parse_html to avoid rewriting of URLs).
if config.footer is not None:
@ -357,8 +375,8 @@ def html_remove_translation_links(doc):
The second version deals with links like /pt_PT and /zh_CN
We are case sensitive, so as not to treat FAQ as a language code.
"""
doc = re.sub(r'<a href="[^"]+/[a-z]{2,3}[/"][\s\S]+?</a>', r'<!--Removed Translation Flag-->', doc)
doc = re.sub(r'<a href="[^"]+/[a-z]{2}_[A-Z]{2}[/"][\s\S]+?</a>', r'<!--Removed Translation Flag2-->', doc)
doc = re.sub(r'<a href="[^"]+/[a-z]{2,3}[/"][\s\S]+?</a>', r'', doc)
doc = re.sub(r'<a href="[^"]+/[a-z]{2}_[A-Z]{2}[/"][\s\S]+?</a>', r'', doc)
return doc
def monobook_hack_skin_html(doc):
@ -859,12 +877,13 @@ def should_follow(url):
return True
def parse_html(doc, url):
def parse_html(doc, url, filename):
"""
Returns (modified_doc, new_urls), where new_urls are absolute URLs for
all links we want to spider in the HTML.
"""
global config
global counter
BEGIN_COMMENT_REPLACE = '<BEGINCOMMENT-' + str(random.random()) + '>'
END_COMMENT_REPLACE = '<ENDCOMMENT-' + str(random.random()) + '>'
@ -886,7 +905,7 @@ def parse_html(doc, url):
# more pages.
for item in L:
u = item.url
follow = should_follow(u)
follow = should_follow(u) # and (counter < 10)
if follow:
if config.debug:
print 'ACCEPTED - ', u
@ -904,11 +923,10 @@ def parse_html(doc, url):
newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '<!--')
newdoc = newdoc.replace(END_COMMENT_REPLACE, '-->')
newdoc = pos_html_transform(newdoc, url)
newdoc = pos_html_transform(newdoc, url,filename)
return (newdoc, new_urls)
def run(out=sys.stdout):
"""
Code interface.
@ -976,7 +994,7 @@ def run(out=sys.stdout):
new_urls = []
if filename.endswith('.html'):
(doc, new_urls) = parse_html(doc, url)
(doc, new_urls) = parse_html(doc, url, filename)
elif filename.endswith('.css'):
(doc, new_urls) = parse_css(doc, url)
@ -1013,6 +1031,26 @@ def run(out=sys.stdout):
print errors, "errors not recovered"
src_dir = os.path.dirname(os.path.realpath(__file__))
src = os.path.join(src_dir, "AudacityLogo.png")
subfile = r"alphamanual.audacityteam.org\m\resources\assets\AudacityLogo.png"
dest = os.path.join(config.outdir, subfile)
print "copying from", src, "to", dest
directory = os.path.dirname(dest)
if not os.path.exists(directory):
os.makedirs(directory)
copyfile(src,dest)
src = os.path.join(src_dir, "303.css")
subfile = r"alphamanual.audacityteam.org\m\skins\monobook\main.css\303.css"
dest = os.path.join(config.outdir, subfile)
print "copying from", src, "to", dest
directory = os.path.dirname(dest)
if not os.path.exists(directory):
os.makedirs(directory)
copyfile(src,dest)
def usage():
"""
Print command line options.