mirror of
https://github.com/cookiengineer/audacity
synced 2025-07-29 15:09:30 +02:00
Fix mw2html byte artifact issues.
The conversion from Python 2 to Python 3 introduced some issues: - Characters outside the ASCII range were emitted as literal byte escape sequences such as \xe2\x80\x99. - Single quotes got escaped. - The entire doc was enclosed in quotes. These changes address those issues and also remove HTML comments.
This commit is contained in:
parent
de50f55f61
commit
9acabbbed3
@ -939,10 +939,17 @@ def parse_html(doc, url, filename):
|
||||
|
||||
newdoc = pos_html_transform(newdoc, url,filename)
|
||||
|
||||
# remove the comments.
|
||||
p = re.compile( '<!--.*?-->', re.DOTALL)
|
||||
newdoc = p.sub( '', newdoc )
|
||||
# Remove byte artifacts in string
|
||||
newdoc = newdoc.replace('\\n','\n')
|
||||
newdoc = newdoc.replace('\\t', '\t')
|
||||
newdoc = newdoc.replace('\\\'', '\'')
|
||||
newdoc = newdoc.replace('\\xe2\\x80\\x99','\'')
|
||||
newdoc = newdoc.replace('\\xe2\\x80\\x90', '-')
|
||||
newdoc = newdoc.strip('b')
|
||||
newdoc = newdoc.strip('\'')
|
||||
newdoc = newdoc.strip('')
|
||||
|
||||
return (newdoc, new_urls)
|
||||
|
Loading…
x
Reference in New Issue
Block a user