From b4d6f5f03a47fe90b4043dde884ec5ac33c77a41 Mon Sep 17 00:00:00 2001
From: James Crook
Date: Wed, 22 Jun 2016 19:18:30 +0100
Subject: [PATCH] Bug 1370 - mw2html: Filipino pages present.

Now excludes links containing /xx/, /xxx/ and /xx_XX/.
Case sensitive, otherwise we exclude /FAQ/ and /SC4/
---
 scripts/mw2html_audacity/mw2html.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/mw2html_audacity/mw2html.py b/scripts/mw2html_audacity/mw2html.py
index 6751c3004..7b9378318 100644
--- a/scripts/mw2html_audacity/mw2html.py
+++ b/scripts/mw2html_audacity/mw2html.py
@@ -343,11 +343,13 @@ def html_remove_image_history(doc):
 def html_remove_translation_links(doc):
     """
     Remove translation links (the international flags).
-    We identify them by the pattern for a 2 letter language code, /[\s\S][\s\S][/"]
+    We identify them by the pattern for a 2 or 3 letter language code, /[a-z]{2,3}[/"]
     in the URL.
+    The second version deals with links like /pt_PT and /zh_CN
+    We are case sensitive, so as not to treat FAQ as a language code.
     """
-    doc = re.sub(r'', r'', doc)
-    doc = re.sub(r'', r'', doc)
+    doc = re.sub(r'', r'', doc)
+    doc = re.sub(r'', r'', doc)
     return doc
 
 def monobook_hack_skin_html(doc):