From cfcb876bc19c6fde7343b8d2da3bf008e02b522f Mon Sep 17 00:00:00 2001 From: Gale Andrews Date: Mon, 4 Apr 2016 18:29:06 +0100 Subject: [PATCH] Buanzo's fix to include a User-Agent HTTP request header This means that we don't get a 403 error when dumping the Manual with the server's security features in place. --- scripts/mw2html_audacity/mw2html.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/mw2html_audacity/mw2html.py b/scripts/mw2html_audacity/mw2html.py index b5279896d..5f877ec9f 100644 --- a/scripts/mw2html_audacity/mw2html.py +++ b/scripts/mw2html_audacity/mw2html.py @@ -60,6 +60,7 @@ footer_text = '' counter = 0 errors = 0 conn = None +headers = {"User-Agent": "mw2html.py/Audacity"} domain = '' MONOBOOK_SKIN = 'monobook' # Constant identifier for Monobook. @@ -517,7 +518,7 @@ def split_section(url): def url_open(url): # download a file and retrieve its content and mimetype - global conn, domain, counter, redir_cache, errors + global conn, domain, counter, redir_cache, errors, headers l_redir = [] redirect = url @@ -547,7 +548,7 @@ def url_open(url): #increment httplib requests counter counter += 1 try: - conn.request("GET", rel_url) + conn.request("GET", rel_url,headers=headers) r = conn.getresponse() print 'Status', r.status, r.reason, 'accessing', rel_url if r.status == 404: @@ -898,7 +899,7 @@ def run(out=sys.stdout): """ Code interface. """ - global conn, domain, counter, redir_cache, config + global conn, domain, counter, redir_cache, config, headers if urlparse.urlparse(config.rooturl)[1].lower().endswith('wikipedia.org'): out.write('Please do not use robots with the Wikipedia site.\n')