diff --git a/scripts/mw2html_audacity/htmldata.py b/scripts/mw2html_audacity/htmldata.py
index c11927dee..f10f8060e 100644
--- a/scripts/mw2html_audacity/htmldata.py
+++ b/scripts/mw2html_audacity/htmldata.py
@@ -11,22 +11,19 @@ Features:
    This allows you to read and write HTML documents
    programmably, with much flexibility.
  - Extract and modify URLs in an HTML document.
- - Compatible with Python 2.0 - 2.5.
+ - Compatible with Python 3+
 
 See the L{examples} for a quick start.
 
+Moved to Python3 by Jack Thomson May 2020
+
 """
 
-__version__ = '1.1.1'
+__version__ = '1.1.2'
 
 __all__ = ['examples', 'tagextract', 'tagjoin', 'urlextract',
            'urljoin', 'URLMatch']
 
-# Define True and False for Python < 2.2.
-import sys
-if sys.version_info[:3] < (2, 2, 0):
-  exec "True = 1; False = 0"
-
 # -------------------------------------------------------------------
 # Globals
 # -------------------------------------------------------------------
@@ -34,8 +31,8 @@ if sys.version_info[:3] < (2, 2, 0):
 import re
 import shlex
 import string
-import urllib
-import urlparse
+import urllib.request, urllib.parse, urllib.error
+import urllib.parse
 import types
 
 # Translate text between these strings as plain text (not HTML).
@@ -164,7 +161,7 @@ def tagjoin(L):
         else:
           rslash = ''
         tag_items = []
-        items = d.items()
+        items = list(d.items())
         items.sort()
         for (key, value) in items:
           if value != None:
@@ -189,7 +186,7 @@ def _enumerate(L):
 
   Returns a list instead of an iterator.
   """
-  return zip(range(len(L)), L)
+  return list(zip(list(range(len(L))), L))
 
 def _ignore_tag_index(s, i):
   """
@@ -261,7 +258,7 @@ def _html_split(s):
         found = False
         in_quot1 = False
         in_quot2 = False
-        for i2 in xrange(i + 1, len(s)):
+        for i2 in range(i + 1, len(s)):
           c2 = s[i2]
           if c2 == '"' and not in_quot1:
             in_quot2 = not in_quot2
@@ -521,7 +518,7 @@ def _test_tag_dict():
   s = ' \r\nbg = val text \t= "hi you" name\t e="5"\t\t\t\n'
   (a, b, c) = _tag_dict(s)
   assert a == {'text': 'hi you', 'bg': 'val', 'e': '5', 'name': None}
-  for key in a.keys():
+  for key in list(a.keys()):
     assert s[b[key][0]:b[key][1]] == key
     if a[key] != None:
       assert s[c[key][0]:c[key][1]] == a[key]
@@ -609,7 +606,7 @@ def _full_tag_extract(s):
       (attrs, key_pos, value_pos) = _tag_dict(dtext)
 
       # Correct offsets in key_pos and value_pos.
-      for key in attrs.keys():
+      for key in list(attrs.keys()):
         key_pos[key] = (key_pos[key][0] + Lstart[i] + dtext_offset,
                         key_pos[key][1] + Lstart[i] + dtext_offset)
         value_pos[key] = (value_pos[key][0] + Lstart[i] + dtext_offset,
@@ -720,7 +717,7 @@ _URL_TAGS = ['a href', 'applet archive', 'applet code',
              'script src', 'table background', 'tbody background',
              'td background', 'tfoot background', 'th background',
              'thead background', 'tr background']
-_URL_TAGS = map(lambda s: tuple(s.split()), _URL_TAGS)
+_URL_TAGS = [tuple(s.split()) for s in _URL_TAGS]
 
 
 def _finditer(pattern, string):
@@ -862,7 +859,7 @@ def urlextract(doc, siteurl=None, mimetype='text/html'):
         pass
       else:
         # Current item is a tag.
-        if item.attrs.has_key('style'):
+        if 'style' in item.attrs:
           # Process a stylesheet embedded in the 'style' attribute.
           temp = urlextract(item.attrs['style'], siteurl, 'text/css')
           # Offset indices and add to ans.
@@ -872,7 +869,7 @@ def urlextract(doc, siteurl=None, mimetype='text/html'):
           ans += temp
 
         for (a, b) in _URL_TAGS:
-          if item.name.startswith(a) and b in item.attrs.keys():
+          if item.name.startswith(a) and b in list(item.attrs.keys()):
             # Got one URL.
             url = item.attrs[b]
             # FIXME: Some HTML tag wants a URL list, look up which
@@ -893,7 +890,7 @@ def urlextract(doc, siteurl=None, mimetype='text/html'):
   start_end_map = {}
   filtered_ans = []
   for item in ans:
-    if not start_end_map.has_key((item.start, item.end)):
+    if (item.start, item.end) not in start_end_map:
       start_end_map[(item.start, item.end)] = None
       filtered_ans.append(item)
   return filtered_ans
@@ -1090,7 +1087,7 @@ def examples():
   the offending IP address.
 
   """
-  print examples.__doc__
+  print(examples.__doc__)
 
 class URLMatch:
   """
@@ -1137,7 +1134,7 @@ class URLMatch:
     self.in_css = in_css
 
     if siteurl != None:
-      self.url = urlparse.urljoin(siteurl, self.url)
+      self.url = urllib.parse.urljoin(siteurl, self.url)
 
     self.tag_attr = tag_attr
     self.tag_attrs = tag_attrs
@@ -1154,15 +1151,15 @@ def _cast_to_str(arg, str_class):
   """
   if _is_str(arg):
     return str_class(arg)
-  elif isinstance(arg, types.ListType):
+  elif isinstance(arg, list):
     ans = []
     for item in arg:
       if _is_str(item):
         ans.append(str_class(item))
-      elif isinstance(item, types.TupleType) and len(item) == 2:
+      elif isinstance(item, tuple) and len(item) == 2:
         (a, b) = item
         b_prime = {}
-        for (b_key, b_value) in b.items():
+        for (b_key, b_value) in list(b.items()):
           if b_value is None:
             b_prime[str_class(b_key)] = None
           else:
@@ -1321,7 +1318,7 @@ def _test_tagextract(str_class=str):
   L = _full_tag_extract(s)
   for (i, item) in _enumerate(L):
     if isinstance(item, _HTMLTag):
-      for key in item.attrs.keys():
+      for key in list(item.attrs.keys()):
         assert s[item.key_pos[key][0]:item.key_pos[key][1]].lower()\
                == key
         if item.attrs[key] != None:
@@ -1460,7 +1457,7 @@ def _test_urlextract(str_class=str):
   base = f('http://www.python.org/~guido/')
   L = urlextract(s, base)
   L2 = [x.url for x in L]
-  assert L2 == [urlparse.urljoin(base, x) for x in ans]
+  assert L2 == [urllib.parse.urljoin(base, x) for x in ans]
 
   # Test urljoin().
   assert urljoin(doc1, urlextract(doc1, mimetype='text/css')) == doc1
@@ -1485,17 +1482,6 @@ def _test_urlextract(str_class=str):
   assert L2 == f(['foo', 'a.gif', 'bar.css', 'b.html'])
   assert [s[x.start:x.end] == x.url for x in L].count(False) == 0
 
-def _python_has_unicode():
-  """
-  True iff Python was compiled with unicode().
-  """
-  try:
-    unicode
-    return True
-  except:
-    return False
-
-
 # -------------------------------------------------------------------
 # Unit Test Main Routine
 # -------------------------------------------------------------------
@@ -1504,32 +1490,30 @@ def _test():
   """
   Unit test main routine.
   """
-  print 'Unit tests:'
+  print('Unit tests:')
   _test_remove_comments()
-  print ' _remove_comments: OK'
+  print(' _remove_comments: OK')
   _test_shlex_split()
-  print ' _shlex_split: OK'
+  print(' _shlex_split: OK')
   _test_tag_dict()
-  print ' _tag_dict: OK'
+  print(' _tag_dict: OK')
   _test_tuple_replace()
-  print ' _tuple_replace: OK'
+  print(' _tuple_replace: OK')
   _test_tagextract()
-  print ' tagextract*: OK'
+  print(' tagextract*: OK')
 
-  if _python_has_unicode():
-    _test_tagextract(unicode)
-    print ' tagextract (unicode)*: OK'
+  _test_tagextract(str)
+  print(' tagextract (unicode)*: OK')
 
   _test_urlextract()
-  print ' urlextract*: OK'
+  print(' urlextract*: OK')
 
-  if _python_has_unicode():
-    _test_urlextract(unicode)
-    print ' urlextract (unicode)*: OK'
+  _test_urlextract(str)
+  print(' urlextract (unicode)*: OK')
 
-  print
-  print '* The corresponding join method has been tested as well.'
+  print()
+  print('* The corresponding join method has been tested as well.')
 
 
 if __name__ == '__main__':
diff --git a/scripts/mw2html_audacity/mw2html.py b/scripts/mw2html_audacity/mw2html.py
index 39f327913..95c485260 100644
--- a/scripts/mw2html_audacity/mw2html.py
+++ b/scripts/mw2html_audacity/mw2html.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 
 """
 mw2html - Mediawiki to static HTML
@@ -15,37 +15,34 @@ Improved filtering. Improved usability.
 Customized for Audacity's manual wiki.
 
 Minor tweaks (for Audacity) By James Crook, Nov 2009.
+Moved to Python3 by Jack Thomson, May 2020
 ...
 """
 
-__version__ = '0.1.0.2'
+__version__ = '0.1.0.3'
 
 import re
 import sys
 import getopt
 import random
-import urllib
+import urllib.request, urllib.parse, urllib.error
 import textwrap
-import urlparse
+import urllib.parse
 import os, os.path
+import htmldata
+
 import errno
 import hashlib
-import httplib
-#import pdb
+import http.client
 
 from time import strftime
 from shutil import copyfile
 
-try:
-    set
-except:
-    from sets import Set as set
-
 try:
     import htmldata
 except:
-    print 'Requires Python htmldata module:'
-    print ' http://www.connellybarnes.com/code/htmldata/'
+    print('Requires Python3 htmldata module:')
+    print(' https://github.com/audacity/audacity/blob/master/scripts/mw2html_audacity/htmldata.py')
     sys.exit()
 
 
@@ -111,7 +108,7 @@ def get_domain(u):
     url = normalize_url(u)
 
     #ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', params='', query='', fragment='')
-    L = list(urlparse.urlparse(url))
+    L = list(urllib.parse.urlparse(url))
 
     return L[1]
 
@@ -135,7 +132,7 @@ def normalize_url(url, lower=True):
 
     url = 'https://' + url
 
-    urlparse.urljoin(config.rooturl, url)
+    urllib.parse.urljoin(config.rooturl, url)
 
     return url
 
@@ -283,7 +280,7 @@ def pos_html_transform(doc, url,filename):
 
     # Add sidebar.html
     if config.sidebar != None and sidebar_html == '':
-        f = open(config.sidebar, 'rU')
+        f = open(config.sidebar, 'r')
         sidebar_html = f.read()
         f.close()
 
@@ -328,7 +325,7 @@ def pos_html_transform(doc, url,filename):
         return doc
 
     if footer_text == '':
-        f = open(config.footer, 'rU')
+        f = open(config.footer, 'r')
        footer_text = f.read()
        f.close()
 
@@ -567,14 +564,14 @@ def url_open(url):
     while redirect != '':
         l_redir += [url]
-        L = urlparse.urlparse(url)
+        L = urllib.parse.urlparse(url)
         if L[1] != domain:
             conn.close()
             if L[1] == '':
                 return(['',''])
-            print "connection to", domain, "closed."
-            conn = httplib.HTTPSConnection(L[1])
+            print("connection to", domain, "closed.")
+            conn = http.client.HTTPSConnection(L[1])
             domain = L[1]
-            print "connection to", domain, "opened."
+            print("connection to", domain, "opened.")
 
         rel_url = url
         pos = url.find(domain)
@@ -593,47 +590,47 @@ def url_open(url):
 
         try:
             conn.request("GET", rel_url,headers=headers)
             r = conn.getresponse()
-            print 'Status', r.status, r.reason, 'accessing', rel_url
+            print('Status', r.status, r.reason, 'accessing', rel_url)
             if r.status == 404:
-                print " it's not possible to recover this error."
+                print(" it's not possible to recover this error.")
                 errors += 1
                 return ('', '')
             if r.status == 500:
-                print " eventually this error might be recovered. let's try again."
-                print ' reconnecting...'
-                conn = httplib.HTTPSConnection(domain)
+                print(" eventually this error might be recovered. let's try again.")
+                print(' reconnecting...')
+                conn = http.client.HTTPSConnection(domain)
                 attempts += 1
                 continue
             if r.status == 403:
-                print " that shouldn't happen, but let's try again anyway."
-                print ' reconnecting...'
-                conn = httplib.HTTPSConnection(domain)
+                print(" that shouldn't happen, but let's try again anyway.")
+                print(' reconnecting...')
+                conn = http.client.HTTPSConnection(domain)
                 attempts += 1
                 continue
             if attempts != 0:
                 recovered = True
             if r.status != 200:
-                print " Status other than 200, 404, 500, 403. It is: ", r.status
+                print(" Status other than 200, 404, 500, 403. It is: ", r.status)
             success = True
-        except httplib.HTTPException, e:
-            print 'ERROR', e.__class__.__name__, 'while retrieving', url
+        except http.client.HTTPException as e:
+            print('ERROR', e.__class__.__name__, 'while retrieving', url)
             conn.close
             if e.__class__.__name__ in ['BadStatusLine', 'ImproperConnectionState', 'NotConnected', 'IncompleteRead', 'ResponseNotReady']:
-                print "eventually this error might be recovered. let's try again."
-                print 'reconnecting...'
-                conn = httplib.HTTPSConnection(domain)
+                print("eventually this error might be recovered. let's try again.")
+                print('reconnecting...')
+                conn = http.client.HTTPSConnection(domain)
                 attempts += 1
             else:
-                print "it's not possible to recover this error."
+                print("it's not possible to recover this error.")
                 errors += 1
                 return ('', '')
 
         if recovered:
-            print "error recovered"
+            print("error recovered")
 
         if not success:
-            print "it was not possible to recover this error."
+            print("it was not possible to recover this error.")
             errors += 1
             return ('', '')
 
@@ -666,7 +663,7 @@ def url_to_filename(url):
     #ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', params='', query='', fragment='')
     turl = re.sub(r'm/index.php\?title=', r'man/', nurl)
     turl = re.sub(r'.css&[\S\s]+', r'.css', turl)
-    L = list(urlparse.urlparse(turl))
+    L = list(urllib.parse.urlparse(turl))
 
     #this way the url will not create a folder outside of the maindomain
     droot = get_domain(config.rooturl)
@@ -697,10 +694,10 @@ def url_to_filename(url):
 
     #don't sanitize / for path
     L[0] = ''
-    L[2] = urllib.quote_plus(L[2],'/')
-    L[3] = urllib.quote_plus(L[3])
-    L[4] = urllib.quote_plus(L[4])
-    L[5] = urllib.quote_plus(L[5])
+    L[2] = urllib.parse.quote_plus(L[2],'/')
+    L[3] = urllib.parse.quote_plus(L[3])
+    L[4] = urllib.parse.quote_plus(L[4])
+    L[5] = urllib.parse.quote_plus(L[5])
 
     # Local filename relative to outdir
     # os.sep - O.S. directory separator
@@ -750,12 +747,11 @@ def url_to_filename(url):
 
     wrote_file_set.add(os.path.normcase(os.path.normpath(ans)))
     url_filename_cache[nurl] = ans
-    mode = ['wb', 'w'][mimetype.startswith('text')]
 
     # Make parent directory if it doesn't exist.
     try:
         os.makedirs(os.path.split(ans)[0])
-    except OSError, e:
+    except OSError as e:
         if e.errno != errno.EEXIST:
             raise
 
@@ -765,7 +761,12 @@ def url_to_filename(url):
         out.write('File already exists: ' + str(ans))  #@UndefinedVariable
         sys.exit(1)
 
-    f = open(ans, mode)
+    if mimetype.startswith('text'):
+        f = open(ans, 'w', encoding='utf8')
+        doc = str(doc)
+    else:
+        f = open(ans, 'wb')
+
     f.write(doc)
     f.close()
 
@@ -790,7 +791,7 @@ def url_to_relative(url, cururl):
         L1 = L1[1:]
         L2 = L2[1:]
 
-    rel_url = urllib.quote('../' * (len(L2) - 1) + '/'.join(L1)) + section
+    rel_url = urllib.parse.quote('../' * (len(L2) - 1) + '/'.join(L1)) + section
     if rel_url == '':
         return '#'
     else:
@@ -842,28 +843,28 @@ def should_follow(url):
     #if droot != dn and not (dn.endswith(droot) or droot.endswith(dn)):
     if droot != dn:
         if config.debug:
-            print url, 'not in the same domain'
+            print(url, 'not in the same domain')
         return False
 
     # False if multiple query fields or parameters found
     if (url.count('&') >= 1 or url.count(';') > 0) and not any(x in url for x in ('.css', 'gen=css')):
         if config.debug:
-            print url, 'with multiple query fields'
+            print(url, 'with multiple query fields')
         return False
 
     if any(x in url for x in ('Special:', 'Image:', 'Talk:', 'User:', 'Help:', 'User_talk:', 'MediaWiki_talk:', 'File:', 'action=edit', 'title=-')):
         if config.debug:
-            print url, 'is a forbidden wiki page'
+            print(url, 'is a forbidden wiki page')
         return False
 
     if config.no_images and any(url.strip().lower().endswith(suffix) for suffix in ('.jpg', '.gif', '.png', '.ico')):
         if config.debug:
-            print url, 'is a image and you are in no-images mode'
+            print(url, 'is a image and you are in no-images mode')
         return False
 
     if any(url.strip().lower().endswith(suffix) for suffix in ('.zip', '.7z')):
         if config.debug:
-            print url, 'is a compressed file'
+            print(url, 'is a compressed file')
         return False
 
 
@@ -874,7 +875,7 @@ def should_follow(url):
         L = nurl.split('/')
         if ('.' not in L[-1]):
             if config.debug:
-                print url, 'is a file outside of scope with unknown extension'
+                print(url, 'is a file outside of scope with unknown extension')
             return False
 
     # JKC: we do allow css from 'strange' places.
@@ -885,7 +886,7 @@ def should_follow(url):
     for fp in forbidden_parents:
         if fp in L[-1]:
             if config.debug:
-                print url, 'is a page outside of scope'
+                print(url, 'is a page outside of scope')
             return False
 
     return True
@@ -921,7 +922,7 @@ def parse_html(doc, url, filename):
         follow = should_follow(u) #and (counter < 10)
         if follow:
             if config.debug:
-                print 'ACCEPTED - ', u
+                print('ACCEPTED - ', u)
             # Store url locally.
             new_urls += [u]
             item.url = url_to_relative(u, url)
@@ -930,7 +931,7 @@ def parse_html(doc, url, filename):
#            if not any( license in u for license in ('creativecommons.org', 'wxwidgets.org', 'gnu.org', 'mediawiki.org') ):
#                item.url = ''
             if config.debug:
-                print 'NOT INCLUDED - ', u
+                print('NOT INCLUDED - ', u)
 
     newdoc = htmldata.urljoin(doc, L)
     newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '