mirror of
https://github.com/cookiengineer/audacity
synced 2025-04-29 23:29:41 +02:00
Replace msgfmt.py with one from Cython
Since it actually works. :-)
This commit is contained in:
parent
56852ae4c1
commit
0ab7645105
337
locale/msgfmt.py
Executable file → Normal file
337
locale/msgfmt.py
Executable file → Normal file
@ -1,144 +1,78 @@
|
||||
#! /usr/bin/env python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>
|
||||
#
|
||||
# Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless
|
||||
# translation service (PTS) of Zope
|
||||
#
|
||||
# Fixed some bugs and updated to support msgctxt
|
||||
# by Hanno Schlichting <hanno@hannosch.eu>
|
||||
#! /usr/bin/env python3
|
||||
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
|
||||
|
||||
"""Generate binary message catalog from textual translation description.
|
||||
|
||||
This program converts a textual Uniforum-style message catalog (.po file) into
|
||||
a binary GNU catalog (.mo file). This is essentially the same function as the
|
||||
GNU msgfmt program, however, it is a simpler implementation.
|
||||
GNU msgfmt program, however, it is a simpler implementation. Currently it
|
||||
does not handle plural forms but it does handle message contexts.
|
||||
|
||||
This file was taken from Python-2.3.2/Tools/i18n and altered in several ways.
|
||||
Now you can simply use it from another python module:
|
||||
Usage: msgfmt.py [OPTIONS] filename.po
|
||||
|
||||
from msgfmt import Msgfmt
|
||||
mo = Msgfmt(po).get()
|
||||
Options:
|
||||
-o file
|
||||
--output-file=file
|
||||
Specify the output file to write to. If omitted, output will go to a
|
||||
file named filename.mo (based off the input file name).
|
||||
|
||||
where po is path to a po file as string, an opened po file ready for reading or
|
||||
a list of strings (readlines of a po file) and mo is the compiled mo file as
|
||||
binary string.
|
||||
-h
|
||||
--help
|
||||
Print this message and exit.
|
||||
|
||||
Exceptions:
|
||||
|
||||
* IOError if the file couldn't be read
|
||||
|
||||
* msgfmt.PoSyntaxError if the po file has syntax errors
|
||||
-V
|
||||
--version
|
||||
Display version information and exit.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
import array
|
||||
from ast import literal_eval
|
||||
import codecs
|
||||
from email.parser import HeaderParser
|
||||
import os
|
||||
import sys
|
||||
import ast
|
||||
import getopt
|
||||
import struct
|
||||
import sys
|
||||
import array
|
||||
from email.parser import HeaderParser
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
if PY3:
|
||||
def header_charset(s):
|
||||
p = HeaderParser()
|
||||
return p.parsestr(s).get_content_charset()
|
||||
__version__ = "1.2"
|
||||
|
||||
import io
|
||||
BytesIO = io.BytesIO
|
||||
FILE_TYPE = io.IOBase
|
||||
else:
|
||||
def header_charset(s):
|
||||
p = HeaderParser()
|
||||
return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset()
|
||||
|
||||
from cStringIO import StringIO as BytesIO
|
||||
FILE_TYPE = file
|
||||
MESSAGES = {}
|
||||
|
||||
|
||||
class PoSyntaxError(Exception):
    """Raised when a po file cannot be parsed.

    Attributes:
        msg: description of the problem; it is embedded in ``str(exc)``.
    """

    def __init__(self, msg):
        # Hand the message to Exception as well, so that ``args``, ``repr()``
        # and pickling carry it instead of an empty tuple.
        super(PoSyntaxError, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return 'Po file syntax error: %s' % self.msg
|
||||
def usage(code, msg=''):
    """Print the module docstring to stderr and exit with status *code*.

    When *msg* is non-empty it is printed on its own line after the
    docstring.  This function never returns: it always calls sys.exit().
    """
    texts = [__doc__]
    if msg:
        texts.append(msg)
    for text in texts:
        print(text, file=sys.stderr)
    sys.exit(code)
|
||||
|
||||
|
||||
class Msgfmt:
|
||||
def add(ctxt, id, str, fuzzy):
    """Record one translation in the module-level MESSAGES dictionary.

    Nothing is stored when the entry is fuzzy or when the translation is
    empty.  Without a context the key is the message id itself; with a
    context the key is the context, an EOT byte (0x04) and the id.
    """
    if fuzzy or not str:
        return
    key = id if ctxt is None else b"%b\x04%b" % (ctxt, id)
    MESSAGES[key] = str
|
||||
|
||||
def __init__(self, po, name='unknown'):
|
||||
self.po = po
|
||||
self.name = name
|
||||
self.messages = {}
|
||||
self.openfile = False
|
||||
# Start off assuming latin-1, so everything decodes without failure,
|
||||
# until we know the exact encoding
|
||||
self.encoding = 'latin-1'
|
||||
|
||||
def readPoData(self):
    """Return the lines of the po source held in self.po.

    self.po may be a path (str), an open binary file object or a list of
    lines.  A path is opened, read completely and closed again here.  A
    file object passed in by the caller is rewound and left open;
    self.openfile is set so that read() closes it afterwards.  A list is
    returned unchanged.  For file input a leading UTF-8 byte order mark
    is removed from the first line.

    Raises:
        ValueError: if self.po is of an unsupported type or is an empty
            list.
    """
    opened_here = False
    if isinstance(self.po, str):
        output = open(self.po, 'rb')
        opened_here = True
    elif isinstance(self.po, FILE_TYPE):
        self.po.seek(0)
        self.openfile = True
        output = self.po
    elif isinstance(self.po, list):
        output = self.po
    else:
        output = []
    if not output:
        raise ValueError("self.po is invalid! %s" % type(self.po))
    if not isinstance(output, FILE_TYPE):
        return output
    try:
        lines = output.readlines()
    finally:
        # Only close what this method opened; a caller-supplied file is
        # closed later by read().
        if opened_here:
            output.close()
    # Slice off exactly the BOM prefix.  lstrip() would treat the BOM as a
    # set of bytes and could eat further leading 0xEF/0xBB/0xBF bytes that
    # belong to the first character of the file.
    if lines and lines[0].startswith(codecs.BOM_UTF8):
        lines[0] = lines[0][len(codecs.BOM_UTF8):]
    return lines
|
||||
|
||||
def add(self, context, id, string, fuzzy):
|
||||
"Add a non-empty and non-fuzzy translation to the dictionary."
|
||||
if string and not fuzzy:
|
||||
# The context is put before the id and separated by a EOT char.
|
||||
if context:
|
||||
id = context + u'\x04' + id
|
||||
if not id:
|
||||
# See whether there is an encoding declaration
|
||||
charset = header_charset(string)
|
||||
if charset:
|
||||
# decode header in proper encoding
|
||||
string = string.encode(self.encoding).decode(charset)
|
||||
if not PY3:
|
||||
# undo damage done by literal_eval in Python 2.x
|
||||
string = string.encode(self.encoding).decode(charset)
|
||||
self.encoding = charset
|
||||
self.messages[id] = string
|
||||
|
||||
def generate(self):
|
||||
def generate():
|
||||
"Return the generated output."
|
||||
global MESSAGES
|
||||
# the keys are sorted in the .mo file
|
||||
keys = sorted(self.messages.keys())
|
||||
keys = sorted(MESSAGES.keys())
|
||||
offsets = []
|
||||
ids = strs = b''
|
||||
for id in keys:
|
||||
msg = self.messages[id].encode(self.encoding)
|
||||
id = id.encode(self.encoding)
|
||||
# For each string, we need size and file offset. Each string is
|
||||
# NUL terminated; the NUL does not count into the size.
|
||||
offsets.append((len(ids), len(id), len(strs),
|
||||
len(msg)))
|
||||
# For each string, we need size and file offset. Each string is NUL
|
||||
# terminated; the NUL does not count into the size.
|
||||
offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
|
||||
ids += id + b'\0'
|
||||
strs += msg + b'\0'
|
||||
output = b''
|
||||
# The header is 7 32-bit unsigned integers. We don't use hash tables,
|
||||
# so the keys start right after the index tables.
|
||||
keystart = 7 * 4 + 16 * len(keys)
|
||||
strs += MESSAGES[id] + b'\0'
|
||||
output = ''
|
||||
# The header is 7 32-bit unsigned integers. We don't use hash tables, so
|
||||
# the keys start right after the index tables.
|
||||
# translated string.
|
||||
keystart = 7*4+16*len(keys)
|
||||
# and the values start after the keys
|
||||
valuestart = keystart + len(ids)
|
||||
koffsets = []
|
||||
@ -146,160 +80,167 @@ class Msgfmt:
|
||||
# The string table first has the list of keys, then the list of values.
|
||||
# Each entry has first the size of the string, then the file offset.
|
||||
for o1, l1, o2, l2 in offsets:
|
||||
koffsets += [l1, o1 + keystart]
|
||||
voffsets += [l2, o2 + valuestart]
|
||||
koffsets += [l1, o1+keystart]
|
||||
voffsets += [l2, o2+valuestart]
|
||||
offsets = koffsets + voffsets
|
||||
# Even though we don't use a hashtable, we still set its offset to be
|
||||
# binary compatible with the gnu gettext format produced by:
|
||||
# msgfmt file.po --no-hash
|
||||
output = struct.pack("Iiiiiii",
|
||||
0x950412de, # Magic
|
||||
0, # Version
|
||||
len(keys), # # of entries
|
||||
7 * 4, # start of key index
|
||||
7 * 4 + len(keys) * 8, # start of value index
|
||||
0, keystart) # size and offset of hash table
|
||||
if PY3:
|
||||
7*4, # start of key index
|
||||
7*4+len(keys)*8, # start of value index
|
||||
0, 0) # size and offset of hash table
|
||||
output += array.array("i", offsets).tobytes()
|
||||
else:
|
||||
output += array.array("i", offsets).tostring()
|
||||
output += ids
|
||||
output += strs
|
||||
return output
|
||||
|
||||
def get(self):
|
||||
""" """
|
||||
self.read()
|
||||
# Compute output
|
||||
return self.generate()
|
||||
|
||||
def read(self, header_only=False):
|
||||
""" """
|
||||
def make(filename, outfile):
|
||||
ID = 1
|
||||
STR = 2
|
||||
CTXT = 3
|
||||
|
||||
section = None
|
||||
# Compute .mo name from .po name and arguments
|
||||
if filename.endswith('.po'):
|
||||
infile = filename
|
||||
else:
|
||||
infile = filename + '.po'
|
||||
if outfile is None:
|
||||
outfile = os.path.splitext(infile)[0] + '.mo'
|
||||
|
||||
try:
|
||||
with open(infile, 'rb') as f:
|
||||
lines = f.readlines()
|
||||
except IOError as msg:
|
||||
print(msg, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
section = msgctxt = None
|
||||
fuzzy = 0
|
||||
msgid = msgstr = msgctxt = u''
|
||||
|
||||
# Start off assuming Latin-1, so everything decodes without failure,
|
||||
# until we know the exact encoding
|
||||
encoding = 'latin-1'
|
||||
|
||||
# Parse the catalog
|
||||
lno = 0
|
||||
for l in self.readPoData():
|
||||
l = l.decode(self.encoding)
|
||||
for l in lines:
|
||||
l = l.decode(encoding)
|
||||
lno += 1
|
||||
# If we get a comment line after a msgstr or a line starting with
|
||||
# msgid or msgctxt, this is a new entry
|
||||
if section == STR and (l[0] == '#' or (l[0] == 'm' and
|
||||
(l.startswith('msgctxt') or l.startswith('msgid')))):
|
||||
self.add(msgctxt, msgid, msgstr, fuzzy)
|
||||
section = None
|
||||
# If we get a comment line after a msgstr, this is a new entry
|
||||
if l[0] == '#' and section == STR:
|
||||
add(msgctxt, msgid, msgstr, fuzzy)
|
||||
section = msgctxt = None
|
||||
fuzzy = 0
|
||||
# If we only want the header we stop after the first message
|
||||
if header_only:
|
||||
break
|
||||
# Record a fuzzy mark
|
||||
if l[:2] == '#,' and 'fuzzy' in l:
|
||||
fuzzy = 1
|
||||
# Skip comments
|
||||
if l[0] == '#':
|
||||
continue
|
||||
# Now we are in a msgctxt section
|
||||
# Now we are in a msgid or msgctxt section, output previous section
|
||||
if l.startswith('msgctxt'):
|
||||
if section == STR:
|
||||
add(msgctxt, msgid, msgstr, fuzzy)
|
||||
section = CTXT
|
||||
l = l[7:]
|
||||
msgctxt = u''
|
||||
# Now we are in a msgid section, output previous section
|
||||
elif (l.startswith('msgid') and
|
||||
not l.startswith('msgid_plural')):
|
||||
msgctxt = b''
|
||||
elif l.startswith('msgid') and not l.startswith('msgid_plural'):
|
||||
if section == STR:
|
||||
self.add(msgid, msgstr, fuzzy)
|
||||
add(msgctxt, msgid, msgstr, fuzzy)
|
||||
if not msgid:
|
||||
# See whether there is an encoding declaration
|
||||
p = HeaderParser()
|
||||
charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
|
||||
if charset:
|
||||
encoding = charset
|
||||
section = ID
|
||||
l = l[5:]
|
||||
msgid = msgstr = u''
|
||||
msgid = msgstr = b''
|
||||
is_plural = False
|
||||
# This is a message with plural forms
|
||||
elif l.startswith('msgid_plural'):
|
||||
if section != ID:
|
||||
raise PoSyntaxError(
|
||||
'msgid_plural not preceded by '
|
||||
'msgid on line %d of po file %s' %
|
||||
(lno, repr(self.name)))
|
||||
print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
l = l[12:]
|
||||
msgid += u'\0' # separator of singular and plural
|
||||
msgid += b'\0' # separator of singular and plural
|
||||
is_plural = True
|
||||
# Now we are in a msgstr section
|
||||
elif l.startswith('msgstr'):
|
||||
section = STR
|
||||
if l.startswith('msgstr['):
|
||||
if not is_plural:
|
||||
raise PoSyntaxError(
|
||||
'plural without msgid_plural '
|
||||
'on line %d of po file %s' %
|
||||
(lno, repr(self.name)))
|
||||
print('plural without msgid_plural on %s:%d' % (infile, lno),
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
l = l.split(']', 1)[1]
|
||||
if msgstr:
|
||||
# Separator of the various plural forms
|
||||
msgstr += u'\0'
|
||||
msgstr += b'\0' # Separator of the various plural forms
|
||||
else:
|
||||
if is_plural:
|
||||
raise PoSyntaxError(
|
||||
'indexed msgstr required for '
|
||||
'plural on line %d of po file %s' %
|
||||
(lno, repr(self.name)))
|
||||
print('indexed msgstr required for plural on %s:%d' % (infile, lno),
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
l = l[6:]
|
||||
# Skip empty lines
|
||||
l = l.strip()
|
||||
if not l:
|
||||
continue
|
||||
# TODO: Does this always follow Python escape semantics?
|
||||
try:
|
||||
l = literal_eval(l)
|
||||
except Exception as msg:
|
||||
raise PoSyntaxError(
|
||||
'%s (line %d of po file %s): \n%s' %
|
||||
(msg, lno, repr(self.name), l))
|
||||
if isinstance(l, bytes):
|
||||
l = l.decode(self.encoding)
|
||||
l = ast.literal_eval(l)
|
||||
if section == CTXT:
|
||||
msgctxt += l
|
||||
msgctxt += l.encode(encoding)
|
||||
elif section == ID:
|
||||
msgid += l
|
||||
msgid += l.encode(encoding)
|
||||
elif section == STR:
|
||||
msgstr += l
|
||||
msgstr += l.encode(encoding)
|
||||
else:
|
||||
raise PoSyntaxError(
|
||||
'error on line %d of po file %s' %
|
||||
(lno, repr(self.name)))
|
||||
|
||||
print('Syntax error on %s:%d' % (infile, lno), \
|
||||
'before:', file=sys.stderr)
|
||||
print(l, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
# Add last entry
|
||||
if section == STR:
|
||||
self.add(msgctxt, msgid, msgstr, fuzzy)
|
||||
add(msgctxt, msgid, msgstr, fuzzy)
|
||||
|
||||
if self.openfile:
|
||||
self.po.close()
|
||||
# Compute output
|
||||
output = generate()
|
||||
|
||||
try:
|
||||
with open(outfile,"wb") as f:
|
||||
f.write(output)
|
||||
except IOError as msg:
|
||||
print(msg, file=sys.stderr)
|
||||
|
||||
def getAsFile(self):
    """Return the result of get() wrapped in a BytesIO object."""
    payload = self.get()
    return BytesIO(payload)
|
||||
|
||||
def main():
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], 'o:')
|
||||
opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
|
||||
['help', 'version', 'output-file='])
|
||||
except getopt.error as msg:
|
||||
print(msg, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
usage(1, msg)
|
||||
|
||||
outfile = None
|
||||
# parse options
|
||||
for opt, arg in opts:
|
||||
if opt in ('-h', '--help'):
|
||||
usage(0)
|
||||
elif opt in ('-V', '--version'):
|
||||
print("msgfmt.py", __version__)
|
||||
sys.exit(0)
|
||||
elif opt in ('-o', '--output-file'):
|
||||
outfile = arg
|
||||
# do it
|
||||
if not args:
|
||||
print('No input file given', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
print("Try `msgfmt --help' for more information.", file=sys.stderr)
|
||||
return
|
||||
|
||||
if not opts:
|
||||
print('No output file given', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
for filename in args:
|
||||
make(filename, outfile)
|
||||
|
||||
with open(opts[0][1], "wb") as mo:
|
||||
mo.write(Msgfmt(args[0]).get())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user