mirror of
https://github.com/cookiengineer/audacity
synced 2025-08-03 17:39:25 +02:00
Don't need smartmsgmerge.py either
Since it doesn't work anymore.
This commit is contained in:
parent
50c7627f74
commit
84f5c55d7c
@ -1,265 +0,0 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
#
|
|
||||||
# smartmsgmerge.py
|
|
||||||
#
|
|
||||||
# Written by Dominic Mazzoni, 2006
|
|
||||||
# GNU General Public License 2.0
|
|
||||||
#
|
|
||||||
# This is a replacement for the GNU gettext "msgmerge" program, which
|
|
||||||
# is typically used to update a .po file (def) to the latest .pot file
|
|
||||||
# (ref). This program is not command-line compatible; it takes no
|
|
||||||
# flags but simply the def, ref, and output file names.
|
|
||||||
#
|
|
||||||
# It uses a much faster and also much stricter policy for finding new
|
|
||||||
# fuzzy matches: the edit-distance must be no more than 4%, or for very
|
|
||||||
# short strings, no more than 1 character.
|
|
||||||
#
|
|
||||||
# This makes it safe for you to enable fuzzy strings in your .mo file
|
|
||||||
# without worrying that they'll be too terrible.
|
|
||||||
#
|
|
||||||
# It also fixes translations where the beginning and ending newlines
|
|
||||||
# do not match the original string.
|
|
||||||
#
|
|
||||||
|
|
||||||
import sys, os
|
|
||||||
|
|
||||||
# The script takes exactly three positional arguments and no flags.
if len(sys.argv) != 4:
    print("Usage: %s def.po ref.pot out.po" % sys.argv[0])
    sys.exit()

# def = existing translation, ref = current template, out = merged result.
def_filename, ref_filename, out_filename = sys.argv[1:4]
|
|
||||||
|
|
||||||
# Each object will contain the comments, msgid (untranslated),
|
|
||||||
# msgstr (translated), and a fuzzy flag. For simplicity this is
|
|
||||||
# not a typechecked class, just a dummy dynamic container class.
|
|
||||||
class obj:
    """Untyped record for one catalog entry.

    Attributes are attached dynamically by the code that creates an
    instance: ``comments``, ``msgid``, ``msgstr``, ``fuzzy`` and (for
    entries read from a file) ``line_no``.
    """
|
|
||||||
|
|
||||||
# Compute the edit-distance between str1 and str2, taking a couple
|
|
||||||
# of shortcuts such that it returns 999 quickly if the edit-distance
|
|
||||||
# is clearly not going to be less than 10 percent.
|
|
||||||
def edit_distance(str1, str2):
    """Return the banded Levenshtein distance between str1 and str2.

    The sentinel 999 is returned instead of a real distance when the
    strings cannot be close matches:

    * their lengths differ by more than 10% (relative to ``len(str2)``);
    * after 29 rows of the matrix the running distance already exceeds 20;
    * the end cell cannot be reached inside the search band.

    Two empty strings have distance 0; an empty string against a
    non-empty one yields 999.
    """
    l1 = len(str1)
    l2 = len(str2)

    # Guard the ratio below against division by zero.
    if l2 == 0:
        if l1 == 0:
            return 0
        return 999

    # Exit if the string lengths differ by more than 10%
    ratio = l1 * 1.0 / l2
    if ratio < 0.9 or ratio > 1.1:
        return 999

    # Beam width: 10% of the mean length, rounded.  The path through the
    # matrix cannot go outside the main diagonal +/- the beam.  Floor
    # division keeps the value identical under Python 2 and Python 3.
    beam = int(0.5 + 0.1 * ((l1 + l2) // 2))

    # (l1+1) x (l2+1) matrix; cells outside the beam keep the sentinel.
    d = [[999] * (l2 + 1) for _ in range(l1 + 1)]

    # Initialize the first row and column
    for i in range(l1 + 1):
        d[i][0] = i
    for j in range(l2 + 1):
        d[0][j] = j

    # Dynamic programming
    for i in range(1, l1 + 1):
        # Quick short-circuit after 30 rows; stop if things are
        # looking really bad
        if i == 30 and l2 >= 30 and d[29][29] > 20:
            return 999
        for j in range(max(1, i - beam), min(l2 + 1, i + beam + 1)):
            if str1[i - 1] == str2[j - 1]:
                cost = 0
            else:
                cost = 1
            d[i][j] = min(
                d[i - 1][j] + 1,        # deletion
                d[i][j - 1] + 1,        # insertion
                d[i - 1][j - 1] + cost  # substitution
            )

    return d[l1][l2]
|
|
||||||
|
|
||||||
# Take a string and format it on a bunch of separate lines in quotes
|
|
||||||
def quote(str):
    """Format a message as one or more double-quoted .po lines.

    The text is broken after every escaped newline (the two characters
    backslash + 'n'); each piece is emitted on its own quoted line.  An
    empty message becomes a single pair of quotes.
    """
    if str == "":
        return "\"\"\n"

    pieces = str.split("\\n")
    # Whatever follows the last escaped newline is written without one.
    tail = pieces.pop()

    rendered = ["\"%s\\n\"\n" % piece for piece in pieces]
    if len(tail) > 0:
        rendered.append("\"%s\"\n" % tail)
    return "".join(rendered)
|
|
||||||
|
|
||||||
# Take a bunch of separate lines in quotes and turn them into a single string
|
|
||||||
def unquote(str):
    """Join a run of double-quoted .po lines into one string.

    Each line is stripped of surrounding whitespace; lines shorter than
    two characters are ignored.  Any remaining line that is not wrapped
    in double quotes is reported and the program exits via sys.exit().
    """
    pieces = []
    for raw in str.split("\n"):
        text = raw.strip()
        if len(text) < 2:
            continue
        if not (text[0] == '"' and text[-1] == '"'):
            print("Error with:")
            print('**%s**' % text)
            sys.exit()
        pieces.append(text[1:-1])
    return "".join(pieces)
|
|
||||||
|
|
||||||
# Parse one file in the .po / .pot format, returning a hash of all
|
|
||||||
# msgids and a list of all msgids in order.
|
|
||||||
def parse(fname):
    """Parse one file in the .po / .pot format.

    Returns a tuple ``(h, l)``:

    * ``h`` maps each unquoted msgid to an ``obj`` carrying ``comments``,
      ``msgid``, ``msgstr``, ``fuzzy`` and ``line_no`` (the number of the
      blank line that terminated the entry);
    * ``l`` lists the msgids in the order they appear in the file.

    If a msgid occurs twice, a message is printed and the program exits
    via sys.exit().
    """
    h = {}
    l = []
    msgid = ""
    msgstr = ""
    comments = ""
    fuzzy = False
    first = True
    line_no = 0

    # Read everything up front and release the file handle right away.
    fp = open(fname)
    try:
        lines = fp.readlines()
    finally:
        fp.close()

    # Make sure the file always ends in a blank line so that the last
    # entry is flushed by the loop below.
    lines.append("\n")

    for line in lines:
        # Handle DOS line endings
        if line.endswith("\r\n"):
            line = line[:-2] + "\n"

        line_no += 1

        if line == "\n":
            if len(msgid) == 0 and not first:
                # We found a blank line or comments in the middle of nowhere
                comments = ""
                fuzzy = False
                msgstr = ""
                continue

            # Otherwise, a blank line signifies the end of a translation
            msgid = unquote(msgid)
            msgstr = unquote(msgstr)
            if msgid in h:
                print("Duplicate msgid in %s:" % (fname))
                print(quote(msgid))
                print("Found on line %d, previously defined on line %d" %
                      (line_no, h[msgid].line_no))
                sys.exit()

            o = obj()
            o.comments = comments
            o.msgid = msgid
            o.msgstr = msgstr
            o.fuzzy = fuzzy
            o.line_no = line_no
            h[msgid] = o
            l.append(msgid)

            # Start collecting the next entry from a clean state.
            comments = ""
            msgstr = ""
            msgid = ""
            fuzzy = False
            first = False
        elif line.startswith("#, fuzzy"):
            fuzzy = True
            comments += line
        elif line.startswith("#"):
            comments += line
        elif len(line) > 6 and line.startswith("msgid "):
            msgid += line[6:]
        elif len(line) > 7 and line.startswith("msgstr "):
            msgstr += line[7:]
        else:
            # Continuation line: it belongs to msgstr once a msgstr has
            # been started, otherwise to msgid.
            if len(msgstr):
                msgstr += line
            else:
                msgid += line

    return (h, l)
|
|
||||||
|
|
||||||
# Load the existing translation (def) and the current template (ref).
def_h, def_l = parse(def_filename)
ref_h, ref_l = parse(ref_filename)

# Handle the exact matches: every template msgid that is already present
# in the translation keeps its existing entry.
final_h = {}
for msgid in ref_l:
    existing = def_h.get(msgid)
    if existing is not None:
        final_h[msgid] = existing
|
|
||||||
|
|
||||||
# Try for fuzzy matches
|
|
||||||
# Candidate sources for fuzzy matches: old msgids that were not matched
# exactly and whose translation has at least 3 characters.  This list does
# not change while the loop below runs (only reference msgids, which are
# absent from the old catalog, are added to final_h), so build it once.
# Empty msgids are skipped so edit_distance never divides by a zero length.
fuzzy_candidates = [x for x in def_l
                    if x not in final_h
                    and x != ""
                    and len(def_h[x].msgstr) >= 3]

for ref_msgid in [x for x in ref_l if x not in final_h]:
    # Find the closest old msgid; 999 means "nothing close enough".
    min_ed = 999
    min_msgid = None
    for def_msgid in fuzzy_candidates:
        ed = edit_distance(ref_msgid, def_msgid)
        if ed < min_ed:
            min_ed = ed
            min_msgid = def_msgid

    if min_msgid is None:
        continue

    # Accept a single-character difference, or at most 4% of the shorter
    # of the two strings.
    pct = min_ed * 100.0 / min(len(ref_msgid), len(min_msgid))
    if not (min_ed == 1 or pct <= 4.0):
        continue

    # Report the match, truncating long strings to 40 characters.
    refstr = ref_msgid
    if len(refstr) > 40:
        refstr = refstr[:37] + "..."
    minstr = min_msgid
    if len(minstr) > 40:
        minstr = minstr[:37] + "..."
    print("Found fuzzy match:")
    print(" %s" % refstr)
    print(" %s" % minstr)
    print(" def_len=%d, ref_len=%d, edit_distance=%d" %
          (len(min_msgid), len(ref_msgid), min_ed))

    # New msgid and comments come from the template; the translation is
    # borrowed from the closest old entry and flagged as fuzzy.
    o = obj()
    o.msgid = ref_msgid
    o.comments = ref_h[ref_msgid].comments
    o.msgstr = def_h[min_msgid].msgstr
    o.fuzzy = True
    final_h[ref_msgid] = o
|
|
||||||
|
|
||||||
# Generate output file
|
|
||||||
translated = 0
fuzzy = 0
empty = 0

out_fp = open(out_filename, "w")
try:
    for msgid in ref_l:
        if msgid in final_h:
            o = final_h[msgid]
            if o.fuzzy:
                fuzzy += 1
            elif msgid != "":
                # The entry with the empty msgid is not counted.
                translated += 1
        else:
            # No translation available: emit the template entry as is.
            o = ref_h[msgid]
            empty += 1

        msgstr = o.msgstr

        # Fix leading and trailing newlines (escaped as backslash + 'n').
        # NOTE(review): the nesting of the four checks under this length
        # guard was reconstructed from a listing that had lost its
        # indentation - confirm against the original file.
        if len(msgid) > 4 and len(msgstr) > 4:
            # Add newline if missing
            if msgid[:2] == "\\n" and msgstr[:2] != "\\n":
                msgstr = "\\n" + msgstr
            if msgid[-2:] == "\\n" and msgstr[-2:] != "\\n":
                msgstr = msgstr + "\\n"

            # Remove newline if extraneous
            if msgid[:2] != "\\n" and msgstr[:2] == "\\n":
                msgstr = msgstr[2:]
            if msgid[-2:] != "\\n" and msgstr[-2:] == "\\n":
                msgstr = msgstr[:-2]

        # Write the entry
        out_fp.write(o.comments)
        if o.fuzzy and o.comments.find("fuzzy") == -1:
            # Only add the flag when the comments do not mention it already.
            out_fp.write("#, fuzzy\n")
        out_fp.write("msgid " + quote(msgid))
        out_fp.write("msgstr " + quote(msgstr))
        out_fp.write("\n")
finally:
    # Close explicitly so the output is flushed before the summary prints.
    out_fp.close()

# Print stats
print("Translated: %d Fuzzy: %d Empty: %d" % (translated, fuzzy, empty))
print("Wrote output to %s" % out_filename)
|
|
Loading…
x
Reference in New Issue
Block a user