mirror of
				https://github.com/cookiengineer/audacity
				synced 2025-10-26 07:13:49 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			266 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			266 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/python
 | |
| #
 | |
| # smartmsgmerge.py
 | |
| #
 | |
| # Written by Dominic Mazzoni, 2006
 | |
| # GNU General Public License 2.0
 | |
| #
 | |
| # This is a replacement for the GNU gettext "msgmerge" program, which
 | |
| # is typically used to update a .po file (def) to the latest .pot file
 | |
| # (ref).  This program is not command-line compatible; it takes no
 | |
| # flags but simply the def, ref, and output file names.
 | |
| #
 | |
| # It uses a much faster and also much stricter policy for finding new
 | |
| # fuzzy matches: the edit-distance must be no more than 4%, or for very
 | |
| # short strings, no more than 1 character.
 | |
| #
 | |
| # This makes it safe for you to enable fuzzy strings in your .mo file
 | |
| # without worrying that they'll be too terrible.
 | |
| #
 | |
| # It also fixes translations where the beginning and ending newlines
 | |
| # do not match the original string.
 | |
| #
 | |
| 
 | |
| import sys, os
 | |
| 
 | |
# Exactly three arguments are required.  On misuse, print the usage
# message to stderr and exit with a non-zero status (sys.exit with a
# string argument does both) so calling scripts can detect the failure.
if len(sys.argv) != 4:
  sys.exit("Usage: %s def.po ref.pot out.po" % sys.argv[0])

# def: the existing .po translations; ref: the up-to-date .pot template;
# out: where the merged .po file is written.
def_filename, ref_filename, out_filename = sys.argv[1:4]
 | |
| 
 | |
| # Each object will contain the comments, msgid (untranslated),
 | |
| # msgstr (translated), and a fuzzy flag.  For simplicity this is
 | |
| # not a typechecked class, just a dummy dynamic container class.
 | |
class obj:
  """Empty attribute container; fields are attached to instances after creation."""
 | |
| 
 | |
| # Compute the edit-distance between str1 and str2, taking a couple
 | |
| # of shortcuts such that it returns 999 quickly if the edit-distance
 | |
| # is clearly not going to be less than 10 percent.
 | |
def edit_distance(str1, str2):
  """Return a banded edit distance between str1 and str2.

  The value 999 is returned as a "too different" sentinel when:
    * exactly one of the strings is empty,
    * the string lengths differ by more than 10%,
    * the early check at row 30 finds d[29][29] > 20, or
    * no path inside the diagonal band reaches the final cell.

  Two empty strings have distance 0.  Only cells within the band around
  the main diagonal are computed; all others keep their initial 999
  (apart from the first row and column).
  """
  l1 = len(str1)
  l2 = len(str2)

  # Guard the length-ratio test below against division by zero: two empty
  # strings are identical, and an empty string is never within 10% of a
  # non-empty one.
  if l1 == 0 or l2 == 0:
    if l1 == l2:
      return 0
    return 999

  # Exit if the difference in the string lengths is more than 10%
  ratio = l1 * 1.0 / l2
  if ratio < 0.9 or ratio > 1.1:
    return 999

  # Compute a beam width of 10% of the average length (rounded) - the
  # path through the matrix cannot go outside the main diagonal +/- the
  # beam.  Floor division keeps the result identical on Python 2 and 3.
  beam = int(0.5 + 0.1 * ((l1 + l2) // 2))

  # Create a 2D array, pre-filled with the "unreachable" sentinel
  d = [[999] * (l2 + 1) for _ in range(l1 + 1)]

  # Initialize the first row and column
  for i in range(l1 + 1):
    d[i][0] = i
  for j in range(l2 + 1):
    d[0][j] = j

  # Dynamic programming
  for i in range(1, l1 + 1):
    # Quick short-circuit after 30 rows; stop if things are
    # looking really bad
    if i == 30 and l2 >= 30 and d[29][29] > 20:
      return 999
    for j in range(max(1, i - beam), min(l2 + 1, i + beam + 1)):
      if str1[i - 1] == str2[j - 1]:
        cost = 0
      else:
        cost = 1
      d[i][j] = min(
                    d[i - 1][j] + 1,        # deletion
                    d[i][j - 1] + 1,        # insertion
                    d[i - 1][j - 1] + cost  # substitution
                   )
  return d[l1][l2]
 | |
| 
 | |
| # Take a string and format it on a bunch of separate lines in quotes
 | |
def quote(str):
  """Format str as one or more double-quoted lines, .po style.

  The text is broken after every escaped newline (a backslash followed
  by the letter n); each piece becomes its own quoted line terminated by
  a real newline.  An empty string is rendered as a single pair of
  quotes.
  """
  if str == "":
    return '""\n'
  pieces = str.split("\\n")
  # Whatever follows the last escaped newline (possibly nothing)
  remainder = pieces.pop()
  quoted = ['"%s\\n"\n' % piece for piece in pieces]
  if len(remainder) > 0:
    quoted.append('"%s"\n' % remainder)
  return "".join(quoted)
 | |
| 
 | |
| # Take a bunch of separate lines in quotes and turn them into a single string
 | |
def unquote(str):
  """Join a run of double-quoted lines into a single string.

  Each line of str is stripped of surrounding whitespace; lines shorter
  than two characters are skipped.  Every remaining line must begin and
  end with a double quote, and the text between the quotes is
  concatenated in order.

  Exits the program with an error message on stderr and a non-zero
  status if a line is not properly quoted.
  """
  parts = []
  for line in str.split("\n"):
    line = line.strip()
    # Blank lines and single-character leftovers carry no quoted text
    if len(line) < 2:
      continue
    if not (line[0] == '"' and line[-1] == '"'):
      # Malformed input is fatal; passing a string to sys.exit reports
      # it on stderr and sets a failing exit status.
      sys.exit("Error with:\n**%s**" % line)
    parts.append(line[1:-1])
  return "".join(parts)
 | |
| 
 | |
| # Parse one file in the .po / .pot format, returning a hash of all
 | |
| # msgids and a list of all msgids in order.
 | |
def parse(fname):
  """Parse one file in the .po / .pot format.

  Returns a tuple (h, l):
    h - dict mapping each unquoted msgid to an obj with the attributes
        comments, msgid, msgstr, fuzzy and line_no (the number of the
        blank line that closed the entry),
    l - list of the msgids in the order they appear in the file.

  Entries are delimited by blank lines.  Exits the program with an error
  message on stderr and a non-zero status if the same msgid appears
  twice.
  """
  h = {}
  l = []
  msgid = ""
  msgstr = ""
  comments = ""
  fuzzy = False
  first = True
  line_no = 0

  # Read the lines of the file (closing it promptly) and make sure it
  # always ends in a blank line so the last entry gets flushed
  with open(fname) as fp:
    lines = fp.readlines()
  lines.append("\n")

  for line in lines:
    # Handle DOS line endings
    if line.endswith("\r\n"):
      line = line[:-2] + "\n"

    line_no += 1
    if line == "\n":
      if len(msgid) == 0 and not first:
        # We found a blank line or comments in the middle of nowhere
        comments = ""
        fuzzy = False
        msgstr = ""
        continue
      # Otherwise, a blank line in the middle of the file
      # signifies the end of a translation
      msgid = unquote(msgid)
      msgstr = unquote(msgstr)
      if msgid in h:
        # quote() already ends with a newline, so the extra "\n" leaves
        # a blank line before the location, as the report always did
        sys.exit(
          "Duplicate msgid in %s:\n%s\n"
          "Found on line %d, previously defined on line %d"
          % (fname, quote(msgid), line_no, h[msgid].line_no))
      o = obj()
      o.comments = comments
      o.msgid = msgid
      o.msgstr = msgstr
      o.fuzzy = fuzzy
      o.line_no = line_no
      h[msgid] = o
      l.append(msgid)
      # Reset the accumulators for the next entry
      comments = ""
      msgstr = ""
      msgid = ""
      fuzzy = False
      first = False
    elif line.startswith("#, fuzzy"):
      # The flag line is remembered and also kept with the comments
      fuzzy = True
      comments += line
    elif line.startswith("#"):
      comments += line
    elif len(line) > 6 and line.startswith("msgid "):
      msgid += line[6:]
    elif len(line) > 7 and line.startswith("msgstr "):
      msgstr += line[7:]
    else:
      # Continuation line: it belongs to msgstr once one has started,
      # otherwise to msgid
      if len(msgstr):
        msgstr += line
      else:
        msgid += line
  return (h, l)
 | |
| 
 | |
# Load the existing translations (def) and the current template (ref)
(def_h, def_l) = parse(def_filename)
(ref_h, ref_l) = parse(ref_filename)

# Handle the exact matches
final_h = {}
for msgid in ref_l:
  if msgid in def_h:
    final_h[msgid] = def_h[msgid]

# Try for fuzzy matches.  The candidates are the def entries that were
# not used as exact matches and whose translation is at least 3
# characters long.  Entries added to final_h below are keyed by ref
# msgids that are absent from def, so this list never changes and is
# built once.  Empty msgids are excluded because they cannot be
# meaningfully compared by edit distance.
fuzzy_candidates = [x for x in def_l
                    if x and x not in final_h and len(def_h[x].msgstr) >= 3]
for ref_msgid in [x for x in ref_l if x not in final_h]:
  min_ed = 999
  min_msgid = None
  for def_msgid in fuzzy_candidates:
    ed = edit_distance(ref_msgid, def_msgid)
    if ed < min_ed:
      min_ed = ed
      min_msgid = def_msgid
  if min_msgid is None:
    continue

  # Accept only a single-character difference or one of at most 4%
  pct = min_ed * 100.0 / min(len(ref_msgid), len(min_msgid))
  if not (min_ed == 1 or pct <= 4.0):
    continue

  # Shorten long strings for the progress report
  refstr = ref_msgid
  if len(refstr) > 40:
    refstr = refstr[:37] + "..."
  minstr = min_msgid
  if len(minstr) > 40:
    minstr = minstr[:37] + "..."
  print("Found fuzzy match:")
  print("   %s" % refstr)
  print("   %s" % minstr)
  print("     def_len=%d, ref_len=%d, edit_distance=%d" %
        (len(min_msgid), len(ref_msgid), min_ed))

  # New entry: the ref msgid and comments with the old translation,
  # flagged fuzzy
  o = obj()
  o.msgid = ref_msgid
  o.comments = ref_h[ref_msgid].comments
  o.msgstr = def_h[min_msgid].msgstr
  o.fuzzy = True
  final_h[ref_msgid] = o

# Generate output file
translated = 0
fuzzy = 0
empty = 0
with open(out_filename, "w") as out_fp:
  for msgid in ref_l:
    if msgid in final_h:
      o = final_h[msgid]
      if o.fuzzy:
        fuzzy += 1
      elif msgid != "":
        # An exact match whose translation is blank is still
        # untranslated, so it is counted as empty
        if o.msgstr:
          translated += 1
        else:
          empty += 1
    else:
      o = ref_h[msgid]
      empty += 1

    msgstr = o.msgstr
    # Fix leading and trailing newlines
    if len(msgid) > 4 and len(msgstr) > 4:
      # Add newline if missing
      if msgid.startswith("\\n") and not msgstr.startswith("\\n"):
        msgstr = "\\n" + msgstr
      if msgid.endswith("\\n") and not msgstr.endswith("\\n"):
        msgstr = msgstr + "\\n"

      # Remove newline if extraneous
      if not msgid.startswith("\\n") and msgstr.startswith("\\n"):
        msgstr = msgstr[2:]
      if not msgid.endswith("\\n") and msgstr.endswith("\\n"):
        msgstr = msgstr[:-2]

    # Write the entry
    out_fp.write(o.comments)
    if o.fuzzy and "fuzzy" not in o.comments:
      out_fp.write("#, fuzzy\n")
    out_fp.write("msgid " + quote(msgid))
    out_fp.write("msgstr " + quote(msgstr))
    out_fp.write("\n")

# Print stats
print("Translated: %d Fuzzy: %d Empty: %d" % (translated, fuzzy, empty))
print("Wrote output to %s" % out_filename)
 |