Package translate :: Package convert :: Module csv2po
[hide private]
[frames | no frames]

Source Code for Module translate.convert.csv2po

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2003-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """convert Comma-Separated Value (.csv) files to Gettext PO localization files 
 23   
 24  See: http://translate.sourceforge.net/wiki/toolkit/csv2po for examples and  
 25  usage instructions 
 26  """ 
 27   
 28  import sys 
 29  from translate.misc import sparse 
 30  from translate.storage import po 
 31  from translate.storage import csvl10n 
 32   
def replacestrings(source, *pairs):
    """applies a series of (old, new) substring replacements to source, in order

    each replacement works on the result of the previous one, so the order of
    pairs matters. returns the resulting string (source itself is not modified)"""
    result = source
    for pair in pairs:
        # every pair must unpack into exactly two items: the text to find and
        # the text to put in its place
        old, replacement = pair
        result = result.replace(old, replacement)
    return result
37
def quotecsvstr(source):
    """returns source wrapped in double quotes, with embedded quotes escaped

    the substitutions are applied in order by replacestrings"""
    substitutions = (
        # unescape quotes that are already escaped, so that the next step
        # does not escape them a second time...
        ('\\"', '"'),
        # ...then escape every double quote
        ('"', '\\"'),
        # collapse a doubled backslash in front of an apostrophe or an "n"
        ("\\\\'", "\\'"),
        ('\\\\n', '\\n'),
    )
    escaped = replacestrings(source, *substitutions)
    return '"' + escaped + '"'
40
def simplify(string):
    """returns string with every non-alphanumeric character removed

    the result is used as a loose matching key for strings whose whitespace or
    punctuation may have been mangled. it has the same string type as the input"""
    # Joining on an empty slice of the input keeps the input's string type and
    # always produces a real string. A bare filter() only does that on
    # Python 2; on Python 3 it returns a lazy iterator that can never be equal
    # to a previously stored key.
    return string[:0].join([char for char in string if char.isalnum()])
45
class csv2po:
    """a class that takes translations from a .csv file and puts them in a .po file

    with a template po file the csv units are merged into the matching template
    units; without one a new po file is built from the csv units"""

    def __init__(self, templatepo=None, charset=None, duplicatestyle="keep"):
        """construct the converter...

        templatepo is the po store to merge into (None to build a new one),
        charset is the encoding used to decode the csv source and target strings
        (None leaves them untouched) and duplicatestyle is handed to the po
        file's removeduplicates when a new file is built"""
        self.pofile = templatepo
        self.charset = charset
        self.duplicatestyle = duplicatestyle
        # always initialised, so the counter can be read whether or not a
        # template was supplied
        self.unmatched = 0
        if self.pofile is not None:
            self.makeindex()

    def _csvfilename(self):
        """returns the name of the csv file being converted, or "(unknown)" if it has none"""
        return getattr(self.csvfile, "filename", "(unknown)")

    def makeindex(self):
        """makes indexes required for searching...

        commentindex maps the space-joined locations of a unit to that unit,
        sourceindex maps a source string to the last unit seen with that source,
        and simpleindex maps a simplified source string to a list of units"""
        self.commentindex = {}
        self.sourceindex = {}
        self.simpleindex = {}
        self.duplicatecomments = []
        for pounit in self.pofile.units:
            joinedcomment = " ".join(pounit.getlocations())
            source = pounit.source
            # the definitive way to match is by source comment (joinedcomment)
            if joinedcomment in self.commentindex:
                # unless more than one thing matches...
                self.duplicatecomments.append(joinedcomment)
            else:
                self.commentindex[joinedcomment] = pounit
            # do simpler matching in case things have been mangled...
            simpleid = simplify(source)
            # but check for duplicates: a different source that simplifies to
            # an id already seen is added to the list, anything else restarts it
            if simpleid in self.simpleindex and not (source in self.sourceindex):
                # keep a list of them...
                self.simpleindex[simpleid].append(pounit)
            else:
                self.simpleindex[simpleid] = [pounit]
            # also match by standard msgid
            self.sourceindex[source] = pounit
        # a location string shared by several units cannot identify any of them
        for comment in self.duplicatecomments:
            if comment in self.commentindex:
                del self.commentindex[comment]

    def convertunit(self, csvunit):
        """converts csv unit to po unit"""
        pounit = po.pounit(encoding="UTF-8")
        if csvunit.comment:
            pounit.addlocation(csvunit.comment)
        pounit.source = csvunit.source
        pounit.target = csvunit.target
        return pounit

    def handlecsvunit(self, csvunit):
        """handles reintegrating a csv unit into the .po file

        the template unit is looked up by comment, then by exact source, then by
        simplified source. if no single unit can be identified a message is
        printed to stderr, self.unmatched is incremented and nothing is changed"""
        if csvunit.comment.strip() and csvunit.comment in self.commentindex:
            pounit = self.commentindex[csvunit.comment]
        elif csvunit.source in self.sourceindex:
            pounit = self.sourceindex[csvunit.source]
        elif simplify(csvunit.source) in self.simpleindex:
            thepolist = self.simpleindex[simplify(csvunit.source)]
            if len(thepolist) > 1:
                # ambiguous: report every candidate rather than guessing
                matches = "\n ".join(["possible match: " + candidate.source for candidate in thepolist])
                message = "%s - csv entry not found in pofile, multiple matches found:\n location\t%s\n original\t%s\n translation\t%s\n %s" % \
                    (self._csvfilename(), csvunit.comment, csvunit.source, csvunit.target, matches)
                print >> sys.stderr, message
                self.unmatched += 1
                return
            pounit = thepolist[0]
        else:
            message = "%s - csv entry not found in pofile:\n location\t%s\n original\t%s\n translation\t%s" % \
                (self._csvfilename(), csvunit.comment, csvunit.source, csvunit.target)
            print >> sys.stderr, message
            self.unmatched += 1
            return
        if pounit.hasplural():
            # we need to work out whether we matched the singular or the plural
            singularid = pounit.source.strings[0]
            pluralid = pounit.source.strings[1]
            if csvunit.source == singularid:
                pounit.msgstr[0] = csvunit.target
            elif csvunit.source == pluralid:
                pounit.msgstr[1] = csvunit.target
            elif simplify(csvunit.source) == simplify(singularid):
                pounit.msgstr[0] = csvunit.target
            elif simplify(csvunit.source) == simplify(pluralid):
                pounit.msgstr[1] = csvunit.target
            else:
                print >> sys.stderr, "couldn't work out singular or plural: %r, %r, %r" % \
                    (csvunit.source, singularid, pluralid)
                self.unmatched += 1
                return
        else:
            pounit.target = csvunit.target

    def convertstore(self, thecsvfile):
        """converts a csvfile to a pofile, and returns it. uses templatepo if given at construction

        when a charset was given, the source and target of every csv unit are
        decoded with it. the first csv unit is skipped if it looks like a column
        header row or like a po header"""
        self.csvfile = thecsvfile
        if self.pofile is None:
            self.pofile = po.pofile()
            mergemode = False
        else:
            mergemode = True
        if self.pofile.units and self.pofile.units[0].isheader():
            targetheader = self.pofile.units[0]
            # fill in the placeholders of a template header
            targetheader.msgstr = [line.replace("CHARSET", "UTF-8").replace("ENCODING", "8bit") for line in targetheader.msgstr]
        else:
            targetheader = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
        targetheader.addnote("extracted from %s" % self._csvfilename(), "developer")
        mightbeheader = True
        for csvunit in self.csvfile.units:
            if self.charset is not None:
                csvunit.source = csvunit.source.decode(self.charset)
                csvunit.target = csvunit.target.decode(self.charset)
            if mightbeheader:
                # ignore typical header strings... (only the first unit is checked)
                mightbeheader = False
                columns = (csvunit.comment, csvunit.source, csvunit.target)
                if [item.strip().lower() for item in columns] == ["comment", "original", "translation"]:
                    continue
                if not csvunit.comment.strip() and "Content-Type:" in csvunit.source:
                    continue
            if mergemode:
                self.handlecsvunit(csvunit)
            else:
                pounit = self.convertunit(csvunit)
                self.pofile.addunit(pounit)
        self.pofile.removeduplicates(self.duplicatestyle)
        return self.pofile
169
def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None, duplicatestyle="msgctxt"):
    """reads in inputfile using csvl10n, converts using csv2po, writes to outputfile

    templatefile may be None, in which case a new po file is produced instead of
    merging into a template. returns 0 without writing anything if the
    converted store is empty, otherwise 1"""
    inputstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
    # csv2po treats a template of None as "no template", so one constructor
    # call covers both cases
    templatestore = None
    if templatefile is not None:
        templatestore = po.pofile(templatefile)
    convertor = csv2po(templatestore, charset=charset, duplicatestyle=duplicatestyle)
    outputstore = convertor.convertstore(inputstore)
    if not outputstore.isempty():
        outputfile.write(str(outputstore))
        return 1
    return 0
183
def main(argv=None):
    """runs the csv2po command-line converter with the arguments in argv

    argv is handed unchanged to the option parser's run method"""
    # imported here rather than at module level, as in the original layout
    from translate.convert import convert
    formats = {
        ("csv", "po"): ("po", convertcsv),
        ("csv", "pot"): ("po", convertcsv),
        ("csv", None): ("po", convertcsv),
    }
    parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
    parser.add_option("", "--charset", dest="charset", default=None,
        help="set charset to decode from csv files", metavar="CHARSET")
    # the three columns are the ones the converter reads from each csv unit:
    # comment, source and target (the help text used to name "source" twice)
    parser.add_option("", "--columnorder", dest="columnorder", default=None,
        help="specify the order and position of columns (comment,source,target)")
    parser.add_duplicates_option()
    # both options are handed through to convertcsv as keyword arguments
    for option in ("charset", "columnorder"):
        parser.passthrough.append(option)
    parser.run(argv)
# run the command-line converter only when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':
    main()