Package translate :: Package convert :: Module dtd2po
[hide private]
[frames] | no frames]

Source Code for Module translate.convert.dtd2po

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """script to convert a mozilla .dtd UTF-8 localization format to a 
 23  gettext .po localization file using the po and dtd modules, and the  
 24  dtd2po convertor class which is in this module 
 25  You can convert back to .dtd using po2dtd.py""" 
 26   
 27  from translate.storage import po 
 28  from translate.storage import dtd 
 29  from translate.misc import quote 
 30   
class dtd2po:
    """Converts Mozilla .dtd units and stores into gettext .po units and stores.

    A label entity (suffix in ``labelsuffixes``) and its matching access key
    entity (suffix in ``accesskeysuffixes``) are combined into one po unit
    whose source carries an ``&`` in front of the access key character.
    """

    # labelsuffixes and accesskeysuffixes are combined to accelerator notation
    labelsuffixes = (".label", ".title")
    accesskeysuffixes = (".accesskey", ".accessKey", ".akey")

    def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
        """Remember the conversion options.

        blankmsgstr -- when true, mergestore() does not fill in targets
        duplicatestyle -- handed to removeduplicates() on the produced po file
        """
        self.currentgroup = None
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        # entities that take part in a label/accesskey pair; rebuilt by
        # findmixedentities(), but defined here so that convertdtdunit() can
        # be called on a fresh convertor without raising AttributeError
        self.mixedentities = {}

    def convertcomments(self, thedtd, thepo):
        """Copy the location and the comments of thedtd onto thepo.

        Also tracks the current localization group in self.currentgroup and
        adds it as a translator note while a group is open.
        """
        entity = quote.rstripeol(thedtd.entity)
        if len(entity) > 0:
            thepo.addlocation(thedtd.entity)
        for commenttype, comment in thedtd.comments:
            # handle groups: remember the group while it is open
            if commenttype == "locgroupstart":
                self.currentgroup = comment.replace('BEGIN', 'GROUP')
            elif commenttype == "locgroupend":
                self.currentgroup = None
            if commenttype == "automaticcomment":
                # already reduced to the bare note text by convertunit()
                thepo.addnote(comment, origin="developer")
            else:
                # every other comment (group markers included) is stripped
                # of its comment syntax first
                thepo.addnote(quote.stripcomment(comment), origin="developer")
        # handle group stuff
        if self.currentgroup is not None:
            thepo.addnote(quote.stripcomment(self.currentgroup), origin="translator")
        if entity.endswith(".height") or entity.endswith(".width") or entity.endswith(".size"):
            thepo.addnote("Do not translate this. Only change the numeric values "
                          "if you need this dialogue box to appear bigger",
                          origin="developer")

    def convertstrings(self, thedtd, thepo):
        """Set thepo.source from the definition of thedtd and blank thepo.target."""
        # extract the string, get rid of quoting and carriage returns
        unquoted = dtd.unquotefromdtd(thedtd.definition).replace("\r", "")
        lines = unquoted.split('\n')
        # drop blank lines at the start and at the end
        while lines and not lines[0].strip():
            del lines[0]
        while lines and not lines[-1].strip():
            del lines[-1]
        if len(lines) > 1:
            # every line but the last ends in exactly one space; the
            # indentation of continuation lines is removed
            pieces = [lines[0].rstrip() + ' ']
            pieces.extend([line.strip() + ' ' for line in lines[1:-1]])
            pieces.append(lines[-1].lstrip())
            thepo.source = "\n".join(pieces)
        elif lines:
            thepo.source = lines[0]
        else:
            thepo.source = ""
        thepo.target = ""

    def convertunit(self, thedtd):
        """converts a dtd unit to a po unit, returns None if empty or not for translation

        Localization notes addressed to this entity are rewritten in
        thedtd.comments as "automaticcomment" entries; a DONT_TRANSLATE note
        blanks the entity and definition of thedtd (thedtd is modified in place).
        """
        if thedtd is None:
            return None
        if getattr(thedtd, "entityparameter", None) == "SYSTEM":
            return None
        thepo = po.pounit(encoding="UTF-8")
        # remove unwanted stuff
        for commentnum, (commenttype, locnote) in enumerate(thedtd.comments):
            if commenttype != 'locnote':
                continue
            # parse the locnote into the entity and the actual note
            typeend = quote.findend(locnote, 'LOCALIZATION NOTE')
            # the id is the text between the parentheses after the marker
            idstart = locnote.find('(', typeend)
            if idstart == -1:
                continue
            idend = locnote.find(')', idstart + 1)
            entity = locnote[idstart + 1:idend].strip()
            # the actual note runs from the ':' to the end of the comment
            actualnotestart = locnote.find(':', idend + 1)
            actualnoteend = locnote.find('-->', idend)
            actualnote = locnote[actualnotestart + 1:actualnoteend].strip()
            # only notes addressed to this entity are processed
            if thedtd.entity != entity:
                continue
            if actualnote.startswith("DONT_TRANSLATE"):
                # take out the entity, definition and the DONT_TRANSLATE comment
                thedtd.entity = ""
                thedtd.definition = ""
                del thedtd.comments[commentnum]
                # the list changed under the loop, so stop iterating here
                break
            # convert it into an automatic comment, to be processed by convertcomments
            thedtd.comments[commentnum] = ("automaticcomment", actualnote)
        # do a standard translation
        self.convertcomments(thedtd, thepo)
        self.convertstrings(thedtd, thepo)
        if thepo.isblank() and not thepo.getlocations():
            return None
        return thepo

    def _findaccesskeypos(self, label, accesskey):
        """Return the index in label where the '&' marker belongs, or -1.

        Characters between '&' and ';' are skipped so that entities such as
        &amp; are not touched. The first upper case match wins; otherwise the
        first lower case match is used.
        """
        upperkey = accesskey.upper()
        lowerkey = accesskey.lower()
        inentity = False
        altcasepos = -1
        for pos, char in enumerate(label):
            if char == '&':
                inentity = True
            elif char == ';':
                inentity = False
            elif not inentity:
                if char == upperkey:
                    # always prefer uppercase
                    return pos
                if char == lowerkey and altcasepos == -1:
                    # only remember the first lower case position and keep
                    # looking for an upper case match
                    altcasepos = pos
        return altcasepos

    def convertmixedunit(self, labeldtd, accesskeydtd):
        """Combine a label dtd unit and an access key dtd unit into one po unit.

        Returns the other converted unit when one of the two converts to
        None, and None when the access key is empty or cannot be found in the
        label. Otherwise returns a new unit holding the locations and notes of
        both and the label with '&' in front of the access key as source.
        """
        labelpo = self.convertunit(labeldtd)
        accesskeypo = self.convertunit(accesskeydtd)
        if labelpo is None:
            return accesskeypo
        if accesskeypo is None:
            return labelpo
        thepo = po.pounit(encoding="UTF-8")
        thepo.addlocations(labelpo.getlocations())
        thepo.addlocations(accesskeypo.getlocations())
        thepo.msgidcomment = thepo._extract_msgidcomments() + labelpo._extract_msgidcomments()
        thepo.msgidcomment = thepo._extract_msgidcomments() + accesskeypo._extract_msgidcomments()
        thepo.addnote(labelpo.getnotes("developer"), "developer")
        thepo.addnote(accesskeypo.getnotes("developer"), "developer")
        thepo.addnote(labelpo.getnotes("translator"), "translator")
        thepo.addnote(accesskeypo.getnotes("translator"), "translator")
        # redo the strings from original dtd...
        # NOTE(review): presumably unquotefromdtd returns UTF-8 byte strings
        # here, hence the decode - confirm against translate.storage.dtd
        label = dtd.unquotefromdtd(labeldtd.definition).decode('UTF-8')
        accesskey = dtd.unquotefromdtd(accesskeydtd.definition).decode('UTF-8')
        if not accesskey:
            return None
        # try and put the & in front of the accesskey in the label...
        accesskeypos = self._findaccesskeypos(label, accesskey)
        if accesskeypos < 0:
            # can't currently mix accesskey if it's not in label
            return None
        label = label[:accesskeypos] + '&' + label[accesskeypos:]
        thepo.source = label.encode("UTF-8", "replace")
        thepo.target = ""
        return thepo

    def findmixedentities(self, thedtdfile):
        """creates self.mixedentities from the dtd file...

        Every label entity that has a matching access key entity in
        thedtdfile.index is recorded together with that access key entity,
        each mapped to an empty dict. Any previous content is discarded.
        """
        # those entities which have a .label/.title and .accesskey combined
        self.mixedentities = {}
        for entity in thedtdfile.index.keys():
            for labelsuffix in self.labelsuffixes:
                if not entity.endswith(labelsuffix):
                    continue
                entitybase = entity[:entity.rfind(labelsuffix)]
                # a matching accesskey entity makes this a mixed entity
                for akeytype in self.accesskeysuffixes:
                    if entitybase + akeytype in thedtdfile.index:
                        # add both versions to the list of mixed entities
                        self.mixedentities[entity] = {}
                        self.mixedentities[entitybase + akeytype] = {}

    def _findmixedpair(self, thedtdfile, thedtd):
        """Work out the label and access key that thedtd belongs to.

        Returns (labelentity, labeldtd, accesskeyentity, accesskeydtd); parts
        that could not be resolved are None.
        """
        labeldtd, accesskeydtd = None, None
        labelentity, accesskeyentity = None, None
        entity = thedtd.entity
        # first pass: thedtd is the label, look up its access key
        for labelsuffix in self.labelsuffixes:
            if entity.endswith(labelsuffix):
                entitybase = entity[:entity.rfind(labelsuffix)]
                for akeytype in self.accesskeysuffixes:
                    if entitybase + akeytype in thedtdfile.index:
                        labelentity, labeldtd = entity, thedtd
                        accesskeyentity = entitybase + akeytype
                        accesskeydtd = thedtdfile.index[accesskeyentity]
                        break
        # second pass: thedtd is the access key, look up its label. This pass
        # always runs; it finds nothing when thedtd is a label because the two
        # suffix sets do not overlap.
        for akeytype in self.accesskeysuffixes:
            if entity.endswith(akeytype):
                accesskeyentity, accesskeydtd = entity, thedtd
                entitybase = entity[:entity.rfind(akeytype)]
                for labelsuffix in self.labelsuffixes:
                    labelentity = entitybase + labelsuffix
                    if labelentity in thedtdfile.index:
                        labeldtd = thedtdfile.index[labelentity]
                        break
                else:
                    # no label in this file: nothing to mark as mixed
                    # (accesskeydtd is kept so the unit is still converted)
                    labelentity = None
                    accesskeyentity = None
        return labelentity, labeldtd, accesskeyentity, accesskeydtd

    def convertdtdunit(self, thedtdfile, thedtd, mixbucket="dtd"):
        """converts a dtd unit from thedtdfile to a po unit, handling mixed entities along the way...

        For an entity listed in self.mixedentities the outcome of the mix is
        stored under mixbucket: True when label and access key were combined,
        False when combining failed. Returns None for a unit that was already
        combined into an earlier one.
        """
        if thedtd.entity in self.mixedentities:
            alreadymixed = self.mixedentities[thedtd.entity].get(mixbucket, None)
            if alreadymixed:
                # the partner unit already produced the combined po unit, so
                # this one is deliberately thrown away
                return None
            elif alreadymixed is None:
                # depending on what we come across first, work out the label
                # and the accesskey
                labelentity, labeldtd, accesskeyentity, accesskeydtd = \
                    self._findmixedpair(thedtdfile, thedtd)
                thepo = self.convertmixedunit(labeldtd, accesskeydtd)
                mixed = thepo is not None
                # remember the outcome for both halves of the pair
                if accesskeyentity is not None:
                    self.mixedentities[accesskeyentity][mixbucket] = mixed
                if labelentity is not None:
                    self.mixedentities[labelentity][mixbucket] = mixed
                if mixed:
                    return thepo
                # otherwise the mix failed: fall through and add each one
                # separately
        return self.convertunit(thedtd)

    def convertstore(self, thedtdfile):
        """Convert a whole dtd file into a new po file with blank targets."""
        thetargetfile = po.pofile()
        targetheader = thetargetfile.makeheader(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
        targetheader.addnote("extracted from %s" % thedtdfile.filename, "developer")
        thetargetfile.addunit(targetheader)
        thedtdfile.makeindex()
        self.findmixedentities(thedtdfile)
        # go through the dtd and convert each unit
        for thedtd in thedtdfile.units:
            if thedtd.isnull():
                continue
            thepo = self.convertdtdunit(thedtdfile, thedtd)
            if thepo is not None:
                thetargetfile.addunit(thepo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origdtdfile, translateddtdfile):
        """Build a po file with sources from origdtdfile and targets from translateddtdfile.

        Targets are left blank when self.blankmsgstr is true or when the
        translated file has no unit for the entity.
        """
        thetargetfile = po.pofile()
        # same accelerator marker as convertstore(): merged units use the
        # same '&' notation
        targetheader = thetargetfile.makeheader(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
        targetheader.addnote("extracted from %s, %s" % (origdtdfile.filename, translateddtdfile.filename), "developer")
        thetargetfile.addunit(targetheader)
        origdtdfile.makeindex()
        self.findmixedentities(origdtdfile)
        translateddtdfile.makeindex()
        # NOTE: findmixedentities() starts from an empty dict on every call,
        # so after this line self.mixedentities describes the translated file
        self.findmixedentities(translateddtdfile)
        # go through the dtd files and convert each unit
        for origdtd in origdtdfile.units:
            if origdtd.isnull():
                continue
            origpo = self.convertdtdunit(origdtdfile, origdtd, mixbucket="orig")
            if origdtd.entity in self.mixedentities:
                mixedentitydict = self.mixedentities[origdtd.entity]
                if "orig" not in mixedentitydict:
                    # this means that the entity is mixed in the translation,
                    # but not the original - treat as unmixed
                    mixbucket = "orig"
                    del self.mixedentities[origdtd.entity]
                elif mixedentitydict["orig"]:
                    # the original entity is already mixed successfully
                    mixbucket = "translate"
                else:
                    # mixing was tried on the original and failed; reusing
                    # the "orig" bucket keeps the translation unmixed as well
                    mixbucket = "orig"
            else:
                mixbucket = "translate"
            if origpo is None:
                # this means it's a mixed entity (with accesskey) that's
                # already been dealt with
                continue
            if origdtd.entity in translateddtdfile.index:
                translateddtd = translateddtdfile.index[origdtd.entity]
                translatedpo = self.convertdtdunit(translateddtdfile, translateddtd, mixbucket=mixbucket)
            else:
                translatedpo = None
            if translatedpo is not None and not self.blankmsgstr:
                origpo.target = translatedpo.source
            thetargetfile.addunit(origpo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile
315
def convertdtd(inputfile, outputfile, templatefile, pot=False, duplicatestyle="msgctxt"):
    """Convert the dtd in inputfile to po and write it to outputfile.

    Without a templatefile the input is converted on its own; with one, the
    template supplies the original units and inputfile the translated ones.
    Returns 0 when the resulting store is empty (nothing is written then),
    otherwise 1.
    """
    inputstore = dtd.dtdfile(inputfile)
    convertor = dtd2po(blankmsgstr=pot, duplicatestyle=duplicatestyle)
    if templatefile is not None:
        templatestore = dtd.dtdfile(templatefile)
        outputstore = convertor.mergestore(templatestore, inputstore)
    else:
        outputstore = convertor.convertstore(inputstore)
    if not outputstore.isempty():
        outputfile.write(str(outputstore))
        return 1
    return 0
329
def main(argv=None):
    """Set up the option parser for dtd -> po conversion and run it on argv."""
    from translate.convert import convert
    # the same output format and converter serve both the plain and the
    # templated (dtd with dtd template) case
    target = ("po", convertdtd)
    formats = {"dtd": target, ("dtd", "dtd"): target}
    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        usepots=True,
        description=__doc__)
    parser.add_duplicates_option()
    parser.passthrough.append("pot")
    parser.run(argv)

# only run the converter when executed as a script, not when imported
if __name__ == "__main__":
    main()