Package translate :: Package convert :: Module po2dtd
[hide private]
[frames | no frames]

Source Code for Module translate.convert.po2dtd

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """script that converts a .po file to a UTF-8 encoded .dtd file as used by mozilla 
 23  either done using a template or just using the .po file""" 
 24   
 25  from translate.storage import dtd 
 26  from translate.storage import po 
 27  from translate.misc import quote 
 28  import warnings 
 29   
 30  # labelsuffixes and accesskeysuffixes are combined to accelerator notation 
 31  labelsuffixes = (".label", ".title") 
 32  accesskeysuffixes = (".accesskey", ".accessKey", ".akey") 
 33   
def getlabel(unquotedstr):
    """retrieve the label from a mixed label+accesskey entity

    The label is the input with the first accelerator marker removed, i.e.
    the first ``&`` that does not start an entity reference.  An ``&`` is
    taken to start an entity reference when the text up to the next ``;`` is
    alphanumeric once dots are ignored (``&brand.name;``) or is a ``#``
    followed by alphanumerics (``&#169;``), which is the same rule
    removeinvalidamps applies.

    unquotedstr may be a byte string (decoded as UTF-8) or a unicode string.
    The result is always UTF-8 encoded.
    """
    # bytes is an alias of str on Python 2.6+, so this is the historical
    # isinstance(..., str) check spelled in a way that also runs on Python 3
    if isinstance(unquotedstr, bytes):
        unquotedstr = unquotedstr.decode("UTF-8")
    searchfrom = 0
    while True:
        amppos = unquotedstr.find("&", searchfrom)
        if amppos == -1:
            # no accelerator marker at all: the label is the whole string
            break
        searchfrom = amppos + 1
        semipos = unquotedstr.find(";", searchfrom)
        if semipos != -1:
            name = unquotedstr[searchfrom:semipos]
            isnamed = name.replace(".", "").isalnum()
            isnumeric = name.startswith("#") and name[1:].isalnum()
            if isnamed or isnumeric:
                # a real entity reference, keep it and look further on
                continue
        # only the first marker is cut out; getaccesskey reads the key that
        # follows this very same ampersand
        unquotedstr = unquotedstr[:amppos] + unquotedstr[searchfrom:]
        break
    return unquotedstr.encode("UTF-8")
54
def getaccesskey(unquotedstr):
    """retrieve the access key from a mixed label+accesskey entity

    The access key is the single character that follows the first ``&``
    which does not start an entity reference.  An ``&`` is taken to start an
    entity reference when the text up to the next ``;`` is alphanumeric once
    dots are ignored (``&brand.name;``) or is a ``#`` followed by
    alphanumerics (``&#169;``), which is the same rule removeinvalidamps
    applies.

    unquotedstr may be a byte string (decoded as UTF-8) or a unicode string.
    Returns the key UTF-8 encoded, or an empty string when there is no
    usable marker (none present, or a dangling ``&`` at the very end).
    """
    # bytes is an alias of str on Python 2.6+, so this is the historical
    # isinstance(..., str) check spelled in a way that also runs on Python 3
    if isinstance(unquotedstr, bytes):
        unquotedstr = unquotedstr.decode("UTF-8")
    searchfrom = 0
    while True:
        amppos = unquotedstr.find("&", searchfrom)
        if amppos == -1:
            break
        searchfrom = amppos + 1
        semipos = unquotedstr.find(";", searchfrom)
        if semipos != -1:
            name = unquotedstr[searchfrom:semipos]
            isnamed = name.replace(".", "").isalnum()
            isnumeric = name.startswith("#") and name[1:].isalnum()
            if isnamed or isnumeric:
                # what we have found is an entity, not a shortcut key...
                continue
        if searchfrom >= len(unquotedstr):
            # the ampersand is the last character, so there is no key to
            # read; indexing here used to raise IndexError
            break
        # otherwise, we found the shortcut key
        return unquotedstr[searchfrom].encode("UTF-8")
    # an empty string rather than the original string: the caller treats an
    # empty result as "no access key found"
    return ""
77
def removeinvalidamps(entity, unquotedstr):
    """find ampersands that aren't part of an entity definition...

    Every ``&`` in unquotedstr is checked against the text up to the next
    ``;``.  The ampersand is kept when that text is alphanumeric once dots
    are ignored (``&brand.name;``) or is a ``#`` followed by alphanumerics
    (``&#169;``, ``&#x26;``).  Any other ampersand is removed.

    entity is only used to name the offending dtd entity in the warning that
    is issued (once per call) when at least one ampersand is removed.

    Returns unquotedstr with the invalid ampersands deleted.
    """
    invalidamps = []
    searchfrom = 0
    while True:
        amppos = unquotedstr.find("&", searchfrom)
        if amppos == -1:
            break
        searchfrom = amppos + 1
        semipos = unquotedstr.find(";", searchfrom)
        if semipos != -1:
            checkentity = unquotedstr[searchfrom:semipos]
            if checkentity.replace('.', '').isalnum():
                # what we have found is an entity, not a problem...
                continue
            # startswith instead of checkentity[0]: the name is empty for
            # "&;" and indexing it used to raise IndexError
            if checkentity.startswith('#') and checkentity[1:].isalnum():
                # a numeric character reference, not a problem either
                continue
        # otherwise, we found a problem
        invalidamps.append(amppos)
    if not invalidamps:
        return unquotedstr
    warnings.warn("invalid ampersands in dtd entity %s" % (entity))
    # stitch together the stretches between the offending ampersands in one
    # pass instead of rebuilding the whole string for each removal
    pieces = []
    start = 0
    for badpos in invalidamps:
        pieces.append(unquotedstr[start:badpos])
        start = badpos + 1
    pieces.append(unquotedstr[start:])
    return "".join(pieces)
104
def getmixedentities(entities):
    """collect the entities that form a combined label + accesskey pair

    An entity ending in one of labelsuffixes is paired with every entity in
    entities that has the same base name followed by one of
    accesskeysuffixes.  For each such pair both names are added to the
    result, label first, so a label matching several access key spellings is
    listed once per match.
    """
    paired = []
    for name in entities:
        for suffix in labelsuffixes:
            if not name.endswith(suffix):
                continue
            # strip the label suffix to get the name shared with the access key
            stem = name[:len(name) - len(suffix)]
            for keysuffix in accesskeysuffixes:
                keyname = stem + keysuffix
                if keyname in entities:
                    paired.extend([name, keyname])
    return paired
119
def applytranslation(entity, dtdunit, inputunit, mixedentities):
    """applies the translation for entity in the po unit to the dtd unit

    entity is the name of the dtd entity being translated, dtdunit is the
    unit whose definition gets replaced, inputunit supplies the translated
    text through its target attribute and mixedentities holds the names of
    entities whose label and access key are carried by one combined string
    (see getmixedentities).

    Nothing is returned.  dtdunit.definition is left untouched when the
    translation is blank or when the processed string ends up empty.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # flattened listing; in particular the else is read as belonging to the
    # first for loop (for/else) - confirm against the original file
    # this converts the po-style string to a dtd-style string
    unquotedstr = inputunit.target
    # check there aren't missing entities...
    if len(unquotedstr.strip()) == 0:
        # whitespace-only translation: keep whatever the dtd unit already has
        return
    # handle mixed entities
    for labelsuffix in labelsuffixes:
        if entity.endswith(labelsuffix):
            if entity in mixedentities:
                # the label half of a combined string: drop the & marker
                unquotedstr = getlabel(unquotedstr)
                break
    else:
        # only reached when the loop above did not break, i.e. the entity
        # was not handled as a mixed label
        for akeytype in accesskeysuffixes:
            if entity.endswith(akeytype):
                if entity in mixedentities:
                    # the access key half: keep only the character after the &
                    unquotedstr = getaccesskey(unquotedstr)
                    if not unquotedstr:
                        warnings.warn("Could not find accesskey for %s" % entity)
                    else:
                        # follow the case of the access key already in the dtd
                        original = dtd.unquotefromdtd(dtdunit.definition)
                        if original.isupper() and unquotedstr.islower():
                            unquotedstr = unquotedstr.upper()
                        elif original.islower() and unquotedstr.isupper():
                            unquotedstr = unquotedstr.lower()
    # handle invalid left-over ampersands (usually unneeded access key shortcuts)
    unquotedstr = removeinvalidamps(entity, unquotedstr)
    # finally set the new definition in the dtd, but not if its empty
    if len(unquotedstr) > 0:
        dtdunit.definition = dtd.quotefordtd(unquotedstr)
151
class redtd:
    """this is a convertor class that creates a new dtd based on a template using translations in a po"""

    def __init__(self, dtdfile):
        """remember the template dtd store whose units will be overwritten in place"""
        self.dtdfile = dtdfile

    def convertstore(self, inputstore, includefuzzy=False):
        """apply every usable unit of inputstore to the template and return the template

        Fuzzy units are skipped unless includefuzzy is true.  The object
        returned is the very dtd store given to the constructor.
        """
        # translate the strings
        for inunit in inputstore.units:
            # there may be more than one entity due to msguniq merge
            if includefuzzy or not inunit.isfuzzy():
                self.handleinunit(inunit)
        return self.dtdfile

    def handleinunit(self, inunit):
        """apply one po unit to every template entity named in its locations

        Locations that have no entry in the template index are ignored.
        """
        entities = inunit.getlocations()
        mixedentities = getmixedentities(entities)
        for entity in entities:
            # "in" rather than has_key: has_key is deprecated and no longer
            # exists on Python 3 dictionaries
            if entity in self.dtdfile.index:
                # now we need to replace the definition of entity with msgstr
                dtdunit = self.dtdfile.index[entity]  # find the dtd
                applytranslation(entity, dtdunit, inunit, mixedentities)
173
class po2dtd:
    """convertor that builds a brand new dtd store from a po store, with no template involved"""

    def convertcomments(self, inputunit, dtdunit):
        """give the dtd unit its entity name and comments from the po unit"""
        locations = inputunit.getlocations()
        if len(locations) > 1:
            # several entities on one unit are not handled yet: leave a note
            # and carry on with the first one only
            dtdunit.comments.append(("conversionnote", '<!-- CONVERSION NOTE - multiple entities -->\n'))
        # without any location the entity is blank, which doesn't write anything out
        dtdunit.entity = locations[0] if len(locations) >= 1 else ""

        if inputunit.isfuzzy():
            dtdunit.comments.append(("potype", "fuzzy\n"))
        translatornotes = inputunit.getnotes("translator")
        for line in translatornotes.split("\n"):
            if line:
                comment = quote.unstripcomment(line)
                # only a note mentioning both markers is left out
                if not ('LOCALIZATION NOTE' in comment and 'GROUP' in comment):
                    dtdunit.comments.append(("comment", comment))
        # msgidcomments are really localization notes in disguise
        msgidcomment = inputunit._extract_msgidcomments()
        if msgidcomment:
            notetext = "LOCALIZATION NOTE (" + dtdunit.entity + "): " + msgidcomment
            dtdunit.comments.append(("locnote", quote.unstripcomment(notetext)))

    def convertstrings(self, inputunit, dtdunit):
        """set the dtd definition from the target if translated, else from the source"""
        text = inputunit.target if inputunit.istranslated() else inputunit.source
        cleaned = removeinvalidamps(dtdunit.entity, text)
        dtdunit.definition = dtd.quotefordtd(cleaned)

    def convertunit(self, inputunit):
        """create and return the dtd unit that corresponds to one po unit"""
        newunit = dtd.dtdunit()
        self.convertcomments(inputunit, newunit)
        self.convertstrings(inputunit, newunit)
        return newunit

    def convertstore(self, inputstore, includefuzzy=False):
        """convert a whole po store, leaving fuzzy units out unless includefuzzy is set"""
        outputstore = dtd.dtdfile()
        self.currentgroups = []
        for inputunit in inputstore.units:
            if not includefuzzy and inputunit.isfuzzy():
                continue
            dtdunit = self.convertunit(inputunit)
            if dtdunit is not None:
                outputstore.addunit(dtdunit)
        return outputstore
226
def convertdtd(inputfile, outputfile, templatefile, includefuzzy=False):
    """convert the po in inputfile to a dtd written to outputfile

    With a templatefile the translations are merged into that dtd (redtd),
    without one a dtd is generated from the po alone (po2dtd).  Fuzzy units
    are only used when includefuzzy is true.  Always returns 1.
    """
    postore = po.pofile(inputfile)
    if templatefile is not None:
        convertor = redtd(dtd.dtdfile(templatefile))
    else:
        convertor = po2dtd()
    result = convertor.convertstore(postore, includefuzzy)
    outputfile.write(str(result))
    return 1
237
def main(argv=None):
    """run the po to dtd conversion from the command line arguments in argv"""
    # imported here rather than at module level, as in the original
    from translate.convert import convert
    # po input gives dtd output, both with and without a dtd template
    dtdoutput = ("dtd", convertdtd)
    formats = {"po": dtdoutput, ("po", "dtd"): dtdoutput}
    parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
    parser.add_fuzzy_option()
    parser.run(argv)
# run the command line convertor only when executed as a script, not on import
if __name__ == '__main__':
    main()