1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""script to convert a mozilla .dtd UTF-8 localization format to a
gettext .po localization file using the po and dtd modules, and the
dtd2po convertor class which is in this module
You can convert back to .dtd using po2dtd.py"""
26
27 from translate.storage import po
28 from translate.storage import dtd
29 from translate.misc import quote
30
def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
    """store the conversion options on the convertor

    blankmsgstr -- when true, mergestore() leaves every target string empty
    duplicatestyle -- handed to removeduplicates() on each generated po store
    """
    self.currentgroup = None
    self.blankmsgstr = blankmsgstr
    self.duplicatestyle = duplicatestyle
36
60
62
def convertstrings(self, thedtd, thepo):
    """extract the source string from the dtd unit into the po unit

    The definition is unquoted, carriage returns are dropped, blank lines at
    the start and end are removed, and the remaining lines are re-joined so
    that every line but the last ends in a single space. The target is
    always reset to the empty string.
    """
    # NOTE(review): the def line is missing from this listing; the signature
    # is reconstructed from the call self.convertstrings(thedtd, thepo).
    unquoted = dtd.unquotefromdtd(thedtd.definition).replace("\r", "")
    lines = unquoted.split("\n")

    # trim whitespace-only lines from both ends without repeated list deletes
    start, end = 0, len(lines)
    while start < end and not lines[start].strip():
        start += 1
    while end > start and not lines[end - 1].strip():
        end -= 1
    lines = lines[start:end]

    if len(lines) > 1:
        # first line keeps its leading whitespace, last line keeps its
        # trailing whitespace, inner lines are stripped on both sides
        firstline = [lines[0].rstrip() + " "]
        innerlines = [line.strip() + " " for line in lines[1:-1]]
        lastline = [lines[-1].lstrip()]
        thepo.source = "\n".join(firstline + innerlines + lastline)
    elif lines:
        thepo.source = lines[0]
    else:
        thepo.source = ""
    thepo.target = ""
82
def convertunit(self, thedtd):
    """converts a dtd unit to a po unit, returns None if empty or not for translation

    Localization notes addressed to this unit's entity are either turned into
    automatic comments or, when they start with DONT_TRANSLATE, cause the
    unit's entity and definition to be blanked. Note that thedtd is modified
    in place in both cases.
    """
    # NOTE(review): the def line is missing from this listing; the signature
    # is reconstructed from the calls self.convertunit(<dtd unit>).
    if thedtd is None:
        return None
    if getattr(thedtd, "entityparameter", None) == "SYSTEM":
        return None
    thepo = po.pounit(encoding="UTF-8")

    for commentnum, (commenttype, locnote) in enumerate(thedtd.comments):
        if commenttype != 'locnote':
            continue
        # presumably findend returns the position just past the marker text;
        # verify against translate.misc.quote
        typeend = quote.findend(locnote, 'LOCALIZATION NOTE')

        # the entity the note applies to is written in parentheses
        idstart = locnote.find('(', typeend)
        if idstart == -1:
            continue
        idend = locnote.find(')', idstart + 1)
        entity = locnote[idstart + 1:idend].strip()

        # the note text sits between the following ':' and the closing '-->'
        actualnotestart = locnote.find(':', idend + 1)
        actualnoteend = locnote.find('-->', idend)
        actualnote = locnote[actualnotestart + 1:actualnoteend].strip()

        if thedtd.entity != entity:
            continue
        if actualnote.startswith("DONT_TRANSLATE"):
            # blank the unit and drop the note; leaving the loop right away
            # keeps the deletion from disturbing the iteration
            thedtd.entity = ""
            thedtd.definition = ""
            del thedtd.comments[commentnum]
            break
        # relabel the note so that convertcomments sees an automatic comment
        thedtd.comments[commentnum] = ("automaticcomment", actualnote)

    self.convertcomments(thedtd, thepo)
    self.convertstrings(thedtd, thepo)
    if thepo.isblank() and not thepo.getlocations():
        return None
    return thepo
126
127
# entity name endings that mark a label, and the endings that mark the access
# key belonging to a label with the same base name
labelsuffixes = (".label", ".title")
accesskeysuffixes = (".accesskey", ".accessKey", ".akey")
130
def convertmixedunit(self, labeldtd, accesskeydtd):
    """converts a label unit and its access key unit to a single po unit

    The combined source is the label text with '&' inserted in front of the
    access key character. If only one of the two units converts, the other
    one's conversion is returned unchanged. Returns None when the access key
    is empty or does not occur in the label outside of an &...; entity.
    """
    # NOTE(review): the def line is missing from this listing; the signature
    # is reconstructed from the call self.convertmixedunit(labeldtd, accesskeydtd).
    labelpo = self.convertunit(labeldtd)
    accesskeypo = self.convertunit(accesskeydtd)
    if labelpo is None:
        return accesskeypo
    if accesskeypo is None:
        return labelpo

    # carry locations, msgid comments and notes of both units over
    thepo = po.pounit(encoding="UTF-8")
    thepo.addlocations(labelpo.getlocations())
    thepo.addlocations(accesskeypo.getlocations())
    thepo.msgidcomment = thepo._extract_msgidcomments() + labelpo._extract_msgidcomments()
    thepo.msgidcomment = thepo._extract_msgidcomments() + accesskeypo._extract_msgidcomments()
    thepo.addnote(labelpo.getnotes("developer"), "developer")
    thepo.addnote(accesskeypo.getnotes("developer"), "developer")
    thepo.addnote(labelpo.getnotes("translator"), "translator")
    thepo.addnote(accesskeypo.getnotes("translator"), "translator")

    # NOTE(review): .decode() assumes unquotefromdtd returns a byte string
    # (Python 2 str) - confirm before running this on Python 3
    label = dtd.unquotefromdtd(labeldtd.definition).decode('UTF-8')
    accesskey = dtd.unquotefromdtd(accesskeydtd.definition).decode('UTF-8')
    if len(accesskey) == 0:
        return None

    # look for the access key in the label, skipping anything between '&'
    # and ';' so that &amp;-style entities are left alone
    upperkey = accesskey.upper()
    lowerkey = accesskey.lower()
    accesskeypos = -1
    accesskeyaltcasepos = -1
    inentity = False
    for searchpos, searchchar in enumerate(label):
        if searchchar == '&':
            inentity = True
        elif searchchar == ';':
            inentity = False
        elif not inentity:
            if searchchar == upperkey:
                # an uppercase match always wins, so the search can stop
                accesskeypos = searchpos
                break
            if searchchar == lowerkey and accesskeyaltcasepos == -1:
                # remember only the first lowercase match as a fallback
                accesskeyaltcasepos = searchpos

    if accesskeypos == -1:
        accesskeypos = accesskeyaltcasepos
    if accesskeypos < 0:
        # the access key cannot be mixed in if it is not in the label
        return None

    label = label[:accesskeypos] + '&' + label[accesskeypos:]
    thepo.source = label.encode("UTF-8", "replace")
    thepo.target = ""
    return thepo
189
def findmixedentities(self, thedtdfile):
    """creates self.mixedentities from the dtd file...

    Every entity ending in one of self.labelsuffixes whose base name also
    exists in thedtdfile.index with one of self.accesskeysuffixes is recorded,
    together with that access key entity, as a key mapping to an empty dict.
    Any previous self.mixedentities is discarded.
    """
    # NOTE(review): the def line is missing from this listing; the signature
    # is reconstructed from the call self.findmixedentities(thedtdfile).
    self.mixedentities = {}
    index = thedtdfile.index
    for entity in index.keys():
        for labelsuffix in self.labelsuffixes:
            if not entity.endswith(labelsuffix):
                continue
            entitybase = entity[:entity.rfind(labelsuffix)]
            for akeytype in self.accesskeysuffixes:
                # "in" instead of has_key(), which no longer exists on
                # Python 3 dictionaries
                if entitybase + akeytype in index:
                    self.mixedentities[entity] = {}
                    self.mixedentities[entitybase + akeytype] = {}
204
205
def convertdtdunit(self, thedtdfile, thedtd, mixbucket="dtd"):
    """converts a dtd unit from thedtdfile to a po unit, handling mixed entities along the way...

    self.mixedentities[entity][mixbucket] records the outcome per entity:
    True once the label and access key were combined (the partner unit then
    yields None), False when combining failed (both are converted on their
    own). Entities that are not in self.mixedentities are converted directly.
    """
    # NOTE(review): the def line is missing from this listing. Parameter order
    # follows the call sites; the "dtd" default for mixbucket and the exact
    # nesting of the for/else/break below are reconstructed - confirm against
    # the original file.
    if thedtd.entity in self.mixedentities:
        alreadymixed = self.mixedentities[thedtd.entity].get(mixbucket, None)
        if alreadymixed:
            # this unit was already emitted as part of a combined unit
            return None
        elif alreadymixed is None:
            # work out which unit is the label and which is the access key,
            # depending on which of the two was reached first
            labeldtd, accesskeydtd = None, None
            labelentity, accesskeyentity = None, None
            for labelsuffix in self.labelsuffixes:
                if thedtd.entity.endswith(labelsuffix):
                    entitybase = thedtd.entity[:thedtd.entity.rfind(labelsuffix)]
                    for akeytype in self.accesskeysuffixes:
                        if entitybase + akeytype in thedtdfile.index:
                            labelentity, labeldtd = thedtd.entity, thedtd
                            accesskeyentity = labelentity[:labelentity.rfind(labelsuffix)] + akeytype
                            accesskeydtd = thedtdfile.index[accesskeyentity]
                            break
            else:
                for akeytype in self.accesskeysuffixes:
                    if thedtd.entity.endswith(akeytype):
                        accesskeyentity, accesskeydtd = thedtd.entity, thedtd
                        for labelsuffix in self.labelsuffixes:
                            labelentity = accesskeyentity[:accesskeyentity.rfind(akeytype)] + labelsuffix
                            if labelentity in thedtdfile.index:
                                labeldtd = thedtdfile.index[labelentity]
                                break
                        else:
                            # no label exists for this access key
                            labelentity = None
                            accesskeyentity = None
            thepo = self.convertmixedunit(labeldtd, accesskeydtd)
            if thepo is not None:
                if accesskeyentity is not None:
                    self.mixedentities[accesskeyentity][mixbucket] = True
                if labelentity is not None:
                    self.mixedentities[labelentity][mixbucket] = True
                return thepo
            else:
                # remember the failure so both units get converted separately
                if accesskeyentity is not None:
                    self.mixedentities[accesskeyentity][mixbucket] = False
                if labelentity is not None:
                    self.mixedentities[labelentity][mixbucket] = False
    return self.convertunit(thedtd)
255
def convertstore(self, thedtdfile):
    """converts a whole dtd store to a new po store and returns it

    The po store starts with a header unit noting the dtd filename. Null dtd
    units and units that convert to None are left out, and duplicates are
    handled according to self.duplicatestyle.
    """
    # NOTE(review): the def line is missing from this listing; the signature
    # is reconstructed from the call convertor.convertstore(inputstore).
    thetargetfile = po.pofile()
    targetheader = thetargetfile.makeheader(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
    targetheader.addnote("extracted from %s" % thedtdfile.filename, "developer")
    thetargetfile.addunit(targetheader)

    # the index and the mixed entity table are needed by convertdtdunit
    thedtdfile.makeindex()
    self.findmixedentities(thedtdfile)

    for thedtd in thedtdfile.units:
        if thedtd.isnull():
            continue
        thepo = self.convertdtdunit(thedtdfile, thedtd)
        if thepo is not None:
            thetargetfile.addunit(thepo)
    thetargetfile.removeduplicates(self.duplicatestyle)
    return thetargetfile
272
def mergestore(self, origdtdfile, translateddtdfile):
    """converts an original and a translated dtd store to one po store

    Each unit of origdtdfile supplies the source; the unit with the same
    entity in translateddtdfile, if any, supplies the target unless
    self.blankmsgstr is set. Returns the new po store.
    """
    thetargetfile = po.pofile()
    # the merged sources carry the same '&' markers as in convertstore, so
    # the header declares the accelerator marker here as well
    targetheader = thetargetfile.makeheader(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
    targetheader.addnote("extracted from %s, %s" % (origdtdfile.filename, translateddtdfile.filename), "developer")
    thetargetfile.addunit(targetheader)
    origdtdfile.makeindex()
    self.findmixedentities(origdtdfile)
    translateddtdfile.makeindex()
    self.findmixedentities(translateddtdfile)

    for origdtd in origdtdfile.units:
        if origdtd.isnull():
            continue
        origpo = self.convertdtdunit(origdtdfile, origdtd, mixbucket="orig")

        # pick the bucket for the translated unit from what happened to the
        # original one
        if origdtd.entity in self.mixedentities:
            mixedentitydict = self.mixedentities[origdtd.entity]
            if "orig" not in mixedentitydict:
                # nothing was recorded for the original: stop treating the
                # entity as mixed and stay in the same bucket
                mixbucket = "orig"
                del self.mixedentities[origdtd.entity]
            elif mixedentitydict["orig"]:
                # the original was combined, so combine the translation too
                mixbucket = "translate"
            else:
                # combining failed for the original; reusing its bucket makes
                # the translation be converted on its own as well
                mixbucket = "orig"
        else:
            mixbucket = "translate"

        if origpo is None:
            # nothing to output for this unit
            continue
        if origdtd.entity in translateddtdfile.index:
            translateddtd = translateddtdfile.index[origdtd.entity]
            translatedpo = self.convertdtdunit(translateddtdfile, translateddtd, mixbucket=mixbucket)
        else:
            translatedpo = None
        if translatedpo is not None and not self.blankmsgstr:
            origpo.target = translatedpo.source
        thetargetfile.addunit(origpo)
    thetargetfile.removeduplicates(self.duplicatestyle)
    return thetargetfile
315
def convertdtd(inputfile, outputfile, templatefile, pot=False, duplicatestyle="msgctxt"):
    """reads in inputfile and templatefile using dtd, converts using dtd2po, writes to outputfile

    Without a template the input is converted on its own; with a template the
    template supplies the original units and the input the translated ones.
    Returns 0 without writing anything when the result is empty, otherwise 1.
    """
    inputstore = dtd.dtdfile(inputfile)
    convertor = dtd2po(blankmsgstr=pot, duplicatestyle=duplicatestyle)
    if templatefile is None:
        outputstore = convertor.convertstore(inputstore)
    else:
        templatestore = dtd.dtdfile(templatefile)
        outputstore = convertor.mergestore(templatestore, inputstore)
    if outputstore.isempty():
        return 0
    outputfile.write(str(outputstore))
    return 1
329
def main(argv=None):
    """builds the option parser for the dtd to po conversion and runs it on argv"""
    from translate.convert import convert
    # plain dtd input and dtd input with a dtd template both produce po
    formats = {"dtd": ("po", convertdtd), ("dtd", "dtd"): ("po", convertdtd)}
    parser = convert.ConvertOptionParser(formats, usetemplates=True, usepots=True, description=__doc__)
    parser.add_duplicates_option()
    parser.passthrough.append("pot")
    parser.run(argv)
337
# run the converter when this file is executed as a script
if __name__ == '__main__':
    main()
340