1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .dtd files (dtdunit) or entire files (dtdfile).
23 These are the specific .dtd files used by Mozilla for localisation."""
24
25 from translate.storage import base
26 from translate.misc import quote
27
28 import re
29 import sys
30 import warnings
31 try:
32 from lxml import etree
33 import StringIO
34 except ImportError:
35 etree = None
36
45
def unquotefromdtd(source):
    """Return the text of a quoted DTD entity value with the quoting removed.

    ``source`` is the definition as written in the file, including its
    surrounding quote characters; the first character is taken to be the
    quote character. An empty definition is treated as ``""``.

    When the value is delimited by apostrophes, every ``&apos;`` entity
    inside it is turned back into a literal apostrophe.
    """
    if not source:
        source = '""'
    quotechar = source[0]
    # The second return value of extractwithoutquotes is not needed here.
    extracted, _unused = quote.extractwithoutquotes(
        source, quotechar, quotechar, allowreentry=False)
    if quotechar == "'":
        # An apostrophe-delimited value cannot hold a bare apostrophe, so it
        # is carried as the XML entity and has to be decoded on the way out.
        extracted = extracted.replace("&apos;", "'")
    return extracted
57
58 -class dtdunit(base.TranslationUnit):
59 """this class represents an entity definition from a dtd file (and possibly associated comments)"""
# NOTE(review): the `def __init__(...)` header line for this body is absent
# from this listing; the body reads a `source` argument. Confirm the exact
# signature (including any default) against the upstream file.
61 """construct the dtdunit, prepare it for parsing"""
62 super(dtdunit, self).__init__(source)
# (commenttype, text) pairs, and raw lines that are neither comment nor entity;
# both are filled in by parse().
63 self.comments = []
64 self.unparsedlines = []
# Parser state flags used by parse(): non-zero while inside a <!-- --> comment
# or an <!ENTITY declaration respectively.
65 self.incomment = 0
66 self.inentity = 0
# Placeholder name; parse() resets self.entity to None before reading a unit.
67 self.entity = "FakeEntityOnlyForInitialisationAndTesting"
# Assigning through the `source` property stores the quoted form in
# self.definition.
68 self.source = source
69
70
def setsource(self, source):
    """Store ``source`` as this unit's definition, quoted for use in a DTD."""
    quoted = quotefordtd(source)
    self.definition = quoted


def getsource(self):
    """Return the stored definition with its DTD quoting removed."""
    raw = self.definition
    return unquotefromdtd(raw)


# Expose the accessor pair as the ``source`` attribute.
source = property(fget=getsource, fset=setsource)
79
85
def gettarget(self):
    """Return the target text: the stored definition without its DTD quoting."""
    raw = self.definition
    return unquotefromdtd(raw)


# Expose the accessor pair as the ``target`` attribute.
target = property(fget=gettarget, fset=settarget)
90
def isnull(self):
    """Tell whether this dtdunit carries no entity definition at all.

    Returns True when ``self.entity`` is None, False for any other value
    (including an empty string).
    """
    has_entity = self.entity is not None
    return not has_entity
96
def parse(self, dtdsrc):
    """Read the first DTD element found in ``dtdsrc`` into this unit.

    The text is walked line by line. Comments met on the way are stored in
    ``self.comments`` as ``(commenttype, text)`` pairs, text found outside
    comments and entity declarations goes to ``self.unparsedlines``, and the
    entity name and its still-quoted value end up in ``self.entity`` and
    ``self.definition``. Scanning stops once one entity value has been read
    to its closing quote.

    Returns the number of lines consumed (0 for empty input). Raises
    ValueError when the entity value does not open with ' or ".
    """

    def skipspace(text, pos):
        """Return the first index at or after ``pos`` that is not whitespace."""
        while pos < len(text) and text[pos].isspace():
            pos += 1
        return pos

    # Every comment category is backed by the very same list object, so all
    # notes are kept in file order inside self.comments.
    self.comments = []
    self.locfilenotes = self.comments
    self.locgroupstarts = self.comments
    self.locgroupends = self.comments
    self.locnotes = self.comments
    self.entity = None
    self.definition = ''
    if not dtdsrc:
        return 0

    # Where a comment of each type is recorded.
    comment_sinks = {
        "locfile": self.locfilenotes,
        "locgroupstart": self.locgroupstarts,
        "locgroupend": self.locgroupends,
        "locnote": self.locnotes,
        "comment": self.comments,
    }

    linesprocessed = 0
    comment = ""
    for line in dtdsrc.split("\n"):
        line += "\n"
        linesprocessed += 1

        if not self.incomment and line.find('<!--') != -1:
            # A comment opens on this line: work out which kind it is before
            # actually consuming it below.
            self.incomment = 1
            self.continuecomment = 0
            comment, unused = quote.extract(line, "<!--", "-->", None, 0)
            if comment.find('LOCALIZATION NOTE') != -1:
                pos = quote.findend(comment, 'LOCALIZATION NOTE')
                while comment[pos] == ' ':
                    pos += 1
                if comment.find('FILE', pos) == pos:
                    self.commenttype = "locfile"
                elif comment.find('BEGIN', pos) == pos:
                    self.commenttype = "locgroupstart"
                elif comment.find('END', pos) == pos:
                    self.commenttype = "locgroupend"
                else:
                    self.commenttype = "locnote"
            else:
                self.commenttype = "comment"

        if self.incomment:
            comment, self.incomment = quote.extract(
                line, "<!--", "-->", None, self.continuecomment)
            self.continuecomment = self.incomment
            # Whatever is left of the line after removing the comment text is
            # still subject to entity parsing further down.
            line = line.replace(comment, "", 1)
            if not self.incomment:
                # The comment closed on this line: make it end with a newline.
                if line.isspace():
                    comment += line
                    line = ''
                else:
                    comment += '\n'
            sink = comment_sinks.get(self.commenttype)
            if sink is not None:
                sink.append((self.commenttype, comment))

        if not self.inentity and not self.incomment:
            entitypos = line.find('<!ENTITY')
            if entitypos == -1:
                self.unparsedlines.append(line)
            else:
                self.inentity = 1
                beforeentity = line[:entitypos].strip()
                if beforeentity.startswith("#"):
                    self.hashprefix = beforeentity
                self.entitypart = "start"

        if self.inentity:
            if self.entitypart == "start":
                # Drop everything up to and including the <!ENTITY keyword.
                line = line[quote.findend(line, '<!ENTITY'):]
                self.entitypart = "name"
                self.entitytype = "internal"
            if self.entitypart == "name":
                e = skipspace(line, 0)
                self.entity = ''
                if e < len(line) and line[e] == '%':
                    self.entitytype = "external"
                    self.entityparameter = ""
                    e = skipspace(line, e + 1)
                namestart = e
                while e < len(line) and not line[e].isspace():
                    e += 1
                self.entity += line[namestart:e]
                e = skipspace(line, e)
                if self.entity:
                    if self.entitytype == "external":
                        self.entitypart = "parameter"
                    else:
                        self.entitypart = "definition"
                    if e == len(line):
                        # Nothing more on this line; the value starts later.
                        self.entityhelp = None
                        continue
                    elif self.entitypart == "definition":
                        # Remember where the value starts and its quote char.
                        self.entityhelp = (e, line[e])
                        self.instring = 0
            if self.entitypart == "parameter":
                paramstart = e
                while e < len(line) and line[e].isalnum():
                    e += 1
                self.entityparameter += line[paramstart:e]
                e = skipspace(line, e)
                line = line[e:]
                e = 0
                if not line:
                    continue
                if line[0] in ('"', "'"):
                    self.entitypart = "definition"
                    self.entityhelp = (e, line[e])
                    self.instring = 0
            if self.entitypart == "definition":
                if self.entityhelp is None:
                    e = skipspace(line, 0)
                    if e == len(line):
                        continue
                    self.entityhelp = (e, line[e])
                    self.instring = 0

                e, quotechar = self.entityhelp
                if quotechar == "'":
                    defpart, self.instring = quote.extract(
                        line[e:], "'", "'",
                        startinstring=self.instring, allowreentry=False)
                elif quotechar == '"':
                    defpart, self.instring = quote.extract(
                        line[e:], '"', '"',
                        startinstring=self.instring, allowreentry=False)
                else:
                    raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))

                # Continuation lines are read from column 0 with the same
                # quote character.
                self.entityhelp = (0, quotechar)
                self.definition += defpart
                if not self.instring:
                    # Closing quote reached: this unit is complete.
                    self.inentity = 0
                    break

    # Disabled debugging aid: dumps every attribute into the comments.
    if 0:
        for attr in dir(self):
            r = repr(getattr(self, attr))
            if len(r) > 60:
                r = r[:57]+"..."
            self.comments.append(("comment", "self.%s = %s" % (attr, r)))
    return linesprocessed
261
268
# NOTE(review): the `def` header line for this method is absent from this
# listing, so its name is not visible here; the body only uses `self`.
270 """convert the dtd entity back to string form"""
# Start with the text of every recorded comment (the type tag is dropped),
# followed by the raw lines that were neither comment nor entity.
271 lines = []
272 lines.extend([comment for commenttype, comment in self.comments])
273 lines.extend(self.unparsedlines)
# A unit without an entity is just that text, with trailing whitespace
# collapsed to a single newline.
274 if self.isnull():
275 result = "".join(lines)
276 return result.rstrip() + "\n"
277
278
279
280
# Rebuild the <!ENTITY ...> declaration; self.definition is used as stored,
# i.e. it is expected to still carry its quote characters.
281 if len(self.entity) > 0:
282 if getattr(self, 'entitytype', None) == 'external':
283 entityline = '<!ENTITY % '+self.entity+' '+self.entityparameter+' '+self.definition+'>'
284 else:
285 entityline = '<!ENTITY '+self.entity+' '+self.definition+'>'
# Text starting with "#" that preceded <!ENTITY on the original line is put back.
286 if getattr(self, 'hashprefix', None):
287 entityline = self.hashprefix + " " + entityline
# Python 2: unicode text is encoded to UTF-8 bytes before being joined.
288 if isinstance(entityline, unicode):
289 entityline = entityline.encode('UTF-8')
290 lines.append(entityline+'\n')
291 return "".join(lines)
292
293 -class dtdfile(base.TranslationStore):
294 """this class represents a .dtd file, made up of dtdunits"""
295 UnitClass = dtdunit
305
def parse(self, dtdsrc):
    """Split ``dtdsrc`` into dtdunits and store them in ``self.units``.

    Any units held before the call are discarded. The text is cut into
    chunks of lines: a chunk ends after the first line that starts with a
    closing quote followed by ``>`` once an ``<!ENTITY`` has been seen, or at
    the end of the input. Each chunk is fed to fresh dtdunit objects until
    one of them consumes no lines. Units that hold an entity or unparsed
    lines are kept.

    An exception raised while parsing a chunk is not propagated; it is
    reported through ``warnings.warn`` together with the offending lines.
    """
    # Compiled once; match() anchors at the start of the line.
    entity_end = re.compile(r"""["']\s*>""")

    self.units = []
    start = 0
    end = 0
    lines = dtdsrc.split("\n")
    total = len(lines)
    while end < total:
        if start == end:
            end += 1
        foundentity = False
        while end < total:
            if '<!ENTITY' in lines[end]:
                foundentity = True
            if foundentity and entity_end.match(lines[end]):
                end += 1
                break
            end += 1

        linesprocessed = 1  # primes the loop below
        while linesprocessed >= 1:
            newdtd = dtdunit()
            try:
                linesprocessed = newdtd.parse("\n".join(lines[start:end]))
                if linesprocessed >= 1 and (not newdtd.isnull() or newdtd.unparsedlines):
                    self.units.append(newdtd)
            except Exception as e:
                # Best effort: report the problem and carry on. linesprocessed
                # keeps its previous value, so parsing resumes further down.
                warnings.warn("%s\nError occured between lines %d and %d:\n%s" % (e, start+1, end, "\n".join(lines[start:end])))
            start += linesprocessed
336
def __str__(self):
    """Return the whole store as DTD source text.

    When lxml is available the output (with every "#expand" removed) is
    first checked by building an ``etree.DTD`` from it; if that fails a
    warning is issued and None is returned. Unicode output is encoded with
    ``self.encoding`` (UTF-8 when the attribute is missing).
    """
    source = self.getoutput()
    if etree is not None:
        checked = re.sub("#expand", "", source)
        try:
            etree.DTD(StringIO.StringIO(checked))
        except etree.DTDParseError:
            warnings.warn("DTD file '%s' does not validate" % self.filename)
            # NOTE(review): returning None from __str__ makes str(obj) raise
            # TypeError; kept as-is because callers may depend on it.
            return None
    if not isinstance(source, unicode):
        return source
    return source.encode(getattr(self, "encoding", "UTF-8"))
349
def getoutput(self):
    """Return the source text of all units, concatenated in order."""
    return "".join(str(unit) for unit in self.units)
354
# NOTE(review): the `def` header line for this method is absent from this
# listing, so its name is not visible here; the body only uses `self`.
356 """makes self.index dictionary keyed on entities"""
357 self.index = {}
# Units whose isnull() is true are skipped; when two units share an entity
# name the later one replaces the earlier one in the index.
358 for dtd in self.units:
359 if not dtd.isnull():
360 self.index[dtd.entity] = dtd
361
def rewrap(self):
    """Join every multi-line definition in the store onto one line.

    The line breaks inside a unit's ``definition`` are removed. A single
    space is inserted at a join unless the text before it already ends in
    whitespace or the text after it starts with whitespace. Definitions
    without a line break are left untouched.
    """
    for unit in self.units:
        pieces = unit.definition.split("\n")
        if len(pieces) < 2:
            continue
        joined = pieces[0]
        for piece in pieces[1:]:
            already_spaced = joined[-1:].isspace() or piece[:1].isspace()
            if not already_spaced:
                joined += " "
            joined += piece
        unit.definition = joined
373
if __name__ == "__main__":
    # Run as a filter: read a DTD from stdin, rewrap its definitions and
    # write the result to stdout.
    import sys
    store = dtdfile(sys.stdin)
    store.rewrap()
    sys.stdout.write(str(store))
379