1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """
23 Classes that hold units of .oo files (oounit) or entire files (oofile).
24
25 These are specific .oo files for localisation exported by OpenOffice.org - SDF
26 format (previously known as GSI files). For an overview of the format, see
27 http://l10n.openoffice.org/L10N_Framework/Intermediate_file_format.html
28
29 The behaviour in terms of escaping is explained in detail in the programming
30 comments.
31 """
32
33
34 import os
35 import re
36 import sys
37 from translate.misc import quote
38 from translate.misc import wStringIO
39 import warnings
40
41
42
# Characters that may appear unchanged in a normalised file name.
normalfilenamechars = "/#.0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

# 256-entry translation table indexed by character code: characters listed in
# normalfilenamechars map to themselves, every other character maps to "_".
normalizetable = "".join(
    [i if i in normalfilenamechars else "_" for i in map(chr, range(256))]
)
50
53 self.normalchars = {}
54 for char in normalchars:
55 self.normalchars[ord(char)] = char
57 return self.normalchars.get(key, u"_")
58
59 unormalizetable = unormalizechar(normalfilenamechars.decode("ascii"))
60
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def escape_text(text):
    """Escape newline, tab and carriage-return characters in text.

    Each of these control characters is replaced by its two-character
    backslash form (backslash followed by ``n``, ``t`` or ``r``).  All other
    characters, backslashes included, are passed through unchanged.
    """
    for raw, escaped in (("\n", "\\n"), ("\t", "\\t"), ("\r", "\\r")):
        text = text.replace(raw, escaped)
    return text
91
def unescape_text(text):
    """Unescape SDF text: expand escaped newline, tab and carriage return.

    The two-character sequences backslash+``n``, backslash+``t`` and
    backslash+``r`` become the real control characters.  A doubled backslash
    is kept exactly as it is (still doubled) and shields the character after
    it, so two backslashes followed by ``n`` are not turned into a newline.

    The text is scanned once, left to right, so no placeholder character is
    needed and a literal BEL character (``\\a``) in the input is preserved
    instead of being rewritten as a doubled backslash.
    """
    expansions = {"\\n": "\n", "\\t": "\t", "\\r": "\r"}

    def expand(match):
        token = match.group(0)
        # A doubled backslash is not in the table and is returned untouched.
        return expansions.get(token, token)

    # Alternation order matters: a doubled backslash must be consumed before
    # a single backslash is considered as the start of an escape.
    return re.sub(r"\\\\|\\[ntr]", expand, text)
96
# Matches a lowercase tag: optional leading "/", a name made of lowercase
# letters, "_" or "-", optionally one lowercase attribute with a double-quoted
# value, and an optional trailing "/".  Written as a raw string so that "\-"
# is not an invalid string-literal escape; the pattern text is unchanged.
helptagre = re.compile(r'''<[/]??[a-z_\-]+?(?:| +[a-z]+?=".*?") *[/]??>''')
98
100 """Escapes the help text as it would be in an SDF file.
101
102 <, >, " are only escaped in <[[:lower:]]> tags. Some HTML tags make it in in
103 lowercase so those are dealt with. Some OpenOffice.org help tags are not
104 escaped.
105 """
106 text = text.replace("\\", "\\\\")
107 for tag in helptagre.findall(text):
108 escapethistag = True
109 if tag in ["<br>", "<h1>", "</h1>", "<img ...>", "<->", "<empty>", "<ref>", "<references>"]:
110 escapethistag = False
111 for skip in ["<font", "<node", "<help_section"]:
112 if tag.startswith(skip):
113 escapethistag = False
114 if escapethistag:
115 escaped_tag = ("\\<" + tag[1:-1] + "\\>").replace('"', '\\"')
116 text = text.replace(tag, escaped_tag)
117 return text
118
120 """Unescapes normal text to be suitable for writing to the SDF file."""
121 return text.replace(r"\<", "<").replace(r"\>", ">").replace(r'\"', '"').replace(r"\\", "\\")
122
124 """Encode a Unicode string the the specified encoding"""
125 if isinstance(text, unicode):
126 return text.encode('UTF-8')
127 return text
128
129
131 """this represents one line, one translation in an .oo file"""
133 """construct an ooline from its parts"""
134 if parts is None:
135 self.project, self.sourcefile, self.dummy, self.resourcetype, \
136 self.groupid, self.localid, self.helpid, self.platform, \
137 self.width, self.languageid, self.text, self.helptext, \
138 self.quickhelptext, self.title, self.timestamp = [""] * 15
139 else:
140 self.setparts(parts)
141
143 """create a line from its tab-delimited parts"""
144 if len(parts) != 15:
145 warnings.warn("oo line contains %d parts, it should contain 15: %r" % \
146 (len(parts), parts))
147 newparts = list(parts)
148 if len(newparts) < 15:
149 newparts = newparts + [""] * (15-len(newparts))
150 else:
151 newparts = newparts[:15]
152 parts = tuple(newparts)
153 self.project, self.sourcefile, self.dummy, self.resourcetype, \
154 self.groupid, self.localid, self.helpid, self.platform, \
155 self.width, self.languageid, self._text, self.helptext, \
156 self.quickhelptext, self.title, self.timestamp = parts
157
159 """return a list of parts in this line"""
160 return (self.project, self.sourcefile, self.dummy, self.resourcetype,
161 self.groupid, self.localid, self.helpid, self.platform,
162 self.width, self.languageid, self._text, self.helptext,
163 self.quickhelptext, self.title, self.timestamp)
164
166 """Obtains the text column and handle escaping."""
167 if self.sourcefile.endswith(".xhp"):
168 return unescape_help_text(self._text)
169 else:
170 return unescape_text(self._text)
171
172 - def settext(self, text):
173 """Sets the text column and handle escaping."""
174 if self.sourcefile.endswith(".xhp"):
175 self._text = escape_help_text(text)
176 else:
177 self._text = escape_text(text)
178 text = property(gettext, settext)
179
183
185 """return a line in tab-delimited form"""
186 parts = self.getparts()
187 return "\t".join(parts)
188
190 """get the key that identifies the resource"""
191 return (self.project, self.sourcefile, self.resourcetype, self.groupid,
192 self.localid, self.platform)
193
195 """this represents a number of translations of a resource"""
197 """construct the oounit"""
198 self.languages = {}
199 self.lines = []
200
202 """add a line to the oounit"""
203 self.languages[line.languageid] = line
204 self.lines.append(line)
205
209
211 """return the lines in tab-delimited form"""
212 return "\r\n".join([str(line) for line in self.lines])
213
215 """this represents an entire .oo file"""
216 UnitClass = oounit
218 """constructs the oofile"""
219 self.oolines = []
220 self.units = []
221 self.ookeys = {}
222 self.filename = ""
223 self.languages = []
224 if input is not None:
225 self.parse(input)
226
228 """adds a parsed line to the file"""
229 key = thisline.getkey()
230 element = self.ookeys.get(key, None)
231 if element is None:
232 element = self.UnitClass()
233 self.units.append(element)
234 self.ookeys[key] = element
235 element.addline(thisline)
236 self.oolines.append(thisline)
237 if thisline.languageid not in self.languages:
238 self.languages.append(thisline.languageid)
239
241 """parses lines and adds them to the file"""
242 if not self.filename:
243 self.filename = getattr(input, 'name', '')
244 if hasattr(input, "read"):
245 src = input.read()
246 input.close()
247 else:
248 src = input
249 for line in src.split("\n"):
250 line = quote.rstripeol(line)
251 if not line:
252 continue
253 parts = line.split("\t")
254 thisline = ooline(parts)
255 self.addline(thisline)
256
260
262 """converts all the lines back to tab-delimited form"""
263 lines = []
264 for oe in self.units:
265 if len(oe.lines) > 2:
266 warnings.warn("contains %d lines (should be 2 at most): languages %r" % (len(oe.lines), oe.languages))
267 oekeys = [line.getkey() for line in oe.lines]
268 warnings.warn("contains %d lines (should be 2 at most): keys %r" % (len(oe.lines), oekeys))
269 oeline = str(oe) + "\r\n"
270 lines.append(oeline)
271 return "".join(lines)
272
274 """this takes a huge GSI file and represents it as multiple smaller files..."""
275 - def __init__(self, filename, mode=None, multifilestyle="single"):
276 """initialises oomultifile from a seekable inputfile or writable outputfile"""
277 self.filename = filename
278 if mode is None:
279 if os.path.exists(filename):
280 mode = 'r'
281 else:
282 mode = 'w'
283 self.mode = mode
284 self.multifilestyle = multifilestyle
285 self.multifilename = os.path.splitext(filename)[0]
286 self.multifile = open(filename, mode)
287 self.subfilelines = {}
288 if mode == "r":
289 self.createsubfileindex()
290
292 """reads in all the lines and works out the subfiles"""
293 linenum = 0
294 for line in self.multifile:
295 subfile = self.getsubfilename(line)
296 if not subfile in self.subfilelines:
297 self.subfilelines[subfile] = []
298 self.subfilelines[subfile].append(linenum)
299 linenum += 1
300
302 """looks up the subfile name for the line"""
303 if line.count("\t") < 2:
304 raise ValueError("invalid tab-delimited line: %r" % line)
305 lineparts = line.split("\t", 2)
306 module, filename = lineparts[0], lineparts[1]
307 if self.multifilestyle == "onefile":
308 ooname = self.multifilename
309 elif self.multifilestyle == "toplevel":
310 ooname = module
311 else:
312 filename = filename.replace("\\", "/")
313 fileparts = [module] + filename.split("/")
314 ooname = os.path.join(*fileparts[:-1])
315 return ooname + os.extsep + "oo"
316
318 """returns a list of subfiles in the file"""
319 return self.subfilelines.keys()
320
322 """iterates through the subfile names"""
323 for subfile in self.listsubfiles():
324 yield subfile
325
327 """checks if this pathname is a valid subfile"""
328 return pathname in self.subfilelines
329
331 """returns the list of lines matching the subfile"""
332 lines = []
333 requiredlines = dict.fromkeys(self.subfilelines[subfile])
334 linenum = 0
335 self.multifile.seek(0)
336 for line in self.multifile:
337 if linenum in requiredlines:
338 lines.append(line)
339 linenum += 1
340 return "".join(lines)
341
348
350 """returns a pseudo-file object for the given subfile"""
351 def onclose(contents):
352 self.multifile.write(contents)
353 self.multifile.flush()
354 outputfile = wStringIO.CatchStringOutput(onclose)
355 outputfile.filename = subfile
356 return outputfile
357
359 """returns an oofile built up from the given subfile's lines"""
360 subfilesrc = self.getsubfilesrc(subfile)
361 oosubfile = oofile()
362 oosubfile.filename = subfile
363 oosubfile.parse(subfilesrc)
364 return oosubfile
365
if __name__ == '__main__':
    # Filter mode: parse an .oo file from standard input and write it back
    # out to standard output.
    stdin_source = sys.stdin.read()
    of = oofile()
    of.parse(stdin_source)
    sys.stdout.write(str(of))
370