Package translate :: Package storage :: Module oo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.oo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """ 
 23  Classes that hold units of .oo files (oounit) or entire files (oofile). 
 24   
 25  These are specific .oo files for localisation exported by OpenOffice.org - SDF  
 26  format (previously known as GSI files). For an overview of the format, see 
 27  http://l10n.openoffice.org/L10N_Framework/Intermediate_file_format.html 
 28   
 29  The behaviour in terms of escaping is explained in detail in the programming 
 30  comments. 
 31  """ 
 32  # FIXME: add simple test which reads in a file and writes it out again 
 33   
 34  import os 
 35  import re 
 36  import sys 
 37  from translate.misc import quote 
 38  from translate.misc import wStringIO 
 39  import warnings 
 40   
 41  # File normalisation 
 42   
 43  normalfilenamechars = "/#.0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 
 44  normalizetable = "" 
 45  for i in map(chr, range(256)): 
 46      if i in normalfilenamechars: 
 47          normalizetable += i 
 48      else: 
 49          normalizetable += "_" 
 50   
class unormalizechar(dict):
    """Translation mapping that keeps the given characters and maps
    every other character code to an underscore.

    Lookups are answered from the normalchars attribute (character code ->
    character); the underlying dict itself is never filled.
    """

    def __init__(self, normalchars):
        """Remember each character of normalchars under its character code."""
        self.normalchars = dict((ord(char), char) for char in normalchars)

    def __getitem__(self, key):
        """Return the character stored for key, or u"_" when there is none."""
        try:
            return self.normalchars[key]
        except KeyError:
            return u"_"

# Translation mapping built from normalfilenamechars; normalizefilename()
# uses it for filenames that are not plain str objects.
unormalizetable = unormalizechar(normalfilenamechars.decode("ascii"))

def normalizefilename(filename):
    """Replace every character that is not in normalfilenamechars with "_".

    Plain str filenames are translated with normalizetable; anything else
    is translated with unormalizetable.
    """
    if not isinstance(filename, str):
        return filename.translate(unormalizetable)
    return filename.translate(normalizetable)
67 68 # These are functions that deal with escaping and unescaping of the text fields 69 # of the SDF file. These should only be applied to the text column. 70 # The fields quickhelptext and title are assumed to carry no escaping. 71 # 72 # The escaping of all strings except those coming from .xhp (helpcontent2) 73 # sourcefiles work as follows: 74 # (newline) -> \n 75 # (carriage return) -> \r 76 # (tab) -> \t 77 # Backslash characters (\) and single quotes (') are not consistently escaped, 78 # and are therefore left as they are. 79 # 80 # For strings coming from .xhp (helpcontent2) sourcefiles the following 81 # characters are escaped inside XML tags only: 82 # < -> \< when used with lowercase tagnames (with some exceptions) 83 # > -> \> when used with lowercase tagnames (with some exceptions) 84 # " -> \" around XML properties 85 # The following is consistently escaped in .xhp strings (not only in XML tags): 86 # \ -> \\ 87
def escape_text(text):
    r"""Escape unit text for storage in the SDF text column.

    Newline, tab and carriage return characters become the two-character
    sequences \n, \t and \r.  Nothing else is touched.  ooline.settext()
    applies this to text that does not come from an .xhp source file.
    """
    for char, sequence in (("\n", "\\n"), ("\t", "\\t"), ("\r", "\\r")):
        text = text.replace(char, sequence)
    return text
91
def unescape_text(text):
    r"""Unescape the SDF text column for use as unit text.

    The two-character sequences \n, \t and \r become newline, tab and
    carriage return.  A doubled backslash (\\) is consumed as a unit and
    left exactly as it is, so the character following it is never taken
    for an escape sequence.

    The text is processed in a single left-to-right pass.  No placeholder
    character is used, so a literal BEL ("\a") in the input is returned
    unchanged instead of being turned into two backslashes.
    """
    controlchars = {"n": "\n", "t": "\t", "r": "\r"}

    def expand(match):
        # Doubled backslashes have no entry in controlchars and are put back
        # verbatim.
        return controlchars.get(match.group(1), match.group(0))

    return re.sub(r"\\([\\ntr])", expand, text)

# Matches lowercase XML/HTML-style tags, optionally carrying one
# name="value" property.  Written as a raw string so that the \- inside the
# character class is not an invalid string escape.
helptagre = re.compile(r'''<[/]??[a-z_\-]+?(?:| +[a-z]+?=".*?") *[/]??>''')


def escape_help_text(text):
    r"""Escape the help text as it would be in an SDF file.

    Every backslash is doubled first.  After that <, > and " are escaped
    with a backslash, but only inside tags matched by helptagre (lowercase
    tag names).  Some HTML tags make it in in lowercase and some
    OpenOffice.org help tags are not escaped: those are listed below and
    are left alone.
    """
    verbatimtags = ["<br>", "<h1>", "</h1>", "<img ...>", "<->", "<empty>", "<ref>", "<references>"]
    verbatimprefixes = ["<font", "<node", "<help_section"]
    text = text.replace("\\", "\\\\")
    for tag in helptagre.findall(text):
        if tag in verbatimtags:
            continue
        skipthistag = False
        for prefix in verbatimprefixes:
            if tag.startswith(prefix):
                skipthistag = True
                break
        if skipthistag:
            continue
        # Escape the angle brackets of the tag and any quotes inside it, then
        # substitute every occurrence of that exact tag in the text.
        escaped_tag = ("\\<" + tag[1:-1] + "\\>").replace('"', '\\"')
        text = text.replace(tag, escaped_tag)
    return text
118
def unescape_help_text(text):
    r"""Unescape help text taken from the SDF file into normal text.

    The replacements are applied one after the other, in this order:
    \< becomes <, \> becomes >, \" becomes " and finally \\ becomes \.
    """
    replacements = ((r"\<", "<"), (r"\>", ">"), (r'\"', '"'), (r"\\", "\\"))
    for escaped, plain in replacements:
        text = text.replace(escaped, plain)
    return text
122
def encode_if_needed_utf8(text):
    """Return text encoded as UTF-8 if it is a unicode object.

    Anything that is not a unicode object is returned unchanged.
    """
    if not isinstance(text, unicode):
        return text
    return text.encode('UTF-8')
128 129
class ooline(object):
    """One line of an .oo file: a single translation made up of 15 columns."""

    def __init__(self, parts=None):
        """Build the line from parts, or with every column empty if parts is None."""
        if parts is not None:
            self.setparts(parts)
            return
        # text is assigned through the property, so the empty string passes
        # through settext() on its way into _text.
        (self.project, self.sourcefile, self.dummy, self.resourcetype,
         self.groupid, self.localid, self.helpid, self.platform,
         self.width, self.languageid, self.text, self.helptext,
         self.quickhelptext, self.title, self.timestamp) = [""] * 15

    def setparts(self, parts):
        """Fill the columns from the tab-delimited parts of a line.

        A warning is issued when there are not exactly 15 parts; the parts
        are then padded with empty strings or cut down to 15.  The text
        column is stored as given (still escaped) in _text.
        """
        count = len(parts)
        if count != 15:
            warnings.warn("oo line contains %d parts, it should contain 15: %r" %
                          (count, parts))
            fixedparts = list(parts)[:15]
            fixedparts.extend([""] * (15 - len(fixedparts)))
            parts = tuple(fixedparts)
        (self.project, self.sourcefile, self.dummy, self.resourcetype,
         self.groupid, self.localid, self.helpid, self.platform,
         self.width, self.languageid, self._text, self.helptext,
         self.quickhelptext, self.title, self.timestamp) = parts

    def getparts(self):
        """Return the 15 columns as a tuple, the text column in escaped form."""
        columns = (
            self.project, self.sourcefile, self.dummy, self.resourcetype,
            self.groupid, self.localid, self.helpid, self.platform,
            self.width, self.languageid, self._text, self.helptext,
            self.quickhelptext, self.title, self.timestamp,
        )
        return columns

    def gettext(self):
        """Return the text column unescaped; .xhp source files use the help rules."""
        if self.sourcefile.endswith(".xhp"):
            unescape = unescape_help_text
        else:
            unescape = unescape_text
        return unescape(self._text)

    def settext(self, text):
        """Store text in the text column escaped; .xhp source files use the help rules."""
        if self.sourcefile.endswith(".xhp"):
            escape = escape_help_text
        else:
            escape = escape_text
        self._text = escape(text)

    text = property(gettext, settext)

    def __str__(self):
        """Return the tab-delimited line, UTF-8 encoded if it is unicode."""
        output = self.getoutput()
        return encode_if_needed_utf8(output)

    def getoutput(self):
        """Return the columns joined with tabs."""
        return "\t".join(self.getparts())

    def getkey(self):
        """Return the tuple of columns that identifies the resource."""
        key = (self.project, self.sourcefile, self.resourcetype, self.groupid,
               self.localid, self.platform)
        return key

193
class oounit:
    """The translations of one resource, held as a number of lines."""

    def __init__(self):
        """Start with no lines and no languages."""
        self.languages = {}
        self.lines = []

    def addline(self, line):
        """Add line to the unit, also filing it under its languageid."""
        languageid = line.languageid
        self.languages[languageid] = line
        self.lines.append(line)

    def __str__(self):
        """Return the unit's lines as text, UTF-8 encoded if it is unicode."""
        output = self.getoutput()
        return encode_if_needed_utf8(output)

    def getoutput(self):
        """Return str() of every line, joined with CR LF."""
        return "\r\n".join(map(str, self.lines))

213
class oofile:
    """this represents an entire .oo file"""
    # Class instantiated for every new resource key seen by addline().
    UnitClass = oounit

    def __init__(self, input=None):
        """constructs the oofile, parsing input straight away if it is given"""
        self.oolines = []
        self.units = []
        self.ookeys = {}
        self.filename = ""
        self.languages = []
        if input is not None:
            self.parse(input)

    def addline(self, thisline):
        """adds a parsed line to the file

        Lines sharing the same key (see ooline.getkey) are collected in one
        unit; a new unit is created the first time a key is seen.
        """
        key = thisline.getkey()
        element = self.ookeys.get(key, None)
        if element is None:
            element = self.UnitClass()
            self.units.append(element)
            self.ookeys[key] = element
        element.addline(thisline)
        self.oolines.append(thisline)
        if thisline.languageid not in self.languages:
            self.languages.append(thisline.languageid)

    def parse(self, input):
        """parses lines and adds them to the file

        input is either an object with a read() method, which is read
        completely and then closed, or the source text itself.
        """
        if not self.filename:
            self.filename = getattr(input, 'name', '')
        if hasattr(input, "read"):
            # Close the input even when reading it fails.
            try:
                src = input.read()
            finally:
                input.close()
        else:
            src = input
        for line in src.split("\n"):
            line = quote.rstripeol(line)
            if not line:
                continue
            parts = line.split("\t")
            thisline = ooline(parts)
            self.addline(thisline)

    def __str__(self):
        """convert to a string. double check that unicode is handled"""
        return encode_if_needed_utf8(self.getoutput())

    def getoutput(self):
        """converts all the lines back to tab-delimited form

        Each unit is followed by CR LF.  Units holding more than two lines
        trigger warnings listing their languages and keys.
        """
        lines = []
        for oe in self.units:
            if len(oe.lines) > 2:
                warnings.warn("contains %d lines (should be 2 at most): languages %r" % (len(oe.lines), oe.languages))
                oekeys = [line.getkey() for line in oe.lines]
                warnings.warn("contains %d lines (should be 2 at most): keys %r" % (len(oe.lines), oekeys))
            oeline = str(oe) + "\r\n"
            lines.append(oeline)
        return "".join(lines)

272
class oomultifile:
    """Presents one huge GSI file as a collection of smaller subfiles."""

    def __init__(self, filename, mode=None, multifilestyle="single"):
        """Open filename and, when reading, index which lines belong to which subfile.

        If mode is None the file is opened for reading when it exists and
        for writing otherwise.
        """
        self.filename = filename
        if mode is None:
            mode = 'w'
            if os.path.exists(filename):
                mode = 'r'
        self.mode = mode
        self.multifilestyle = multifilestyle
        self.multifilename = os.path.splitext(filename)[0]
        self.multifile = open(filename, mode)
        self.subfilelines = {}
        if mode == "r":
            self.createsubfileindex()

    def createsubfileindex(self):
        """Read all the lines and record the line numbers of every subfile."""
        for linenum, line in enumerate(self.multifile):
            subfile = self.getsubfilename(line)
            self.subfilelines.setdefault(subfile, []).append(linenum)

    def getsubfilename(self, line):
        """Return the subfile name for line, based on its first two columns.

        Raises ValueError if the line holds fewer than two tabs.
        """
        if line.count("\t") < 2:
            raise ValueError("invalid tab-delimited line: %r" % line)
        module, filename = line.split("\t", 2)[:2]
        style = self.multifilestyle
        if style == "onefile":
            ooname = self.multifilename
        elif style == "toplevel":
            ooname = module
        else:
            # Module plus the directory part of the (backslash or slash
            # separated) source filename.
            directories = filename.replace("\\", "/").split("/")[:-1]
            ooname = os.path.join(module, *directories)
        return ooname + os.extsep + "oo"

    def listsubfiles(self):
        """Return the names of the subfiles in the file."""
        return self.subfilelines.keys()

    def __iter__(self):
        """Iterate through the subfile names."""
        for name in self.listsubfiles():
            yield name

    def __contains__(self, pathname):
        """Check whether pathname is one of the indexed subfiles."""
        return pathname in self.subfilelines

    def getsubfilesrc(self, subfile):
        """Return the lines belonging to subfile, concatenated into one string."""
        wanted = dict.fromkeys(self.subfilelines[subfile])
        self.multifile.seek(0)
        return "".join([line for linenum, line in enumerate(self.multifile)
                        if linenum in wanted])

    def openinputfile(self, subfile):
        """Return a pseudo-file object holding the source of the given subfile."""
        inputfile = wStringIO.StringIO(self.getsubfilesrc(subfile))
        inputfile.filename = subfile
        return inputfile

    def openoutputfile(self, subfile):
        """Return a pseudo-file object whose contents are written to the multifile on close."""
        def onclose(contents):
            self.multifile.write(contents)
            self.multifile.flush()

        outputfile = wStringIO.CatchStringOutput(onclose)
        outputfile.filename = subfile
        return outputfile

    def getoofile(self, subfile):
        """Return an oofile built up from the given subfile's lines."""
        oosubfile = oofile()
        oosubfile.filename = subfile
        oosubfile.parse(self.getsubfilesrc(subfile))
        return oosubfile


if __name__ == '__main__':
    # Round trip: parse everything on stdin and write it back to stdout.
    stdin_oofile = oofile(sys.stdin.read())
    sys.stdout.write(str(stdin_oofile))