Package translate :: Package storage :: Module pypo
[hide private]
[frames] | [no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """classes that hold units of .po files (pounit) or entire files (pofile) 
 23  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 24   
 25  from __future__ import generators 
 26  from translate.misc.multistring import multistring 
 27  from translate.misc import quote 
 28  from translate.misc import textwrap 
 29  from translate.lang import data 
 30  from translate.storage import pocommon, base 
 31  import re 
 32   
 33  lsep = "\n#: " 
 34  """Separator for #: entries"""
 35   
 36  # general functions for quoting / unquoting po strings 
 37   
 38  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 39  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 40   
def escapeforpo(line):
    """Escape the special characters of one line for the po format.

    The line is assumed not to contain a newline.

    @param line: unescaped text
    @return: the text with every key of po_escape_map replaced by its escape
    """
    # quote.find_all presumably yields the offset of every occurrence of the
    # key -- TODO confirm against translate.misc.quote
    hits = []
    for special in po_escape_map:
        hits.extend(quote.find_all(line, special))
    # Drop duplicate offsets and walk them from left to right
    positions = list(dict.fromkeys(hits))
    positions.sort()
    pieces = []
    cursor = 0
    for position in positions:
        pieces.append(line[cursor:position])
        pieces.append(po_escape_map[line[position:position+1]])
        cursor = position + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
59
def unescapehandler(escape):
    """Return the text a po escape sequence stands for.

    Sequences that are not keys of po_unescape_map are returned unchanged.
    """
    if escape in po_unescape_map:
        return po_unescape_map[escape]
    return escape
63
def wrapline(line):
    """Wrap text for po files at a width of 76, keeping all whitespace."""
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space: move a leading space of any
    # continuation line to the end of the line before it.
    for position in range(1, len(wrapped)):
        current = wrapped[position]
        if current.startswith(' '):
            wrapped[position] = current[1:]
            wrapped[position - 1] += ' '
    return wrapped
78
def quoteforpo(text):
    """Quote and escape the given text for a PO file.

    @param text: unescaped text, or None
    @return: list of quoted and escaped lines (empty when text is None)
    """
    if text is None:
        return []
    quoted = []
    paragraphs = text.split("\n")
    final = paragraphs[-1]
    if len(paragraphs) > 1 or len(final) > 71:
        # Multi-line output starts with an empty string, except for a single
        # line that merely ends in a newline
        if len(paragraphs) != 2 or paragraphs[1]:
            quoted.append('""')
        for paragraph in paragraphs[:-1]:
            segments = wrapline(paragraph)
            if not segments:
                # an empty line in the text: only its newline is written
                quoted.append('"\\n"')
                continue
            for segment in segments[:-1]:
                quoted.append('"' + escapeforpo(segment) + '"')
            if segments[-1]:
                quoted.append('"' + escapeforpo(segments[-1]) + '\\n"')
    if final:
        for segment in wrapline(final):
            quoted.append('"' + escapeforpo(segment) + '"')
    return quoted
100
def extractpoline(line):
    """Strip the quotes from a po file line and unescape it.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of what quote.extractwithoutquotes returns
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)
    return result[0]
108
def unquotefrompo(postr):
    """Join the unquoted content of a sequence of quoted po lines.

    @param postr: iterable of quoted lines
    @return: the concatenation of extractpoline() applied to every line
    """
    pieces = []
    for quotedline in postr:
        pieces.append(extractpoline(quotedline))
    return u"".join(pieces)
111
def encodingToUse(encoding):
    """Return the encoding to use, so that a usable value is always available.

    None and the placeholder "CHARSET" are mapped to 'utf-8'; any other value
    is returned unchanged (it is not checked against the installed codecs).
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
117 # if encoding is None: return False 118 # return True 119 # try: 120 # tuple = codecs.lookup(encoding) 121 # except LookupError: 122 # return False 123 # return True 124 125 """ 126 From the GNU gettext manual: 127 WHITE-SPACE 128 # TRANSLATOR-COMMENTS 129 #. AUTOMATIC-COMMENTS 130 #| PREVIOUS MSGID (Gettext 0.16 - check if this is the correct position - not yet implemented) 131 #: REFERENCE... 132 #, FLAG... 133 msgctxt CONTEXT (Gettext 0.15) 134 msgid UNTRANSLATED-STRING 135 msgstr TRANSLATED-STRING 136 """ 137
def extractstr(string):
    """Return the double-quoted part of a po line, including the quotes.

    @param string: one line of a po file, e.g. 'msgid "text"'
    @return: the text from the first to the last double quote.  When the
        closing quote is missing, the rest of the line (without its line
        ending) is returned with a closing quote added.  A line without any
        quote yields the empty po string '""'.
    """
    left = string.find('"')
    if left == -1:
        # Nothing quoted on this line at all
        return '""'
    right = string.rfind('"')
    if right > left:
        return string[left:right+1]
    # Only an opening quote: keep the text and close the string
    return string[left:].rstrip("\r\n") + '"'
145
146 -class pounit(pocommon.pounit):
147 # othercomments = [] # # this is another comment 148 # automaticcomments = [] # #. comment extracted from the source code 149 # sourcecomments = [] # #: sourcefile.xxx:35 150 # typecomments = [] # #, fuzzy 151 # msgidcomments = [] # _: within msgid 152 # msgctxt 153 # msgid = [] 154 # msgstr = [] 155
156 - def __init__(self, source=None, encoding="UTF-8"):
157 self._encoding = encodingToUse(encoding) 158 self.obsolete = False 159 self._initallcomments(blankall=True) 160 self.msgctxt = [] 161 self.msgid = [] 162 self.msgid_pluralcomments = [] 163 self.msgid_plural = [] 164 self.msgstr = [] 165 self.obsoletemsgctxt = [] 166 self.obsoletemsgid = [] 167 self.obsoletemsgid_pluralcomments = [] 168 self.obsoletemsgid_plural = [] 169 self.obsoletemsgstr = [] 170 if source: 171 self.setsource(source) 172 super(pounit, self).__init__(source)
173
174 - def _initallcomments(self, blankall=False):
175 """Initialises allcomments""" 176 if blankall: 177 self.othercomments = [] 178 self.automaticcomments = [] 179 self.sourcecomments = [] 180 self.typecomments = [] 181 self.msgidcomments = [] 182 self.obsoletemsgidcomments = [] 183 self.allcomments = [self.othercomments, 184 self.automaticcomments, 185 self.sourcecomments, 186 self.typecomments, 187 self.msgidcomments, 188 self.obsoletemsgidcomments]
189
190 - def getsource(self):
191 """Returns the unescaped msgid""" 192 multi = multistring(unquotefrompo(self.msgid), self._encoding) 193 if self.hasplural(): 194 pluralform = unquotefrompo(self.msgid_plural) 195 if isinstance(pluralform, str): 196 pluralform = pluralform.decode(self._encoding) 197 multi.strings.append(pluralform) 198 return multi
199
200 - def setsource(self, source):
201 """Sets the msgid to the given (unescaped) value. 202 203 @param source: an unescaped source string. 204 """ 205 if isinstance(source, str): 206 source = source.decode(self._encoding) 207 if isinstance(source, multistring): 208 source = source.strings 209 if isinstance(source, list): 210 self.msgid = quoteforpo(source[0]) 211 if len(source) > 1: 212 self.msgid_plural = quoteforpo(source[1]) 213 else: 214 self.msgid = quoteforpo(source)
    # Expose the msgid as the attribute "source", read through getsource()
    # and written through setsource().
    source = property(getsource, setsource)
217 - def gettarget(self):
218 """Returns the unescaped msgstr""" 219 if isinstance(self.msgstr, dict): 220 multi = multistring(map(unquotefrompo, self.msgstr.values()), self._encoding) 221 else: 222 multi = multistring(unquotefrompo(self.msgstr), self._encoding) 223 return multi
224
225 - def settarget(self, target):
226 """Sets the msgstr to the given (unescaped) value""" 227 if isinstance(target, str): 228 target = target.decode(self._encoding) 229 if target == self.target: 230 return 231 if self.hasplural(): 232 if isinstance(target, multistring): 233 target = target.strings 234 elif isinstance(target, basestring): 235 target = [target] 236 elif isinstance(target,(dict, list)): 237 if len(target) == 1: 238 target = target[0] 239 else: 240 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target)) 241 templates = self.msgstr 242 if isinstance(templates, list): 243 templates = {0: templates} 244 if isinstance(target, list): 245 self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))]) 246 elif isinstance(target, dict): 247 self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()]) 248 else: 249 self.msgstr = quoteforpo(target)
    # Expose the msgstr as the attribute "target", read through gettarget()
    # and written through settarget().
    target = property(gettarget, settarget)
252 - def getnotes(self, origin=None):
253 """Return comments based on origin value (programmer, developer, source code and translator)""" 254 if origin == None: 255 comments = u"".join([comment[2:] for comment in self.othercomments]) 256 comments += u"".join([comment[3:] for comment in self.automaticcomments]) 257 elif origin == "translator": 258 comments = u"".join ([comment[2:] for comment in self.othercomments]) 259 elif origin in ["programmer", "developer", "source code"]: 260 comments = u"".join([comment[3:] for comment in self.automaticcomments]) 261 else: 262 raise ValueError("Comment type not valid") 263 # Let's drop the last newline 264 return comments[:-1]
265
266 - def addnote(self, text, origin=None, position="append"):
267 """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote""" 268 # We don't want to put in an empty '#' without a real comment: 269 if not text: 270 return 271 text = data.forceunicode(text) 272 commentlist = self.othercomments 273 linestart = "# " 274 if origin in ["programmer", "developer", "source code"]: 275 autocomments = True 276 commentlist = self.automaticcomments 277 linestart = "#. " 278 text = text.split("\n") 279 if position == "append": 280 commentlist += [linestart + line + "\n" for line in text] 281 else: 282 newcomments = [linestart + line + "\n" for line in text] 283 newcomments += [line for line in commentlist] 284 if autocomments: 285 self.automaticcomments = newcomments 286 else: 287 self.othercomments = newcomments
288
    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        # Binds a new empty list; the previous list object is not modified.
        self.othercomments = []
292
293 - def copy(self):
294 newpo = self.__class__() 295 newpo.othercomments = self.othercomments[:] 296 newpo.automaticcomments = self.automaticcomments[:] 297 newpo.sourcecomments = self.sourcecomments[:] 298 newpo.typecomments = self.typecomments[:] 299 newpo.obsolete = self.obsolete 300 newpo.msgidcomments = self.msgidcomments[:] 301 newpo._initallcomments() 302 newpo.msgctxt = self.msgctxt[:] 303 newpo.msgid = self.msgid[:] 304 newpo.msgid_pluralcomments = self.msgid_pluralcomments[:] 305 newpo.msgid_plural = self.msgid_plural[:] 306 if isinstance(self.msgstr, dict): 307 newpo.msgstr = self.msgstr.copy() 308 else: 309 newpo.msgstr = self.msgstr[:] 310 311 newpo.obsoletemsgctxt = self.obsoletemsgctxt[:] 312 newpo.obsoletemsgid = self.obsoletemsgid[:] 313 newpo.obsoletemsgid_pluralcomments = self.obsoletemsgid_pluralcomments[:] 314 newpo.obsoletemsgid_plural = self.obsoletemsgid_plural[:] 315 if isinstance(self.obsoletemsgstr, dict): 316 newpo.obsoletemsgstr = self.obsoletemsgstr.copy() 317 else: 318 newpo.obsoletemsgstr = self.obsoletemsgstr[:] 319 return newpo
320
321 - def msgidlen(self):
322 if self.hasplural(): 323 return len(unquotefrompo(self.msgid).strip()) + len(unquotefrompo(self.msgid_plural).strip()) 324 else: 325 return len(unquotefrompo(self.msgid).strip())
326
327 - def msgstrlen(self):
328 if isinstance(self.msgstr, dict): 329 combinedstr = "\n".join([unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()]) 330 return len(combinedstr.strip()) 331 else: 332 return len(unquotefrompo(self.msgstr).strip())
333
334 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
335 """Merges the otherpo (with the same msgid) into this one. 336 337 Overwrite non-blank self.msgstr only if overwrite is True 338 merge comments only if comments is True 339 340 """ 341 342 def mergelists(list1, list2, split=False): 343 #decode where necessary 344 if unicode in [type(item) for item in list2] + [type(item) for item in list1]: 345 for position, item in enumerate(list1): 346 if isinstance(item, str): 347 list1[position] = item.decode("utf-8") 348 for position, item in enumerate(list2): 349 if isinstance(item, str): 350 list2[position] = item.decode("utf-8") 351 352 #Determine the newline style of list1 353 lineend = "" 354 if list1 and list1[0]: 355 for candidate in ["\n", "\r", "\n\r"]: 356 if list1[0].endswith(candidate): 357 lineend = candidate 358 if not lineend: 359 lineend = "" 360 else: 361 lineend = "\n" 362 363 #Split if directed to do so: 364 if split: 365 splitlist1 = [] 366 splitlist2 = [] 367 prefix = "#" 368 for item in list1: 369 splitlist1.extend(item.split()[1:]) 370 prefix = item.split()[0] 371 for item in list2: 372 splitlist2.extend(item.split()[1:]) 373 prefix = item.split()[0] 374 list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1]) 375 else: 376 #Normal merge, but conform to list1 newline style 377 if list1 != list2: 378 for item in list2: 379 if lineend: 380 item = item.rstrip() + lineend 381 # avoid duplicate comment lines (this might cause some problems) 382 if item not in list1 or len(item) < 5: 383 list1.append(item)
384 if not isinstance(otherpo, pounit): 385 super(pounit, self).merge(otherpo, overwrite, comments) 386 return 387 if comments: 388 mergelists(self.othercomments, otherpo.othercomments) 389 mergelists(self.typecomments, otherpo.typecomments) 390 if not authoritative: 391 # We don't bring across otherpo.automaticcomments as we consider ourself 392 # to be the the authority. Same applies to otherpo.msgidcomments 393 mergelists(self.automaticcomments, otherpo.automaticcomments) 394 mergelists(self.msgidcomments, otherpo.msgidcomments) 395 mergelists(self.sourcecomments, otherpo.sourcecomments, split=True) 396 if not self.istranslated() or overwrite: 397 # Remove kde-style comments from the translation (if any). 398 if self._extract_msgidcomments(otherpo.target): 399 otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '') 400 self.target = otherpo.target 401 if self.source != otherpo.source: 402 self.markfuzzy() 403 else: 404 self.markfuzzy(otherpo.isfuzzy()) 405 elif not otherpo.istranslated(): 406 if self.source != otherpo.source: 407 self.markfuzzy() 408 else: 409 if self.target != otherpo.target: 410 self.markfuzzy()
411
412 - def isheader(self):
413 #return (self.msgidlen() == 0) and (self.msgstrlen() > 0) and (len(self.msgidcomments) == 0) 414 #rewritten here for performance: 415 return ((self.msgid == [] or self.msgid == ['""']) and 416 not (self.msgstr == [] or self.msgstr == ['""']) 417 and self.msgidcomments == [] 418 and (self.msgctxt == [] or self.msgctxt == ['""']) 419 and (self.sourcecomments == [] or self.sourcecomments == [""]))
420
421 - def isblank(self):
422 if self.isheader() or len(self.msgidcomments): 423 return False 424 if (self.msgidlen() == 0) and (self.msgstrlen() == 0): 425 return True 426 return False
427 # TODO: remove: 428 # Before, the equivalent of the following was the final return statement: 429 # return len(self.source.strip()) == 0 430
431 - def hastypecomment(self, typecomment):
432 """check whether the given type comment is present""" 433 # check for word boundaries properly by using a regular expression... 434 return sum(map(lambda tcline: len(re.findall("\\b%s\\b" % typecomment, tcline)), self.typecomments)) != 0
435
436 - def hasmarkedcomment(self, commentmarker):
437 """check whether the given comment marker is present as # (commentmarker) ...""" 438 commentmarker = "(%s)" % commentmarker 439 for comment in self.othercomments: 440 if comment.replace("#", "", 1).strip().startswith(commentmarker): 441 return True 442 return False
443
444 - def settypecomment(self, typecomment, present=True):
445 """alters whether a given typecomment is present""" 446 if self.hastypecomment(typecomment) != present: 447 if present: 448 self.typecomments.append("#, %s\n" % typecomment) 449 else: 450 # this should handle word boundaries properly ... 451 typecomments = map(lambda tcline: re.sub("\\b%s\\b[ \t,]*" % typecomment, "", tcline), self.typecomments) 452 self.typecomments = filter(lambda tcline: tcline.strip() != "#,", typecomments)
453
454 - def istranslated(self):
455 return super(pounit, self).istranslated() and not self.isobsolete()
456
457 - def istranslatable(self):
458 return not (self.isheader() or self.isblank())
459
    def isfuzzy(self):
        """Return whether the type comments contain the "fuzzy" flag."""
        return self.hastypecomment("fuzzy")
462
    def markfuzzy(self, present=True):
        """Set (or, with present=False, remove) the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)
465
466 - def isreview(self):
467 return self.hastypecomment("review") or self.hasmarkedcomment("review") or self.hasmarkedcomment("pofilter")
468
    def isobsolete(self):
        """Return the obsolete flag of this unit."""
        return self.obsolete
471
472 - def makeobsolete(self):
473 """Makes this unit obsolete""" 474 self.obsolete = True 475 if self.msgctxt: 476 self.obsoletemsgctxt = self.msgctxt 477 if self.msgid: 478 self.obsoletemsgid = self.msgid 479 self.msgid = [] 480 if self.msgidcomments: 481 self.obsoletemsgidcomments = self.msgidcomments 482 self.msgidcomments = [] 483 if self.msgid_plural: 484 self.obsoletemsgid_plural = self.msgid_plural 485 self.msgid_plural = [] 486 if self.msgstr: 487 self.obsoletemsgstr = self.msgstr 488 self.msgstr = [] 489 self.sourcecomments = [] 490 self.automaticcomments = []
491
492 - def resurrect(self):
493 """Makes an obsolete unit normal""" 494 self.obsolete = False 495 if self.obsoletemsgctxt: 496 self.msgid = self.obsoletemsgctxt 497 self.obsoletemsgctxt = [] 498 if self.obsoletemsgid: 499 self.msgid = self.obsoletemsgid 500 self.obsoletemsgid = [] 501 if self.obsoletemsgidcomments: 502 self.msgidcomments = self.obsoletemsgidcomments 503 self.obsoletemsgidcomments = [] 504 if self.obsoletemsgid_plural: 505 self.msgid_plural = self.obsoletemsgid_plural 506 self.obsoletemsgid_plural = [] 507 if self.obsoletemsgstr: 508 self.msgstr = self.obsoletemsgstr 509 self.obsoletemgstr = []
510
    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        # a unit has a plural when any msgid_plural line is stored
        return len(self.msgid_plural) > 0
514
    def parselines(self, lines):
        """Fill this unit from the given lines of a po file.

        @param lines: the lines without their line endings
        @return: the number of lines consumed, which includes a line on which
            parsing stopped early
        """
        # Which message part the current line continues
        inmsgctxt = 0
        inmsgid = 0
        inmsgid_comment = 0
        inmsgid_plural = 0
        inmsgstr = 0
        # Index of the plural form being read, None for a plain msgstr
        msgstr_pluralid = None
        linesprocessed = 0
        for line in lines:
            line = line + "\n"
            linesprocessed += 1
            # NOTE(review): a newline was just appended, so this length test
            # cannot succeed -- confirm whether blank lines were meant.
            if len(line) == 0:
                continue
            elif line[0] == '#':
                if inmsgstr and not line[1] == '~':
                    # if we're already in the message string, this is from the next element
                    # NOTE(review): linesprocessed already counts this line.
                    break
                if line[1] == '.':
                    self.automaticcomments.append(line)
                elif line[1] == ':':
                    self.sourcecomments.append(line)
                elif line[1] == ',':
                    self.typecomments.append(line)
                elif line[1] == '~':
                    # obsolete entry: drop the "#~ " marker and parse the
                    # rest of the line like a normal one below
                    line = line[3:]
                    self.obsolete = True
                else:
                    self.othercomments.append(line)
            # 'msgid_plural' has to be tested before 'msgid', its prefix
            if line.startswith('msgid_plural'):
                inmsgctxt = 0
                inmsgid = 0
                inmsgid_plural = 1
                inmsgstr = 0
                inmsgid_comment = 0
            elif line.startswith('msgctxt'):
                inmsgctxt = 1
                inmsgid = 0
                inmsgid_plural = 0
                inmsgstr = 0
                inmsgid_comment = 0
            elif line.startswith('msgid'):
                # if we just finished a msgstr or msgid_plural, there is probably an
                # empty line missing between the units, so let's stop the parsing now.
                if inmsgstr or inmsgid_plural:
                    break
                inmsgctxt = 0
                inmsgid = 1
                inmsgid_plural = 0
                inmsgstr = 0
                inmsgid_comment = 0
            elif line.startswith('msgstr'):
                inmsgctxt = 0
                inmsgid = 0
                inmsgid_plural = 0
                inmsgstr = 1
                if line.startswith('msgstr['):
                    msgstr_pluralid = int(line[len('msgstr['):line.find(']')].strip())
                else:
                    msgstr_pluralid = None
            # Store the quoted text of the line in the part being read
            extracted = extractstr(line)
            if not extracted is None:
                if inmsgctxt:
                    self.msgctxt.append(extracted)
                elif inmsgid:
                    # TODO: improve kde comment detection
                    if extracted.find("_:") != -1:
                        inmsgid_comment = 1
                    if inmsgid_comment:
                        self.msgidcomments.append(extracted)
                    else:
                        self.msgid.append(extracted)
                    # the comment ends with the line holding an escaped newline
                    if inmsgid_comment and extracted.find("\\n") != -1:
                        inmsgid_comment = 0
                elif inmsgid_plural:
                    if extracted.find("_:") != -1:
                        inmsgid_comment = 1
                    if inmsgid_comment:
                        self.msgid_pluralcomments.append(extracted)
                    else:
                        self.msgid_plural.append(extracted)
                    if inmsgid_comment and extracted.find("\\n") != -1:
                        inmsgid_comment = 0
                elif inmsgstr:
                    if msgstr_pluralid is None:
                        self.msgstr.append(extracted)
                    else:
                        # the first plural form turns msgstr into a dict
                        # keyed by the plural index
                        if type(self.msgstr) == list:
                            self.msgstr = {0: self.msgstr}
                        if msgstr_pluralid not in self.msgstr:
                            self.msgstr[msgstr_pluralid] = []
                        self.msgstr[msgstr_pluralid].append(extracted)
        if self.obsolete:
            self.makeobsolete()
        # If this unit is the header, we have to get the encoding to ensure that no
        # methods are called that need the encoding before we obtained it.
        if self.isheader():
            charset = re.search("charset=([^\\s]+)", unquotefrompo(self.msgstr))
            if charset:
                self._encoding = encodingToUse(charset.group(1))
        return linesprocessed
615
616 - def parse(self, src):
617 if isinstance(src, str): 618 # This has not been decoded yet, so we need to make a plan 619 src = src.decode(self._encoding) 620 return self.parselines(src.split("\n"))
621
    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Return the po text of one message part.

        @param partname: the keyword to write, e.g. "msgid" or "#~ msgstr"
        @param partlines: the quoted lines of the part, or a dict of such
            lists keyed by plural index
        @param partcomments: quoted msgid comment lines to write with the part
        @return: the keyword followed by the lines, each ending in a newline
        """
        if isinstance(partlines, dict):
            # plural forms: write one "partname[n]" part per key, in key order
            partkeys = partlines.keys()
            partkeys.sort()
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        # index of the first line that still has to be written at the end
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            # no comments: the first line goes on the keyword line
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            # neither lines nor comments: write an empty string
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr
664
665 - def _encodeifneccessary(self, output):
666 """encodes unicode strings and returns other strings unchanged""" 667 if isinstance(output, unicode): 668 encoding = encodingToUse(getattr(self, "encoding", "UTF-8")) 669 return output.encode(encoding) 670 return output
671
672 - def __str__(self):
673 """convert to a string. double check that unicode is handled somehow here""" 674 output = self._getoutput() 675 return self._encodeifneccessary(output)
676
677 - def _getoutput(self):
678 """return this po element as a string""" 679 lines = [] 680 lines.extend(self.othercomments) 681 if self.isobsolete(): 682 lines.extend(self.typecomments) 683 obsoletelines = [] 684 if self.obsoletemsgctxt: 685 obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt)) 686 obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments)) 687 if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments: 688 obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments)) 689 obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr)) 690 for index, obsoleteline in enumerate(obsoletelines): 691 # We need to account for a multiline msgid or msgstr here 692 obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "') 693 lines.extend(obsoletelines) 694 lines = [self._encodeifneccessary(line) for line in lines] 695 return "".join(lines) 696 # if there's no msgid don't do msgid and string, unless we're the header 697 # this will also discard any comments other than plain othercomments... 698 if (len(self.msgid) == 0) or ((len(self.msgid) == 1) and (self.msgid[0] == '""')): 699 if not (self.isheader() or self.msgidcomments or self.sourcecomments): 700 return "".join(lines) 701 lines.extend(self.automaticcomments) 702 lines.extend(self.sourcecomments) 703 lines.extend(self.typecomments) 704 if self.msgctxt: 705 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt)) 706 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments)) 707 if self.msgid_plural or self.msgid_pluralcomments: 708 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments)) 709 lines.append(self._getmsgpartstr("msgstr", self.msgstr)) 710 lines = [self._encodeifneccessary(line) for line in lines] 711 postr = "".join(lines) 712 return postr
713
714 - def getlocations(self):
715 """Get a list of locations from sourcecomments in the PO unit 716 717 rtype: List 718 return: A list of the locations with '#: ' stripped 719 720 """ 721 locations = [] 722 for sourcecomment in self.sourcecomments: 723 locations += quote.rstripeol(sourcecomment)[3:].split() 724 return locations
725
    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        # each location is stored as a "#: " comment line of its own
        self.sourcecomments.append("#: %s\n" % location)
734
735 - def _extract_msgidcomments(self, text=None):
736 """Extract KDE style msgid comments from the unit. 737 738 @rtype: String 739 @return: Returns the extracted msgidcomments found in this unit's msgid. 740 741 """ 742 743 if not text: 744 text = unquotefrompo(self.msgidcomments) 745 return text.split('\n')[0].replace('_: ', '', 1)
746
747 - def getcontext(self):
748 """Get the message context.""" 749 return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()
750
751 - def getid(self):
752 """Returns a unique identifier for this unit.""" 753 context = self.getcontext() 754 # Gettext does not consider the plural to determine duplicates, only 755 # the msgid. For generation of .mo files, we might want to use this 756 # code to generate the entry for the hash table, but for now, it is 757 # commented out for conformance to gettext. 758 # id = '\0'.join(self.source.strings) 759 id = self.source 760 if self.msgidcomments: 761 id = "_: %s\n%s" % (context, id) 762 elif context: 763 id = "%s\04%s" % (context, id) 764 return id
765
766 -class pofile(pocommon.pofile):
767 """this represents a .po file containing various units""" 768 UnitClass = pounit
    def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
        """construct a pofile, optionally reading in from inputfile.

        @param inputfile: passed to parse() when it is not None
        @param encoding: encoding can be specified but otherwise will be read
            from the PO header
        @param unitclass: the class used to create the units of this file
        """
        self.UnitClass = unitclass
        pocommon.pofile.__init__(self, unitclass=unitclass)
        self.units = []
        self.filename = ''
        # None (no encoding given) becomes 'utf-8' here
        self._encoding = encodingToUse(encoding)
        if inputfile is not None:
            self.parse(inputfile)
779
780 - def changeencoding(self, newencoding):
781 """changes the encoding on the file""" 782 self._encoding = encodingToUse(newencoding) 783 if not self.units: 784 return 785 header = self.header() 786 if not header or header.isblank(): 787 return 788 charsetline = None 789 headerstr = unquotefrompo(header.msgstr) 790 for line in headerstr.split("\n"): 791 if not ":" in line: continue 792 key, value = line.strip().split(":", 1) 793 if key.strip() != "Content-Type": continue 794 charsetline = line 795 if charsetline is None: 796 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 797 else: 798 charset = re.search("charset=([^ ]*)", charsetline) 799 if charset is None: 800 newcharsetline = charsetline 801 if not newcharsetline.strip().endswith(";"): 802 newcharsetline += ";" 803 newcharsetline += " charset=%s" % self._encoding 804 else: 805 charset = charset.group(1) 806 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 807 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 808 header.msgstr = quoteforpo(headerstr)
809
    def parse(self, input):
        """parses the given file or file source string

        @param input: the po source as a string, or an object with a read()
            method, which is read completely and then closed
        @raise base.ParseError: for any exception raised while parsing; the
            original exception is not passed on
        """
        try:
            if hasattr(input, 'name'):
                self.filename = input.name
            elif not getattr(self, 'filename', ''):
                self.filename = ''
            if hasattr(input, "read"):
                posrc = input.read()
                input.close()
                input = posrc
            # TODO: change this to a proper parser that doesn't do line-by-line madness
            lines = input.split("\n")
            # lines[start:end] is the block of lines handed to the next unit
            start = 0
            end = 0
            # make only the first one the header
            linesprocessed = 0
            # False until the lines have been decoded, which happens once the
            # first unit has been stored
            is_decoded = False
            while end <= len(lines):
                if (end == len(lines)) or (not lines[end].strip()):   # end of lines or blank line
                    newpe = self.UnitClass(encoding=self._encoding)
                    unit_lines = lines[start:end]
                    # We need to work carefully if we haven't decoded properly yet.
                    # So let's solve this temporarily until we actually get the
                    # encoding from the header.
                    if not is_decoded:
                        unit_lines = [line.decode('ascii', 'ignore') for line in unit_lines]
                    linesprocessed = newpe.parselines(unit_lines)
                    start += linesprocessed
                    # TODO: find a better way of working out if we actually read anything
                    if linesprocessed >= 1 and newpe._getoutput():
                        self.units.append(newpe)
                        if not is_decoded:
                            if newpe.isheader(): # If there is a header...
                                if "Content-Type" in self.parseheader(): # and a Content-Type...
                                    if self._encoding.lower() != 'charset': # with a valid charset...
                                        self._encoding = newpe._encoding # then change the encoding
                            # otherwise we'll decode using UTF-8
                            lines = self.decode(lines)
                            # start again from the top with the decoded lines
                            self.units = []
                            start = 0
                            end = 0
                            is_decoded = True
                end = end+1
        except Exception, e:
            raise base.ParseError()
856
857 - def removeduplicates(self, duplicatestyle="merge"):
858 """make sure each msgid is unique ; merge comments etc from duplicates into original""" 859 msgiddict = {} 860 uniqueunits = [] 861 # we sometimes need to keep track of what has been marked 862 # TODO: this is using a list as the pos aren't hashable, but this is slow... 863 markedpos = [] 864 def addcomment(thepo): 865 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 866 markedpos.append(thepo)
867 for thepo in self.units: 868 if duplicatestyle.startswith("msgid_comment"): 869 msgid = unquotefrompo(thepo.msgidcomments) + unquotefrompo(thepo.msgid) 870 else: 871 msgid = unquotefrompo(thepo.msgid) 872 if thepo.isheader(): 873 # header msgids shouldn't be merged... 874 uniqueunits.append(thepo) 875 elif duplicatestyle == "msgid_comment_all": 876 addcomment(thepo) 877 uniqueunits.append(thepo) 878 elif msgid in msgiddict: 879 if duplicatestyle == "merge": 880 if msgid: 881 msgiddict[msgid].merge(thepo) 882 else: 883 addcomment(thepo) 884 uniqueunits.append(thepo) 885 elif duplicatestyle == "keep": 886 uniqueunits.append(thepo) 887 elif duplicatestyle == "msgid_comment": 888 origpo = msgiddict[msgid] 889 if origpo not in markedpos: 890 addcomment(origpo) 891 addcomment(thepo) 892 uniqueunits.append(thepo) 893 elif duplicatestyle == "msgctxt": 894 origpo = msgiddict[msgid] 895 if origpo not in markedpos: 896 origpo.msgctxt.append('"%s"' % " ".join(origpo.getlocations())) 897 markedpos.append(thepo) 898 thepo.msgctxt.append('"%s"' % " ".join(thepo.getlocations())) 899 uniqueunits.append(thepo) 900 else: 901 if not msgid and duplicatestyle != "keep": 902 addcomment(thepo) 903 msgiddict[msgid] = thepo 904 uniqueunits.append(thepo) 905 self.units = uniqueunits
906
907 - def __str__(self):
908 """convert to a string. double check that unicode is handled somehow here""" 909 output = self._getoutput() 910 if isinstance(output, unicode): 911 return output.encode(getattr(self, "encoding", "UTF-8")) 912 return output
913
914 - def _getoutput(self):
915 """convert the units back to lines""" 916 lines = [] 917 for unit in self.units: 918 unitsrc = str(unit) + "\n" 919 lines.append(unitsrc) 920 lines = "".join(self.encode(lines)).rstrip() 921 #After the last pounit we will have \n\n and we only want to end in \n: 922 if lines: lines += "\n" 923 return lines
924
925 - def encode(self, lines):
926 """encode any unicode strings in lines in self._encoding""" 927 newlines = [] 928 encoding = self._encoding 929 if encoding is None or encoding.lower() == "charset": 930 encoding = 'UTF-8' 931 for line in lines: 932 if isinstance(line, unicode): 933 line = line.encode(encoding) 934 newlines.append(line) 935 return newlines
936
937 - def decode(self, lines):
938 """decode any non-unicode strings in lines with self._encoding""" 939 newlines = [] 940 for line in lines: 941 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 942 try: 943 line = line.decode(self._encoding) 944 except UnicodeError, e: 945 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 946 newlines.append(line) 947 return newlines
948
949 - def unit_iter(self):
950 for unit in self.units: 951 if not (unit.isheader() or unit.isobsolete()): 952 yield unit

# When run as a script: parse a po file from standard input and write it
# back to standard output.
if __name__ == '__main__':
    import sys
    pf = pofile(sys.stdin)
    sys.stdout.write(str(pf))