1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of .po files (pounit) or entire files (pofile)
23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
24
25 from __future__ import generators
26 from translate.misc.multistring import multistring
27 from translate.misc import quote
28 from translate.misc import textwrap
29 from translate.lang import data
30 from translate.storage import pocommon, base
31 import re
32
33 lsep = "\n#: "
34 """Seperator for #: entries"""
35
36
37
# Maps each escape sequence as written in a PO file to the character it stands for.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# Inverse table: raw character -> escape sequence to write into the PO file.
po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])


def escapeforpo(line):
    """Escapes a line for po format. Assumes no newline occurs in the line.

    Every character that has an entry in po_escape_map is replaced by its
    escape sequence; all other characters are copied through unchanged.

    @param line: unescaped text
    @return: the escaped text
    """
    # All keys of po_escape_map are single characters, so one pass with a
    # table lookup gives the same result as locating, sorting and stitching
    # the special positions, without repeated string concatenation.
    return "".join([po_escape_map.get(char, char) for char in line])
59
63
def wrapline(line):
    """Wrap text for po files.

    @param line: the text to wrap (at a width of 76)
    @return: list of wrapped pieces; whitespace is kept, but a piece never
        starts with a space - that space is moved to the end of the piece
        before it
    """
    pieces = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)

    # Walk every piece after the first one and hand a leading space back to
    # the preceding piece.
    for position in range(1, len(pieces)):
        piece = pieces[position]
        if piece.startswith(' '):
            pieces[position] = piece[1:]
            pieces[position - 1] += ' '
    return pieces
78
def quoteforpo(text):
    """Quote and escape the given text for a PO file.

    @param text: unescaped text, or None
    @return: list of quoted, escaped lines (empty for None)
    """
    if text is None:
        return []
    quoted = []
    paragraphs = text.split("\n")
    multiline = len(paragraphs) > 1 or (len(paragraphs) == 1 and len(paragraphs[0]) > 71)
    if multiline:
        # Multi-line output starts with an empty "" line, except for a single
        # line that merely ends in a newline.
        if len(paragraphs) != 2 or paragraphs[1]:
            quoted.append('""')
        for paragraph in paragraphs[:-1]:
            pieces = wrapline(paragraph)
            if len(pieces) == 0:
                # an empty paragraph is just its newline
                quoted.append('"\\n"')
                continue
            for piece in pieces[:-1]:
                quoted.append('"' + escapeforpo(piece) + '"')
            if pieces[-1]:
                # the last piece of a paragraph carries the newline
                quoted.append('"' + escapeforpo(pieces[-1]) + '\\n"')
    remainder = paragraphs[-1]
    if remainder:
        for piece in wrapline(remainder):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
100
102 """Remove quote and unescape line from po file.
103
104 @param line: a quoted line from a po file (msgid or msgstr)
105 """
# NOTE(review): the def line of this function is not present in this view.
# quote.extractwithoutquotes is called with '"' as both start and end
# delimiter, '\\' as the escape character and unescapehandler for escapes;
# only element [0] of its result is kept.
106 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
107 return extracted
108
111
def encodingToUse(encoding):
    """Return the encoding to use, falling back to utf-8 when none is given.

    None and the placeholder "CHARSET" both map to 'utf-8'. Any other value
    is returned unchanged; it is not checked against the codecs known to the
    python runtime.

    @param encoding: an encoding name, "CHARSET", or None
    @return: 'utf-8' for None/"CHARSET", otherwise the given encoding
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
117
118
119
120
121
122
123
124
125 """
126 From the GNU gettext manual:
127 WHITE-SPACE
128 # TRANSLATOR-COMMENTS
129 #. AUTOMATIC-COMMENTS
130 #| PREVIOUS MSGID (Gettext 0.16 - check if this is the correct position - not yet implemented)
131 #: REFERENCE...
132 #, FLAG...
133 msgctxt CONTEXT (Gettext 0.15)
134 msgid UNTRANSLATED-STRING
135 msgstr TRANSLATED-STRING
136 """
137
def extractstr(string):
    """Return the double-quoted part of a PO line, quotes included.

    @param string: one line of a PO entry, e.g. 'msgid "text"'
    @return: the text from the first '"' up to and including the last '"',
        or None when the line contains no '"' at all
    """
    left = string.find('"')
    if left == -1:
        # Nothing quoted on this line. Slicing with -1 would return the last
        # character plus a stray quote; parselines already skips None.
        return None
    right = string.rfind('"')
    return string[left:right+1]
145
146 -class pounit(pocommon.pounit):
147
148
149
150
151
152
153
154
155
def __init__(self, source=None, encoding="UTF-8"):
    """Set up an empty unit, optionally initialised with a source string.

    @param source: unescaped msgid text, or None
    @param encoding: encoding name, passed through encodingToUse
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Every message part, and its obsolete counterpart, starts as its own
    # empty list.
    for prefix in ("", "obsolete"):
        for part in ("msgctxt", "msgid", "msgid_pluralcomments", "msgid_plural", "msgstr"):
            setattr(self, prefix + part, [])
    if source:
        self.setsource(source)
    super(pounit, self).__init__(source)
173
189
def getsource(self):
    """Returns the unescaped msgid"""
    singular = unquotefrompo(self.msgid)
    result = multistring(singular, self._encoding)
    if not self.hasplural():
        return result
    # The plural form is appended as an extra string of the multistring,
    # decoded first when it is a byte string.
    plural = unquotefrompo(self.msgid_plural)
    if isinstance(plural, str):
        plural = plural.decode(self._encoding)
    result.strings.append(plural)
    return result
199
215 source = property(getsource, setsource)
216
224
def settarget(self, target):
    """Sets the msgstr to the given (unescaped) value

    @param target: a string, a multistring, or a list/dict of plural forms
    @raise ValueError: when the unit has no plural but more than one target
        string is given
    """
    if isinstance(target, str):
        target = target.decode(self._encoding)
    if target == self.target:
        # nothing to change
        return
    if self.hasplural():
        # multistring is tested before basestring on purpose: keep its
        # individual strings rather than wrapping it as one string
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) == 1:
            target = target[0]
        else:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
    if isinstance(target, list):
        self.msgstr = dict([(index, quoteforpo(targetstring)) for index, targetstring in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(index, quoteforpo(targetstring)) for index, targetstring in target.iteritems()])
    else:
        self.msgstr = quoteforpo(target)
250 target = property(gettarget, settarget)
251
253 """Return comments based on origin value (programmer, developer, source code and translator)"""
# NOTE(review): the def line of this method is not present in this view;
# `origin` is the only name the body takes from it.
# comment[2:] drops the first two characters of each othercomments entry and
# comment[3:] the first three of each automaticcomments entry (addnote in
# this file writes them with "# " and "#. " prefixes respectively).
254 if origin == None:
255 comments = u"".join([comment[2:] for comment in self.othercomments])
256 comments += u"".join([comment[3:] for comment in self.automaticcomments])
257 elif origin == "translator":
258 comments = u"".join ([comment[2:] for comment in self.othercomments])
259 elif origin in ["programmer", "developer", "source code"]:
260 comments = u"".join([comment[3:] for comment in self.automaticcomments])
261 else:
262 raise ValueError("Comment type not valid")
263
# The final character of the joined text is dropped (presumably the trailing
# newline of the last comment line - confirm).
264 return comments[:-1]
265
def addnote(self, text, origin=None, position="append"):
    """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: the note; may span several lines. Empty text is ignored.
    @param origin: "programmer", "developer" or "source code" adds an
        automatic ("#. ") comment; anything else adds a translator ("# ")
        comment
    @param position: "append" adds after the existing comments; any other
        value places the new lines before them
    """
    if not text:
        return
    text = data.forceunicode(text)
    # Decided for every call, so the prepend branch below never reads an
    # unassigned flag when the note comes from the translator.
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        commentlist = self.automaticcomments
        linestart = "#. "
    else:
        commentlist = self.othercomments
        linestart = "# "
    newlines = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        # in-place extension of the unit's own list
        commentlist += newlines
    else:
        newcomments = newlines + list(commentlist)
        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments
288
290 """Remove all the translator's notes (other comments)"""
# Only othercomments is reset here; automaticcomments is left untouched.
291 self.othercomments = []
292
# Builds a new unit of the same class and copies this unit's state into it.
# NOTE(review): the def line of this method is not present in this view.
294 newpo = self.__class__()
# Lists are copied with [:] so the two units do not share list objects.
295 newpo.othercomments = self.othercomments[:]
296 newpo.automaticcomments = self.automaticcomments[:]
297 newpo.sourcecomments = self.sourcecomments[:]
298 newpo.typecomments = self.typecomments[:]
299 newpo.obsolete = self.obsolete
300 newpo.msgidcomments = self.msgidcomments[:]
# NOTE(review): called after the comment lists were assigned and without
# blankall - presumably it only fills in lists that are still missing; confirm.
301 newpo._initallcomments()
302 newpo.msgctxt = self.msgctxt[:]
303 newpo.msgid = self.msgid[:]
304 newpo.msgid_pluralcomments = self.msgid_pluralcomments[:]
305 newpo.msgid_plural = self.msgid_plural[:]
# msgstr is a dict for plural entries and a list otherwise; dict.copy() is
# shallow, so the per-plural line lists stay shared with the original.
306 if isinstance(self.msgstr, dict):
307 newpo.msgstr = self.msgstr.copy()
308 else:
309 newpo.msgstr = self.msgstr[:]
310
311 newpo.obsoletemsgctxt = self.obsoletemsgctxt[:]
312 newpo.obsoletemsgid = self.obsoletemsgid[:]
313 newpo.obsoletemsgid_pluralcomments = self.obsoletemsgid_pluralcomments[:]
314 newpo.obsoletemsgid_plural = self.obsoletemsgid_plural[:]
315 if isinstance(self.obsoletemsgstr, dict):
316 newpo.obsoletemsgstr = self.obsoletemsgstr.copy()
317 else:
318 newpo.obsoletemsgstr = self.obsoletemsgstr[:]
319 return newpo
320
326
def msgstrlen(self):
    """Return the length of the unquoted msgstr with surrounding whitespace stripped.

    For a plural msgstr (a dict) the stripped forms are joined with newlines
    before measuring.
    """
    msgstr = self.msgstr
    if not isinstance(msgstr, dict):
        return len(unquotefrompo(msgstr).strip())
    forms = [unquotefrompo(form).strip() for form in msgstr.itervalues()]
    return len("\n".join(forms).strip())
333
334 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
335 """Merges the otherpo (with the same msgid) into this one.
336
337 Overwrite non-blank self.msgstr only if overwrite is True
338 merge comments only if comments is True
339
340 """
341
# mergelists extends list1 in place with entries from list2.
# With split=True each entry is split on whitespace and merged token by
# token; otherwise whole entries are compared.
342 def mergelists(list1, list2, split=False):
343
# If either list holds a unicode item, every byte-string item in both lists
# is decoded as UTF-8 so the comparisons below work on one type.
344 if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
345 for position, item in enumerate(list1):
346 if isinstance(item, str):
347 list1[position] = item.decode("utf-8")
348 for position, item in enumerate(list2):
349 if isinstance(item, str):
350 list2[position] = item.decode("utf-8")
351
352
# The line ending is taken from the first entry of list1; the last matching
# candidate wins.
# NOTE(review): "\n\r" looks like it was meant to be "\r\n" - confirm.
353 lineend = ""
354 if list1 and list1[0]:
355 for candidate in ["\n", "\r", "\n\r"]:
356 if list1[0].endswith(candidate):
357 lineend = candidate
# NOTE(review): this assignment leaves lineend unchanged (it is already "").
358 if not lineend:
359 lineend = ""
360 else:
361 lineend = "\n"
362
363
364 if split:
365 splitlist1 = []
366 splitlist2 = []
# The first whitespace-separated token of each entry is treated as the
# prefix; the remaining tokens are the items to merge. The prefix of the
# last entry seen is the one used for the new lines.
367 prefix = "#"
368 for item in list1:
369 splitlist1.extend(item.split()[1:])
370 prefix = item.split()[0]
371 for item in list2:
372 splitlist2.extend(item.split()[1:])
373 prefix = item.split()[0]
374 list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
375 else:
376
377 if list1 != list2:
378 for item in list2:
379 if lineend:
380 item = item.rstrip() + lineend
381
# Entries shorter than 5 characters are appended even when already present.
382 if item not in list1 or len(item) < 5:
383 list1.append(item)
# Units that are not PO units are handed to the base class merge.
384 if not isinstance(otherpo, pounit):
385 super(pounit, self).merge(otherpo, overwrite, comments)
386 return
387 if comments:
388 mergelists(self.othercomments, otherpo.othercomments)
389 mergelists(self.typecomments, otherpo.typecomments)
390 if not authoritative:
391
392
393 mergelists(self.automaticcomments, otherpo.automaticcomments)
394 mergelists(self.msgidcomments, otherpo.msgidcomments)
395 mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
396 if not self.istranslated() or overwrite:
397
# A "_: ...\n" msgid comment found in the other unit's target is removed
# from it before the target is copied. Note that this modifies otherpo.
398 if self._extract_msgidcomments(otherpo.target):
399 otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
400 self.target = otherpo.target
# A differing source marks the result fuzzy; otherwise the other unit's
# fuzzy state is taken over.
401 if self.source != otherpo.source:
402 self.markfuzzy()
403 else:
404 self.markfuzzy(otherpo.isfuzzy())
405 elif not otherpo.istranslated():
406 if self.source != otherpo.source:
407 self.markfuzzy()
408 else:
409 if self.target != otherpo.target:
410 self.markfuzzy()
411
413
414
def isheader(self):
    """Return whether this unit looks like the PO header.

    That is: empty msgid and msgctxt, non-empty msgstr, no msgid comments
    and no source comments.
    """
    def unset(part):
        # a message part counts as unset when it has no lines or only '""'
        return part == [] or part == ['""']

    if not unset(self.msgid):
        return False
    if unset(self.msgstr):
        return False
    if self.msgidcomments != []:
        return False
    if not unset(self.msgctxt):
        return False
    return self.sourcecomments == [] or self.sourcecomments == [""]
420
def isblank(self):
    """Return True when both msgid and msgstr are empty.

    Headers and units carrying msgid comments are never blank.
    """
    if self.isheader() or len(self.msgidcomments):
        return False
    return (self.msgidlen() == 0) and (self.msgstrlen() == 0)
427
428
429
430
435
443
453
456
459
462
465
468
471
def makeobsolete(self):
    """Makes this unit obsolete"""
    self.obsolete = True
    # msgctxt is copied to its obsolete slot but, unlike the parts below,
    # not cleared.
    if self.msgctxt:
        self.obsoletemsgctxt = self.msgctxt
    # Each remaining non-empty part moves to its obsolete slot and is
    # replaced by an empty list.
    for part in ("msgid", "msgidcomments", "msgid_plural", "msgstr"):
        current = getattr(self, part)
        if current:
            setattr(self, "obsolete" + part, current)
            setattr(self, part, [])
    self.sourcecomments = []
    self.automaticcomments = []
491
def resurrect(self):
    """Makes an obsolete unit normal

    Every non-empty obsolete part is moved back to the live part of the
    same name and the obsolete slot is emptied.
    """
    self.obsolete = False
    # Each obsolete part goes back to the part of the same name: msgctxt to
    # msgctxt (not msgid), and obsoletemsgstr is the slot that is emptied.
    for part in ("msgctxt", "msgid", "msgidcomments", "msgid_plural", "msgstr"):
        saved = getattr(self, "obsolete" + part)
        if saved:
            setattr(self, part, saved)
            setattr(self, "obsolete" + part, [])
510
def hasplural(self):
    """returns whether this pounit contains plural strings..."""
    plural_lines = self.msgid_plural
    return len(plural_lines) != 0
514
# Parses the lines of one PO entry into this unit and returns the number of
# lines consumed. The in* flags record which message part continuation
# strings belong to.
# NOTE(review): the def line of this method is not present in this view.
516 inmsgctxt = 0
517 inmsgid = 0
518 inmsgid_comment = 0
519 inmsgid_plural = 0
520 inmsgstr = 0
521 msgstr_pluralid = None
522 linesprocessed = 0
523 for line in lines:
# The newline is re-added, so line is never empty here.
# NOTE(review): the len(line) == 0 test below can therefore never be true.
524 line = line + "\n"
525 linesprocessed += 1
526 if len(line) == 0:
527 continue
528 elif line[0] == '#':
# A comment other than "#~" after the msgstr ends parsing; the line that
# triggers the break has already been counted in linesprocessed.
529 if inmsgstr and not line[1] == '~':
530
531 break
# The second character selects the comment list.
532 if line[1] == '.':
533 self.automaticcomments.append(line)
534 elif line[1] == ':':
535 self.sourcecomments.append(line)
536 elif line[1] == ',':
537 self.typecomments.append(line)
538 elif line[1] == '~':
# The first three characters are dropped so the rest of the line is handled
# by the keyword checks below; the unit is flagged obsolete.
539 line = line[3:]
540 self.obsolete = True
541 else:
542 self.othercomments.append(line)
# Keyword lines switch the current part. msgid_plural is tested before
# msgid because both start with "msgid".
543 if line.startswith('msgid_plural'):
544 inmsgctxt = 0
545 inmsgid = 0
546 inmsgid_plural = 1
547 inmsgstr = 0
548 inmsgid_comment = 0
549 elif line.startswith('msgctxt'):
550 inmsgctxt = 1
551 inmsgid = 0
552 inmsgid_plural = 0
553 inmsgstr = 0
554 inmsgid_comment = 0
555 elif line.startswith('msgid'):
556
557
# A msgid after a msgstr or msgid_plural stops parsing this unit.
558 if inmsgstr or inmsgid_plural:
559 break
560 inmsgctxt = 0
561 inmsgid = 1
562 inmsgid_plural = 0
563 inmsgstr = 0
564 inmsgid_comment = 0
565 elif line.startswith('msgstr'):
566 inmsgctxt = 0
567 inmsgid = 0
568 inmsgid_plural = 0
569 inmsgstr = 1
# "msgstr[N]" selects plural form N; plain "msgstr" resets it to None.
570 if line.startswith('msgstr['):
571 msgstr_pluralid = int(line[len('msgstr['):line.find(']')].strip())
572 else:
573 msgstr_pluralid = None
574 extracted = extractstr(line)
575 if not extracted is None:
576 if inmsgctxt:
577 self.msgctxt.append(extracted)
578 elif inmsgid:
579
# A string containing "_:" starts a msgid comment; following strings go to
# msgidcomments until one contains an escaped newline ("\\n").
580 if extracted.find("_:") != -1:
581 inmsgid_comment = 1
582 if inmsgid_comment:
583 self.msgidcomments.append(extracted)
584 else:
585 self.msgid.append(extracted)
586 if inmsgid_comment and extracted.find("\\n") != -1:
587 inmsgid_comment = 0
588 elif inmsgid_plural:
589 if extracted.find("_:") != -1:
590 inmsgid_comment = 1
591 if inmsgid_comment:
592 self.msgid_pluralcomments.append(extracted)
593 else:
594 self.msgid_plural.append(extracted)
595 if inmsgid_comment and extracted.find("\\n") != -1:
596 inmsgid_comment = 0
597 elif inmsgstr:
598 if msgstr_pluralid is None:
599 self.msgstr.append(extracted)
600 else:
# On the first plural form, msgstr is converted from a list to a dict keyed
# by plural index; existing lines become form 0.
601 if type(self.msgstr) == list:
602 self.msgstr = {0: self.msgstr}
603 if msgstr_pluralid not in self.msgstr:
604 self.msgstr[msgstr_pluralid] = []
605 self.msgstr[msgstr_pluralid].append(extracted)
606 if self.obsolete:
607 self.makeobsolete()
608
609
# For the header unit, the charset named in the msgstr becomes the unit's
# encoding.
610 if self.isheader():
611 charset = re.search("charset=([^\\s]+)", unquotefrompo(self.msgstr))
612 if charset:
613 self._encoding = encodingToUse(charset.group(1))
614 return linesprocessed
615
# Byte strings are decoded with the unit's encoding, then the text is split
# on "\n" and passed to parselines, whose result is returned.
# NOTE(review): the def line of this method is not present in this view.
617 if isinstance(src, str):
618
619 src = src.decode(self._encoding)
620 return self.parselines(src.split("\n"))
621
# Renders one message part as text: the keyword partname followed by the
# quoted lines, with any msgid comments placed first.
# NOTE(review): the def line is not present in this view; _getoutput calls
# this with two or three arguments, so partcomments presumably has a default.
# A dict (plural forms) is rendered as one "partname[N]" block per key, in
# sorted key order.
623 if isinstance(partlines, dict):
624 partkeys = partlines.keys()
625 partkeys.sort()
626 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
627 partstr = partname + " "
# partstartline is the index of the first entry of partlines still to be
# written by the loop at the end.
628 partstartline = 0
629 if len(partlines) > 0 and len(partcomments) == 0:
630 partstr += partlines[0]
631 partstartline = 1
632 elif len(partcomments) > 0:
# When the first line unquotes to an empty string it is written on the
# keyword line, and skipped later only if more lines follow.
633 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
634
635 partstr += partlines[0] + '\n'
636
637 if len(partlines) > 1:
638 partstartline += 1
639 else:
640
641 partstr += '""\n'
642
# Several comment lines are merged into one "_:" comment: each has its "_:"
# prefix and trailing escaped newline removed before joining and re-quoting.
643 if len(partcomments) > 1:
644 combinedcomment = []
645 for comment in partcomments:
646 comment = unquotefrompo([comment])
647 if comment.startswith("_:"):
648 comment = comment[len("_:"):]
649 if comment.endswith("\\n"):
650 comment = comment[:-len("\\n")]
651
652 combinedcomment.append(comment)
653 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
654
655 partstr += "\n".join(partcomments)
656 partstr = quote.rstripeol(partstr)
657 else:
# neither lines nor comments: write an empty string
658 partstr += '""'
659 partstr += '\n'
660
661 for partline in partlines[partstartline:]:
662 partstr += partline + '\n'
663 return partstr
664
def _encodeifneccessary(self, output):
    """encodes unicode strings and returns other strings unchanged"""
    if not isinstance(output, unicode):
        return output
    # NOTE(review): this reads an attribute named "encoding", while the unit
    # stores its encoding as "_encoding" - confirm which one is intended.
    codec = encodingToUse(getattr(self, "encoding", "UTF-8"))
    return output.encode(codec)
671
def __str__(self):
    """convert to a string. double check that unicode is handled somehow here"""
    return self._encodeifneccessary(self._getoutput())
676
def _getoutput(self):
    """return this po element as a string"""
    encode = self._encodeifneccessary
    render = self._getmsgpartstr
    pieces = list(self.othercomments)
    if self.isobsolete():
        pieces.extend(self.typecomments)
        obsolete = []
        if self.obsoletemsgctxt:
            obsolete.append(render("#~ msgctxt", self.obsoletemsgctxt))
        obsolete.append(render("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
        if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
            obsolete.append(render("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
        obsolete.append(render("#~ msgstr", self.obsoletemsgstr))
        # continuation strings of an obsolete entry get the "#~ " marker too
        pieces.extend([chunk.replace('\n"', '\n#~ "') for chunk in obsolete])
        return "".join([encode(piece) for piece in pieces])

    # A unit without a msgid is written as its translator comments only,
    # unless it is the header or carries msgid or source comments.
    msgid_missing = (len(self.msgid) == 0) or ((len(self.msgid) == 1) and (self.msgid[0] == '""'))
    if msgid_missing and not (self.isheader() or self.msgidcomments or self.sourcecomments):
        return "".join(pieces)
    pieces.extend(self.automaticcomments)
    pieces.extend(self.sourcecomments)
    pieces.extend(self.typecomments)
    if self.msgctxt:
        pieces.append(render("msgctxt", self.msgctxt))
    pieces.append(render("msgid", self.msgid, self.msgidcomments))
    if self.msgid_plural or self.msgid_pluralcomments:
        pieces.append(render("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
    pieces.append(render("msgstr", self.msgstr))
    return "".join([encode(piece) for piece in pieces])
713
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit

    rtype: List
    return: A list of the locations with '#: ' stripped

    """
    found = []
    for comment in self.sourcecomments:
        # drop the line ending and the first three characters, then split
        # the rest on whitespace - one comment line may list several places
        stripped = quote.rstripeol(comment)
        found.extend(stripped[3:].split())
    return found
725
727 """Add a location to sourcecomments in the PO unit
728
729 @param location: Text location e.g. 'file.c:23' does not include #:
730 @type location: String
731
732 """
# The location is stored as a complete "#: ...\n" comment line of its own;
# no check is made for an existing identical entry.
733 self.sourcecomments.append("#: %s\n" % location)
734
746
def getcontext(self):
    """Get the message context."""
    # the unquoted msgctxt followed by whatever _extract_msgidcomments returns
    msgctxt_text = unquotefrompo(self.msgctxt)
    return msgctxt_text + self._extract_msgidcomments()
750
752 """Returns a unique identifier for this unit."""
753 context = self.getcontext()
754
755
756
757
758
# The id starts as the source text. NOTE(review): the local name `id`
# shadows the builtin of the same name inside this method.
759 id = self.source
# With msgid comments the context is put in front in "_: ...\n" form;
# otherwise a non-empty context is joined to the source with the "\04"
# character (presumably gettext's context separator - confirm).
760 if self.msgidcomments:
761 id = "_: %s\n%s" % (context, id)
762 elif context:
763 id = "%s\04%s" % (context, id)
764 return id
765
766 -class pofile(pocommon.pofile):
767 """this represents a .po file containing various units"""
768 UnitClass = pounit
770 """construct a pofile, optionally reading in from inputfile.
771 encoding can be specified but otherwise will be read from the PO header"""
# NOTE(review): the def line is not present in this view; the body uses
# inputfile, encoding and unitclass, whose defaults cannot be seen here.
# The instance attribute overrides the class-level UnitClass.
772 self.UnitClass = unitclass
773 pocommon.pofile.__init__(self, unitclass=unitclass)
# units and filename are reset after the base class initialiser has run.
774 self.units = []
775 self.filename = ''
776 self._encoding = encodingToUse(encoding)
777 if inputfile is not None:
778 self.parse(inputfile)
779
781 """changes the encoding on the file"""
# The new encoding is always stored; the header is rewritten only when the
# file has units and a non-blank header.
782 self._encoding = encodingToUse(newencoding)
783 if not self.units:
784 return
785 header = self.header()
786 if not header or header.isblank():
787 return
# Find the Content-Type line of the header; the last one found is used.
788 charsetline = None
789 headerstr = unquotefrompo(header.msgstr)
790 for line in headerstr.split("\n"):
791 if not ":" in line: continue
792 key, value = line.strip().split(":", 1)
793 if key.strip() != "Content-Type": continue
794 charsetline = line
795 if charsetline is None:
# NOTE(review): the new line is appended without a trailing newline, and
# without checking that headerstr ends with one - confirm this is intended.
796 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
797 else:
798 charset = re.search("charset=([^ ]*)", charsetline)
799 if charset is None:
# A Content-Type line without a charset gets "; charset=..." appended.
800 newcharsetline = charsetline
801 if not newcharsetline.strip().endswith(";"):
802 newcharsetline += ";"
803 newcharsetline += " charset=%s" % self._encoding
804 else:
# Otherwise only the first occurrence of the old charset is replaced.
805 charset = charset.group(1)
806 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
807 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
808 header.msgstr = quoteforpo(headerstr)
809
811 """parses the given file or file source string"""
812 try:
813 if hasattr(input, 'name'):
814 self.filename = input.name
815 elif not getattr(self, 'filename', ''):
816 self.filename = ''
# File-like input is read completely and then closed by this method.
817 if hasattr(input, "read"):
818 posrc = input.read()
819 input.close()
820 input = posrc
821
822 lines = input.split("\n")
823 start = 0
824 end = 0
825
826 linesprocessed = 0
# is_decoded becomes True once a header with a Content-Type has been found
# and all lines have been decoded with the resulting encoding.
827 is_decoded = False
# A unit is parsed at every blank line and once more at the end of input.
828 while end <= len(lines):
829 if (end == len(lines)) or (not lines[end].strip()):
830 newpe = self.UnitClass(encoding=self._encoding)
831 unit_lines = lines[start:end]
832
833
834
# Until the encoding is known, lines are decoded as ASCII with undecodable
# bytes dropped.
835 if not is_decoded:
836 unit_lines = [line.decode('ascii', 'ignore') for line in unit_lines]
837 linesprocessed = newpe.parselines(unit_lines)
838 start += linesprocessed
839
840 if linesprocessed >= 1 and newpe._getoutput():
841 self.units.append(newpe)
842 if not is_decoded:
843 if newpe.isheader():
844 if "Content-Type" in self.parseheader():
845 if self._encoding.lower() != 'charset':
846 self._encoding = newpe._encoding
847
# All lines are decoded with the file encoding and parsing restarts from the
# first line; units collected so far are discarded.
848 lines = self.decode(lines)
849 self.units = []
850 start = 0
851 end = 0
852 is_decoded = True
853 end = end+1
# NOTE(review): every exception is replaced by a ParseError created without
# arguments, so the original error is not passed on.
854 except Exception, e:
855 raise base.ParseError()
856
858 """make sure each msgid is unique ; merge comments etc from duplicates into original"""
# NOTE(review): the def line is not present in this view; duplicatestyle is
# compared against "merge", "keep", "msgid_comment", "msgid_comment_all" and
# "msgctxt" below.
# msgiddict maps each msgid seen so far to the first unit carrying it.
859 msgiddict = {}
860 uniqueunits = []
861
862
# markedpos lists the units that already had a disambiguating comment added.
863 markedpos = []
# addcomment adds a "_: <locations>\n" msgid comment built from the unit's
# locations.
864 def addcomment(thepo):
865 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
866 markedpos.append(thepo)
867 for thepo in self.units:
# For the msgid_comment styles, existing msgid comments are part of the key.
868 if duplicatestyle.startswith("msgid_comment"):
869 msgid = unquotefrompo(thepo.msgidcomments) + unquotefrompo(thepo.msgid)
870 else:
871 msgid = unquotefrompo(thepo.msgid)
872 if thepo.isheader():
873
# headers are always kept and never entered into msgiddict
874 uniqueunits.append(thepo)
875 elif duplicatestyle == "msgid_comment_all":
876 addcomment(thepo)
877 uniqueunits.append(thepo)
878 elif msgid in msgiddict:
879 if duplicatestyle == "merge":
# Duplicates with a non-empty msgid are merged into the first unit and
# dropped; those with an empty msgid are kept with a comment.
880 if msgid:
881 msgiddict[msgid].merge(thepo)
882 else:
883 addcomment(thepo)
884 uniqueunits.append(thepo)
885 elif duplicatestyle == "keep":
886 uniqueunits.append(thepo)
887 elif duplicatestyle == "msgid_comment":
888 origpo = msgiddict[msgid]
889 if origpo not in markedpos:
890 addcomment(origpo)
891 addcomment(thepo)
892 uniqueunits.append(thepo)
893 elif duplicatestyle == "msgctxt":
894 origpo = msgiddict[msgid]
895 if origpo not in markedpos:
896 origpo.msgctxt.append('"%s"' % " ".join(origpo.getlocations()))
# NOTE(review): this records thepo, not origpo, so origpo is never marked
# and a third duplicate adds the context to origpo again - looks like it
# should append origpo; confirm.
897 markedpos.append(thepo)
898 thepo.msgctxt.append('"%s"' % " ".join(thepo.getlocations()))
899 uniqueunits.append(thepo)
900 else:
# first occurrence of this msgid
901 if not msgid and duplicatestyle != "keep":
902 addcomment(thepo)
903 msgiddict[msgid] = thepo
904 uniqueunits.append(thepo)
905 self.units = uniqueunits
906
def __str__(self):
    """convert to a string. double check that unicode is handled somehow here"""
    rendered = self._getoutput()
    if not isinstance(rendered, unicode):
        return rendered
    # NOTE(review): this reads an attribute named "encoding", while the file
    # keeps its encoding in "_encoding" - confirm which one is intended.
    return rendered.encode(getattr(self, "encoding", "UTF-8"))
913
def _getoutput(self):
    """convert the units back to lines"""
    # every unit is followed by a newline, giving a blank line between units
    unit_sources = [str(unit) + "\n" for unit in self.units]
    text = "".join(self.encode(unit_sources)).rstrip()
    # trailing whitespace is removed; non-empty output gets one final newline
    if not text:
        return text
    return text + "\n"
924
def encode(self, lines):
    """encode any unicode strings in lines in self._encoding"""
    codec = self._encoding
    # no encoding, or the "charset" placeholder in any case, means UTF-8
    if codec is None or codec.lower() == "charset":
        codec = 'UTF-8'

    def encoded(line):
        # non-unicode items are passed through unchanged
        if isinstance(line, unicode):
            return line.encode(codec)
        return line

    return [encoded(line) for line in lines]
936
938 """decode any non-unicode strings in lines with self._encoding"""
939 newlines = []
940 for line in lines:
941 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
942 try:
943 line = line.decode(self._encoding)
944 except UnicodeError, e:
945 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
946 newlines.append(line)
947 return newlines
948
953
# Script entry point: read a PO file from standard input, parse it, and
# write the re-serialised file to standard output.
954 if __name__ == '__main__':
955 import sys
956 pf = pofile(sys.stdin)
957 sys.stdout.write(str(pf))
958