1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Manage the Wordfast Translation Memory format
23 """
24
25 import csv
26 import time
27 from translate.storage import base
28
WF_TIMEFORMAT = "%Y%m%d~%H%M%S"
"""strftime/strptime pattern for Wordfast time stamps (YYYYMMDD~hhmmss)"""

WF_FIELDNAMES_HEADER = [
    "date",
    "userlist",
    "tucount",
    "src-lang",
    "version",
    "target-lang",
    "license",
    "attr1list",
    "attr2list",
    "attr3list",
    "attr4list",
    "attr5list",
]
"""Column names, in order, of the header (first) row of a Wordfast file"""

WF_FIELDNAMES = [
    "date",
    "user",
    "reuse",
    "src-lang",
    "source",
    "target-lang",
    "target",
    "attr1",
    "attr2",
    "attr3",
    "attr4",
]
"""Column names, in order, of every translation unit (TU) row"""

WF_FIELDNAMES_HEADER_DEFAULTS = {
    "date": "%19000101~121212",
    "userlist": "%User ID,TT,TT Translate-Toolkit",
    "tucount": "%TU=00000001",
    "src-lang": "%EN-US",
    "version": "%Wordfast TM v.5.51w9/00",
    "target-lang": "",
    "license": "%---00000001",
    "attr1list": "",
    "attr2list": "",
    "attr3list": "",
    "attr4list": "",
}
"""Default or minimum header entries for a Wordfast file

Note that there is no entry for the "attr5list" column."""
51
52
53
54
55
# Entry order matters to the escaping helpers, which walk this table from top
# to bottom: the ampersand comes first, and when one character appears twice
# the earlier code is the one written out.
WF_ESCAPE_MAP = (
    # NOTE(review): the hex parts of this first group look like Windows-1252
    # byte values -- confirm.
    ("&'26;", u"\u0026"),  # ampersand
    ("&'82;", u"\u201A"),  # single low-9 quotation mark
    ("&'85;", u"\u2026"),  # horizontal ellipsis
    ("&'91;", u"\u2018"),  # left single quotation mark
    ("&'92;", u"\u2019"),  # right single quotation mark
    ("&'93;", u"\u201C"),  # left double quotation mark
    ("&'94;", u"\u201D"),  # right double quotation mark
    ("&'96;", u"\u2013"),  # en dash
    ("&'97;", u"\u2014"),  # em dash
    ("&'99;", u"\u2122"),  # trade mark sign

    # Codes whose hex part equals the character's own code point.
    ("&'A0;", u"\u00A0"),  # no-break space
    ("&'A9;", u"\u00A9"),  # copyright sign
    ("&'AE;", u"\u00AE"),  # registered sign
    ("&'BC;", u"\u00BC"),  # vulgar fraction one quarter
    ("&'BD;", u"\u00BD"),  # vulgar fraction one half
    ("&'BE;", u"\u00BE"),  # vulgar fraction three quarters

    # NOTE(review): the hex parts of this group look like Mac OS Roman byte
    # values -- confirm. Most of these characters already appear above.
    ("&'A8;", u"\u00AE"),  # registered sign
    ("&'AA;", u"\u2122"),  # trade mark sign
    ("&'C7;", u"\u00AB"),  # left-pointing double angle quotation mark
    ("&'C8;", u"\u00BB"),  # right-pointing double angle quotation mark
    ("&'C9;", u"\u2026"),  # horizontal ellipsis
    ("&'CA;", u"\u00A0"),  # no-break space
    ("&'D0;", u"\u2013"),  # en dash
    ("&'D1;", u"\u2014"),  # em dash
    ("&'D2;", u"\u201C"),  # left double quotation mark
    ("&'D3;", u"\u201D"),  # right double quotation mark
    ("&'D4;", u"\u2018"),  # left single quotation mark
    ("&'D5;", u"\u2019"),  # right single quotation mark
    ("&'E2;", u"\u201A"),  # single low-9 quotation mark
    ("&'E3;", u"\u201E"),  # double low-9 quotation mark
)
"""(escape code, Unicode character) pairs for the Wordfast &'XX; escapes"""

# Byte pair searched for in the first line of raw input to decide whether the
# data is UTF-16 (see the encoding detection in the file parser).
TAB_UTF16 = "\x00\x09"
95
def _char_to_wf(string):
    """Char -> Wordfast &'XX; escapes

    Each character listed in WF_ESCAPE_MAP is replaced by its &'XX; code (the
    table is walked top to bottom), after which newline and tab characters
    become the two-character sequences \\n and \\t.

    @note: Full roundtripping is not possible because of the escaping of
    \\n and \\t
    @param string: text to escape; the table characters are encoded to UTF-8
    before matching, so a UTF-8 encoded byte string is expected
    @return: the escaped string; an empty or None value is returned unchanged
    """
    # NOTE(review): the ``def`` line was missing from the file. The name is
    # taken from the call made by the unit setter and the parameter name from
    # the body below.
    if not string:
        return string
    for code, char in WF_ESCAPE_MAP:
        string = string.replace(char.encode('utf-8'), code)
    return string.replace("\n", "\\n").replace("\t", "\\t")
106
def _wf_to_char(string):
    """Wordfast &'XX; escapes -> Char

    Every &'XX; code listed in WF_ESCAPE_MAP is replaced by the UTF-8 encoding
    of its character, after which the two-character sequences \\n and \\t
    become real newline and tab characters.

    @param string: Wordfast-escaped text (a UTF-8 encoded byte string is
    expected, since the replacement characters are encoded to UTF-8)
    @return: the unescaped string; an empty or None value is returned
    unchanged
    """
    # NOTE(review): the ``def`` line was missing from the file. The name is
    # taken from the call made by the unit getter and the parameter name from
    # the body below.
    if not string:
        return string
    # The ampersand escape has to be expanded last. Expanding it first turns
    # an escaped literal such as "&'26;'85;" into "&'85;", which the next
    # pass would then wrongly decode a second time (into an ellipsis).
    deferred = []
    for code, char in WF_ESCAPE_MAP:
        if char == u"&":
            deferred.append((code, char))
            continue
        string = string.replace(code, char.encode('utf-8'))
    for code, char in deferred:
        string = string.replace(code, char.encode('utf-8'))
    return string.replace("\\n", "\n").replace("\\t", "\t")
114
# Make the dialect selectable by name: the csv.DictReader / csv.DictWriter
# calls in this module pass dialect="wordfast".
# NOTE(review): no WordfastDialect definition is visible in this file as
# received -- confirm that it is defined above this line.
csv.register_dialect("wordfast", WordfastDialect)
125
class WordfastTime(object):
    """Manages time stamps in the Wordfast format of YYYYMMDD~hhmmss"""
    # NOTE(review): the ``class`` statement and every ``def`` line of this
    # class were missing from the file. The accessor names come from the
    # property() calls below; the class name, the ``object`` base (needed for
    # property setters to work on Python 2) and the ``newtime=None`` default
    # are inferred -- confirm against the callers.

    def __init__(self, newtime=None):
        """Create a time stamp.

        @param newtime: nothing (no time), a Wordfast time string, or a
        time.struct_time; any other value leaves the time unset
        """
        self._time = None
        if not newtime:
            self.time = None
        elif isinstance(newtime, basestring):
            self.timestring = newtime
        elif isinstance(newtime, time.struct_time):
            self.time = newtime

    def get_timestring(self):
        """Get the time in the Wordfast time format

        @return: the formatted time, or None when no time is set
        """
        if not self._time:
            return None
        return time.strftime(WF_TIMEFORMAT, self._time)

    def set_timestring(self, timestring):
        """Set the time_struct object using a Wordfast time formatted string

        @param timestring: A Wordfast time string (YYYYMMDD~hhmmss)
        @type timestring: String
        @raise ValueError: raised by time.strptime when the string does not
        match WF_TIMEFORMAT
        """
        self._time = time.strptime(timestring, WF_TIMEFORMAT)
    timestring = property(get_timestring, set_timestring)

    def get_time(self):
        """Get the time_struct object"""
        return self._time

    def set_time(self, newtime):
        """Set the time_struct object

        Anything that is not a time.struct_time clears the stored time.

        @param newtime: a new time object
        @type newtime: time.struct_time
        """
        if newtime and isinstance(newtime, time.struct_time):
            self._time = newtime
        else:
            self._time = None
    time = property(get_time, set_time)
168
174
class WordfastHeader(object):
    """A Wordfast translation memory header

    Wraps the dictionary of header fields and offers write-only properties
    for the two fields that are rewritten when a file is saved.
    """
    # NOTE(review): the ``class`` statement and every ``def`` line were
    # missing from the file. The class name comes from the WordfastHeader()
    # and WordfastHeader(header) calls in the file class; method names come
    # from the property() calls below. No constructor is visible in the file
    # as received -- the code that initialises ``_header_dict`` (and accepts
    # the optional ``header`` argument) still has to be restored.

    def getheader(self):
        """Get the header dictionary"""
        return self._header_dict

    def setheader(self, newheader):
        """Replace the header dictionary

        @param newheader: the new mapping of header field names to values
        """
        self._header_dict = newheader
    header = property(getheader, setheader)

    def settargetlang(self, newlang):
        """Store the target language in the header, prefixed with '%'

        @param newlang: the language code to store
        """
        self._header_dict['target-lang'] = '%%%s' % newlang
    targetlang = property(None, settargetlang)

    def settucount(self, count):
        """Store the unit count in the header as '%TU=' plus eight digits

        @param count: number of translation units (an integer)
        """
        self._header_dict['tucount'] = '%%TU=%08d' % count
    tucount = property(None, settucount)
205
207 """A Wordfast translation memory unit"""
213
217
# NOTE(review): the ``def`` lines of these two accessors and the enclosing
# ``class`` statement were missing from the file. The names come from the
# property() call below; the file parser assigns ``newunit.dict`` on a
# WordfastUnit, so they presumably belong to that class. They must end up
# inside the class body -- at module level the last line would shadow the
# builtin ``dict``.
def getdict(self):
    """Get the dictionary of values for a Wordfast line"""
    return self._dict


def setdict(self, newdict):
    """Set the dictionary of values for a Wordfast line

    The dictionary is stored as given, without copying.

    @param newdict: a new dictionary with Wordfast line elements
    @type newdict: Dict
    """
    self._dict = newdict
dict = property(getdict, setdict)
231
# NOTE(review): the ``def`` line was missing from the file; the method name
# used here is a placeholder inferred from the body (it reads the field named
# by ``key``) -- confirm against the source/target accessors that call it.
def _get_source_or_target(self, key):
    """Return one text field of the unit, unescaped and decoded.

    @param key: name of the field in the unit dictionary
    @return: None when the field is None or has never been set, "" when it
    is empty, otherwise the value passed through _wf_to_char and decoded
    from UTF-8
    """
    # .get() rather than indexing: a unit whose field was never set used to
    # raise KeyError here instead of reporting "no value".
    value = self._dict.get(key)
    if value is None:
        return None
    if value:
        return _wf_to_char(value).decode('utf-8')
    return ""
239
# NOTE(review): the ``def`` line was missing from the file; the method name
# used here is a placeholder inferred from the body (it writes the field
# named by ``key``) -- confirm against the source/target accessors that call
# it.
def _set_source_or_target(self, key, newvalue):
    """Store one text field of the unit in Wordfast-escaped form.

    Unicode input is encoded to UTF-8 and passed through _char_to_wf. The
    timestamp is refreshed only when the stored value actually changes.

    @param key: name of the field in the unit dictionary
    @param newvalue: the new text, or None to clear the field
    """
    if newvalue is None:
        # NOTE(review): the field is overwritten here, so the comparison
        # below always sees equal values and clearing a field never
        # refreshes the timestamp -- confirm this is intended.
        self._dict[key] = None
    if isinstance(newvalue, unicode):
        newvalue = newvalue.encode('utf-8')
    newvalue = _char_to_wf(newvalue)
    if key not in self._dict or newvalue != self._dict[key]:
        self._dict[key] = newvalue
        self._update_timestamp()
249
252
255 source = property(getsource, setsource)
256
259
262 target = property(gettarget, settarget)
263
# NOTE(review): the ``def`` line and the enclosing ``class`` statement were
# missing from the file; the name comes from the property() call below. This
# accessor must end up inside the class body that owns ``_dict``.
def settargetlang(self, newlang):
    """Store the target language of this line.

    Unlike the header field of the same name, the value is stored as given,
    with no '%' prefix.

    @param newlang: the language code to store
    """
    self._dict['target-lang'] = newlang
targetlang = property(None, settargetlang)
267
# NOTE(review): orphaned method body -- its ``def`` line is missing from the
# file. It returns the text form of the field dictionary; presumably this is
# the unit's __str__ -- confirm and restore the header.
return str(self._dict)
270
# NOTE(review): the ``def`` line was missing from the file; the name comes
# from the ``unit.istranslated()`` call made when the file is serialised.
# This method must end up inside the class body that owns ``_dict``.
def istranslated(self):
    """Tell whether the unit has both a source and a target.

    @return: True only when the 'source' and 'target' fields are both
    present and non-empty, otherwise False
    """
    fields = self._dict
    return bool(fields.get('source')) and bool(fields.get('target'))
275
276
class WordfastTMFile(base.TranslationStore):
    """A Wordfast translation memory file"""
    # NOTE(review): the ``class`` statement and every ``def`` line were
    # missing from the file. The base class comes from the explicit
    # base.TranslationStore.__init__ call and ``parse`` from the call in the
    # constructor. The class name, the ``__str__`` name and the constructor
    # defaults are inferred -- confirm against the callers.
    Name = "Wordfast TM file"
    Mimetypes = ["application/x-wordfast"]
    Extensions = ["txt"]

    def __init__(self, inputfile=None, unitclass=WordfastUnit):
        """construct a Wordfast TM, optionally reading in from inputfile.

        @param inputfile: a file object or a string holding the raw file
        content; None creates an empty store
        @param unitclass: unit class handed to the base store
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.filename = ''
        self.header = WordfastHeader()
        self._encoding = 'utf-16'
        if inputfile is not None:
            self.parse(inputfile)

    def parse(self, input):
        """parses the given file or file source string

        The first row becomes the header; every following row becomes a
        unit.

        @param input: a file-like object (it is read and then closed) or a
        string holding the raw, still encoded file content
        @raise ValueError: when the content cannot be decoded with the
        detected encoding
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            tmsrc = input.read()
            input.close()
            input = tmsrc
        # Only the first line is needed to sniff the encoding, so do not
        # split the whole content for it.
        if TAB_UTF16 in input.split("\n", 1)[0]:
            self._encoding = 'utf-16'
        else:
            self._encoding = 'iso-8859-1'
        try:
            input = input.decode(self._encoding).encode('utf-8')
        except UnicodeError:
            # Only decoding problems are reported as a bad file; a bare
            # ``except`` here used to disguise unrelated errors too.
            raise ValueError("Wordfast files are either UTF-16 (UCS2) or ISO-8859-1 encoded")
        rows = input.split("\n")
        for header in csv.DictReader(rows[:1], fieldnames=WF_FIELDNAMES_HEADER, dialect="wordfast"):
            self.header = WordfastHeader(header)
        lines = csv.DictReader(rows[1:], fieldnames=WF_FIELDNAMES, dialect="wordfast")
        for line in lines:
            # NOTE(review): units are always WordfastUnit here, whatever
            # ``unitclass`` was given to the constructor -- confirm intended.
            newunit = WordfastUnit()
            newunit.dict = line
            self.addunit(newunit)

    def __str__(self):
        """Serialise the store as an encoded Wordfast file.

        Only translated units are written, and the header's unit count is
        updated to match.

        @return: the header row followed by the unit rows, encoded with the
        encoding detected at parse time (falling back to UTF-16 when that
        encoding cannot represent the text); "" when there is no translated
        unit
        """
        # NOTE(review): csv.StringIO is whatever StringIO class the csv
        # module happens to import -- an undocumented detail of the stdlib.
        output = csv.StringIO()
        writer = csv.DictWriter(output, fieldnames=WF_FIELDNAMES, dialect="wordfast")
        unit_count = 0
        for unit in self.units:
            if unit.istranslated():
                unit_count += 1
                writer.writerow(unit.dict)
        if unit_count == 0:
            return ""
        self.header.tucount = unit_count
        header_output = csv.StringIO()
        outheader = csv.DictWriter(header_output, fieldnames=WF_FIELDNAMES_HEADER, dialect="wordfast")
        outheader.writerow(self.header.header)
        decoded = (header_output.getvalue() + output.getvalue()).decode('utf-8')
        try:
            return decoded.encode(self._encoding)
        except UnicodeEncodeError:
            return decoded.encode('utf-16')
339