1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """Module for parsing Gettext .mo files for translation.
32
33 The coding of .mo files was produced from documentation in Gettext 0.16 and
34 from observation and testing of existing .mo files in the wild.
35
36 The class does not implement any of the hashing components of Gettext. This
37 will probably make the output file slower in some instances.
38 """
39
40 from translate.storage import base
41 from translate.storage import po
42 from translate.misc.multistring import multistring
43 import struct
44 import array
45 import re
46
# Magic number stored in the first four bytes of a Gettext .mo file. The
# parser compares it against both byte orders to detect the file's endianness.
# Written without the Python-2-only "L" suffix: the value is identical, and the
# literal stays valid on interpreters that no longer accept long suffixes.
MO_MAGIC_NUMBER = 0x950412de
48
# NOTE(review): the `def` line of this helper is not present in this view (the
# embedded listing numbers skip 49). `filename` below is presumably its
# parameter -- confirm the name and signature against the original file.
50 """Helper to unpack Gettext MO files into a Python string"""
# The file is opened without an explicit mode, i.e. in text mode.
# NOTE(review): .mo data is binary; confirm whether "rb" is wanted here.
51 f = open(filename)
52 s = f.read()
# Python 2 print statement: emits one literal "\xNN" escape per character read.
53 print "\\x%02x"*len(s) % tuple(map(ord, s))
# NOTE(review): this close is not reached if read() or the print above raises.
54 f.close()
55
# NOTE(review): the `def` line of this helper is not present in this view (the
# embedded listing numbers skip 56). `result` is presumably its parameter --
# confirm the name and signature against the original file.
# Split the low 32 bits of `result` into four bytes, c0 being the lowest.
57 c0 = (result >> 0) & 0xff
58 c1 = (result >> 8) & 0xff
59 c2 = (result >> 16) & 0xff
60 c3 = (result >> 24) & 0xff
61
# Reassemble the bytes in reverse order: c0 becomes the highest byte of the
# returned value and c3 the lowest. Bits above bit 31 of `result` are dropped.
62 return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
63
def hashpjw(str_param):
    """Compute the hash value used to place a message id in the MO hash table.

    For every character the running value is shifted left by four bits and
    the character code is added. Whenever any of bits 28-31 end up set,
    those bits are XOR-ed back into bits 4-7 and then cleared.

    str_param may be a text string (characters are converted with ord()) or
    a byte sequence whose iteration yields integers.

    Returns the hash as a non-negative integer; empty input hashes to 0.
    """
    HASHWORDBITS = 32
    top_nibble = 0xf << (HASHWORDBITS - 4)
    hval = 0
    for char in str_param:
        # Iterating a bytes object can yield ints, for which ord() would fail.
        code = char if isinstance(char, int) else ord(char)
        hval = (hval << 4) + code
        overflow = hval & top_nibble
        if overflow:
            hval ^= overflow >> (HASHWORDBITS - 8)
            hval ^= overflow
    return hval
77
79
def get_next_prime_number(start):
    """Return the smallest prime number greater than or equal to start.

    start is an integer; values below 2 yield 2.
    """

    def is_prime(num):
        """Return True if num is prime, using trial division."""
        if num < 2:
            return False
        # Only divisors up to the square root need checking. Integer
        # arithmetic avoids the float that "num / 2" produces under true
        # division, which range() would reject.
        divider = 2
        while divider * divider <= num:
            if num % divider == 0:
                return False
            divider += 1
        return True

    candidate = start
    while not is_prime(candidate):
        candidate += 1
    return candidate
96
97
# NOTE(review): this listing has lost its indentation and some of its lines.
# The embedded numbers jump from 99 to 104, so part of the class body is not
# in view -- presumably the constructor that creates self.msgctxt, which is
# read below but never assigned in the visible code. Confirm against the
# original file.
98 -class mounit(base.TranslationUnit):
99 """A class representing a .mo translation message."""
104
# Returns None when self.msgctxt is None; otherwise returns the items of
# self.msgctxt concatenated into one string.
105 - def getcontext(self):
106 """Get the message context"""
107
108 if self.msgctxt is None:
109 return None
110 return "".join(self.msgctxt)
111
# NOTE(review): the `def` line of this method (embedded number 112) is not in
# view. The body returns True exactly when self.source equals the empty string.
113 """Is this a header entry?"""
114 return self.source == ""
115
# NOTE(review): the `def` line of this method (embedded number 116) is not in
# view. The body returns the truth value of self.source, so it is False when
# self.source is empty.
117 """Is this message translateable?"""
118 return bool(self.source)
119
# NOTE(review): this listing has lost its indentation and some of its lines.
# The embedded numbers jump from 125 to 132 (presumably the constructor) and
# skip 133 and 216 (the `def` lines of the two method bodies below). Confirm
# names and signatures against the original file.
120 -class mofile(base.TranslationStore):
121 """A class representing a .mo file."""
122 UnitClass = mounit
123 Name = "Gettext MO file"
124 Mimetypes = ["application/x-gettext-catalog", "application/x-mo"]
125 Extensions = ["mo", "gmo"]
132
# --- Serializer body (its `def` line is not in view; presumably __str__). ---
# Builds the binary MO image from self.units and returns it as a string.
134 """Output a string representation of the MO data file"""
135
# Nested helper: stores the 1-based message index (i + 1) in the first free
# slot of hash_table. Slot 0 means "empty". Probing starts at V % S and
# advances by a second, key-dependent step of 1 + V % (S - 2), modulo S.
136 def add_to_hash_table(string, i):
137 V = hashpjw(string)
# and/or idiom: evaluates to 3 when hash_size <= 2, otherwise to hash_size.
138 S = hash_size <= 2 and 3 or hash_size
139 hash_cursor = V % S;
140 orig_hash_cursor = hash_cursor;
141 increment = 1 + (V % (S - 2));
142 while True:
143 index = hash_table[hash_cursor]
144 if (index == 0):
145 hash_table[hash_cursor] = i + 1
146 break
147 hash_cursor += increment
148 hash_cursor = hash_cursor % S
# Asserts that probing has not wrapped back to the slot it started from.
149 assert (hash_cursor != orig_hash_cursor)
150
151
152
153
# The table size is the first prime >= 4/3 of the number of units, and at
# least 3. It is derived from len(self.units), not from the number of keys
# actually stored (units with an empty target are left out below).
154 hash_size = get_next_prime_number(int((len(self.units) * 4) / 3))
155 if hash_size <= 2:
156 hash_size = 3
# MESSAGES maps the UTF-8 encoded key to the target text. The key is the
# msgidcomments followed by the source (plural forms joined by NUL), and is
# prefixed with the msgctxt and an "\x04" separator when a context is set.
157 MESSAGES = {}
158 for unit in self.units:
159 if isinstance(unit.source, multistring):
160 source = "".join(unit.msgidcomments) + "\0".join(unit.source.strings)
161 else:
162 source = "".join(unit.msgidcomments) + unit.source
163 if unit.msgctxt:
164 source = "".join(unit.msgctxt) + "\x04" + source
165 if isinstance(unit.target, multistring):
166 target = "\0".join(unit.target.strings)
167 else:
168 target = unit.target
# Only units whose target is truthy are written to the file.
169 if unit.target:
170 MESSAGES[source.encode("utf-8")] = target
# NOTE(review): typecode "L" is a C unsigned long, whose item size is
# platform dependent (8 bytes on typical 64-bit Unix builds), while keystart
# below reserves hash_size*4 bytes for this table. Confirm the intended width;
# a mismatch would shift every key/value offset written to the file.
171 hash_table = array.array("L", [0] * hash_size)
# Relies on Python 2 semantics: dict.keys() returns a list that can be sorted
# in place.
172 keys = MESSAGES.keys()
173
174 keys.sort()
# offsets collects one tuple per key: (key offset, key length, value offset,
# value length), with offsets relative to the start of ids / strs. Every key
# and value is appended with a terminating NUL.
175 offsets = []
176 ids = strs = ''
177 for i, id in enumerate(keys):
178
179
180
181 add_to_hash_table(id, i)
182 string = MESSAGES[id]
# `unicode` is the Python 2 text type; such values are encoded to UTF-8.
183 if isinstance(string, unicode):
184 string = string.encode('utf-8')
185 offsets.append((len(ids), len(id), len(strs), len(string)))
186 ids = ids + id + '\0'
187 strs = strs + string + '\0'
188 output = ''
189
# Keys start after the 7-word header (7*4 bytes), the two index tables
# (2 * 8 bytes per key = 16*len(keys)) and the hash table (hash_size*4 bytes).
190 keystart = 7*4+16*len(keys)+hash_size*4
191
# Values follow directly after the key data.
192 valuestart = keystart + len(ids)
193 koffsets = []
194 voffsets = []
195
196
# Each index table row is a (length, absolute file offset) pair.
197 for o1, l1, o2, l2 in offsets:
198 koffsets = koffsets + [l1, o1+keystart]
199 voffsets = voffsets + [l2, o2+valuestart]
200 offsets = koffsets + voffsets
# Header fields in order: magic, version (0), number of keys, offset of the
# key table, offset of the value table, hash table size, hash table offset.
# NOTE(review): the format string has no byte-order prefix, so struct uses the
# machine's native byte order, size and alignment -- confirm this is intended.
201 output = struct.pack("Iiiiiii",
202 MO_MAGIC_NUMBER,
203 0,
204 len(keys),
205 7*4,
206 7*4+len(keys)*8,
207 hash_size, 7*4+2*(len(keys)*8))
208
# NOTE(review): indentation is lost in this view, so it cannot be told which
# of the following appends are conditional on there being at least one key.
209 if (len(keys) > 0):
210 output = output + array.array("i", offsets).tostring()
211 output = output + hash_table.tostring()
212 output = output + ids
213 output = output + strs
214 return output
215
# --- Parser body (its `def` line is not in view; `input` is presumably its
# parameter, and it shadows the builtin of the same name). ---
# Accepts either an object with a read() method or the raw file contents.
217 """parses the given file or file source string"""
# Remember the name of the input if it has one; otherwise make sure
# self.filename exists, defaulting to ''.
218 if hasattr(input, 'name'):
219 self.filename = input.name
220 elif not getattr(self, 'filename', ''):
221 self.filename = ''
# File-like input is read completely and then closed; from here on `input`
# holds the raw data.
222 if hasattr(input, "read"):
223 mosrc = input.read()
224 input.close()
225 input = mosrc
# Detect the byte order by reading the first four bytes both ways.
# NOTE(review): with fewer than four bytes of data struct.unpack raises
# struct.error before the ValueError below can be reached -- confirm callers
# expect that.
226 little, = struct.unpack("<L", input[:4])
227 big, = struct.unpack(">L", input[:4])
228 if little == MO_MAGIC_NUMBER:
229 endian = "<"
230 elif big == MO_MAGIC_NUMBER:
231 endian = ">"
232 else:
233 raise ValueError("This is not an MO file")
# Unpack the seven 4-byte header fields. magic, sizehash and offsethash are
# not used again in the visible code: the hash table is not consulted here.
234 magic, version, lenkeys, startkey, startvalue, sizehash, offsethash = struct.unpack("%sLiiiiii" % endian, input[:(7*4)])
235 if version > 1:
236 raise ValueError("Unable to process MO files with versions > 1. This is a %d version MO file" % version)
# Default encoding until a header entry announces a charset.
237 encoding = 'UTF-8'
238 for i in range(lenkeys):
# Entry i of each index table is 8 bytes: a (length, offset) pair.
239 nextkey = startkey+(i*2*4)
240 nextvalue = startvalue+(i*2*4)
241 klength, koffset = struct.unpack("%sii" % endian, input[nextkey:nextkey+(2*4)])
242 vlength, voffset = struct.unpack("%sii" % endian, input[nextvalue:nextvalue+(2*4)])
243 source = input[koffset:koffset+klength]
244 context = None
# "\x04" separates the message context from the message id.
# NOTE(review): the two-name unpack raises ValueError if the key contains
# more than one "\x04"; confirm whether split("\x04", 1) is intended.
245 if "\x04" in source:
246 context, source = source.split("\x04")
247
# NUL separates plural forms; the pieces are wrapped in a multistring.
248 source = multistring(source.split("\0"), encoding=encoding)
# An empty source is treated as the header entry: a "charset=..." value found
# in its translation replaces the encoding used from this point on.
249 if source == "":
250 charset = re.search("charset=([^\\s]+)", input[voffset:voffset+vlength])
251 if charset:
252 encoding = po.encodingToUse(charset.group(1))
253 target = multistring(input[voffset:voffset+vlength].split("\0"), encoding=encoding)
254 newunit = mounit(source)
255 newunit.settarget(target)
256 if context is not None:
257 newunit.msgctxt.append(context)
258 self.addunit(newunit)
259