1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module for parsing Qt .qm files
24
25 @note: based on documentation from Gettext's .qm implementation (see write-qt.c) and on observation
26 of the output of lrelease.
27 @note: Certain deprecated section tags are not implemented. These will break and print out
28 the missing tag. They are easy to implement and should follow the structure in 03
29 (Translation). We could find no examples that use these so we'd rather leave it
30 unimplemented until we actually have test data.
31 @note: Many .qm files cannot be parsed because they do not contain the source text. We assume
32 that, since they use a hash table to look up the data, there is actually no need for the
33 source text. It seems, however, that in Qt4's lrelease all data is included in the resulting .qm
34 file.
35 @todo: We can only parse, not create, a .qm file. The main issue is that we need to
36 implement the hashing algorithm (which seems to be identical to the Gettext hash algorithm). Unlike
37 Gettext it seems that the hash is required, but that has not been validated.
38 @todo: The code parses files correctly, but it could be cleaned up to be more readable, especially
39 the part that breaks the file into sections.
40 """
41
42 from translate.storage import base
43 from translate.misc.multistring import multistring
44 import codecs
45 import struct
46 import sys
47
# Expected 16-byte file header, expressed as four 32-bit words. The parser below
# unpacks the first 16 bytes of the input with struct format ">4L" (big-endian)
# and rejects the file when the resulting tuple differs from this one.
48 QM_MAGIC_NUMBER = (0x3CB86418L, 0xCAEF9C95L, 0xCD211CBFL, 0x60A1BDDDL)
49
# NOTE(review): the `def` line for this helper (listing line 50) is missing from
# this extract; `qmfile` below is presumably its parameter -- confirm against the
# full file.
51 """Helper to unpack Qt .qm files into a Python string"""
# NOTE(review): opened without an explicit binary mode even though the rest of
# this module treats .qm content as binary data (struct-unpacked).
52 f = open(qmfile)
53 s = f.read()
# Prints the whole content as one "\xNN" escape per character read: the format
# string is repeated len(s) times and filled with the ordinal of each character.
54 print "\\x%02x"*len(s) % tuple(map(ord, s))
55 f.close()
56
# Unit type used by the qmfile store (assigned to qmfile.UnitClass).
# NOTE(review): the leading "-" on the class line and the jump in the embedded
# numbering after the docstring (58 -> 61) look like artifacts of a collapsed
# source listing; any members defined on the skipped lines are not visible here.
57 -class qmunit(base.TranslationUnit):
58 """A class representing a .qm translation message."""
61
# Store class for .qm files; units are created as qmunit instances.
# NOTE(review): the leading "-" on the class line is a listing artifact, and the
# embedded numbering skips 66-73 after these attributes, so part of the class
# body is not visible in this extract.
62 -class qmfile(base.TranslationStore):
63 """A class representing a .qm file."""
# Class-level metadata. How base.TranslationStore consumes these attributes is
# not visible here -- presumably for format detection; verify in the base module.
64 UnitClass = qmunit
65 Mimetypes = ["application/x-qm"]
66 Extensions = ["qm"]
74
# NOTE(review): the `def` line for this method (listing line 75) is missing from
# this extract.
76 """Output a string representation of the .qm data file"""
# Always returns an empty string: no serialisation is performed here, which
# matches the module docstring's note that .qm files can only be parsed, not created.
77 return ""
78
# NOTE(review): the `def` line for this method (listing line 79) is missing from
# this extract; the body uses `self` and `input` (the latter shadows the builtin).
#
# Overview of what the body does:
#   1. Normalises `input` to the raw file content and records self.filename.
#   2. Validates the 16-byte magic header (raises ValueError otherwise).
#   3. Walks the top-level sections to locate the messages section.
#   4. Walks the messages section, creating one unit per message via
#      self.addsourceunit(source) and assigning its target.
80 """parses the given file or file source string"""
# Remember the file name when the input object exposes one; otherwise make sure
# self.filename exists (empty string) without overwriting a non-empty value.
81 if hasattr(input, 'name'):
82 self.filename = input.name
83 elif not getattr(self, 'filename', ''):
84 self.filename = ''
# File-like input: read everything, close it, and continue with the raw content.
85 if hasattr(input, "read"):
86 qmsrc = input.read()
87 input.close()
88 input = qmsrc
# Header check: at least 16 bytes, and those bytes (read as four big-endian
# unsigned 32-bit integers) must equal QM_MAGIC_NUMBER.
89 if len(input) < 16:
90 raise ValueError("This is not a .qm file: file empty or to small")
91 magic = struct.unpack(">4L", input[:16])
92 if magic != QM_MAGIC_NUMBER:
93 raise ValueError("This is not a .qm file: invalid magic number")
# Sections start right after the magic. Each section header is 5 bytes:
# a 1-byte signed tag followed by a 4-byte big-endian unsigned payload length.
94 startsection = 16
95 sectionheader = 5
# NOTE(review): if fewer than 5 bytes remain, the slice is short and
# struct.unpack raises struct.error; there is no explicit truncation check.
96 while startsection < len(input):
97 section_type, length = struct.unpack(">bL", input[startsection:startsection+sectionheader])
# Tag 0x42: hash section (judging by the variable names).
98 if section_type == 0x42:
99
# NOTE(review): the has* flags and the *_data tuples for hashes and contexts are
# assigned but never read in the visible body; only messages_start and
# len(messages_data) are used further down.
100 hashash = True
101 hash_start = startsection+sectionheader
102 hash_data = struct.unpack(">%db" % length, input[startsection+sectionheader:startsection+sectionheader+length])
# Tag 0x69: messages section.
103 elif section_type == 0x69:
104
105 hasmessages = True
106 messages_start = startsection+sectionheader
107 messages_data = struct.unpack(">%db" % length, input[startsection+sectionheader:startsection+sectionheader+length])
# Tag 0x2f: contexts section.
108 elif section_type == 0x2f:
109
110 hascontexts = True
111 contexts_start = startsection+sectionheader
112 contexts_data = struct.unpack(">%db" % length, input[startsection+sectionheader:startsection+sectionheader+length])
# Advance past header + payload; sections with any other tag are skipped this way.
113 startsection = startsection+sectionheader+length
# NOTE(review): messages_start / messages_data are only bound when a 0x69
# section was seen; a file without one would fail here with a NameError.
114 pos = messages_start
115 source = target = None
# Walk the messages section one subsection at a time. Each subsection starts
# with a 1-byte signed tag.
116 while pos < messages_start + len(messages_data):
117 subsection, = struct.unpack(">b", input[pos:pos+1])
# Tag 0x01: end of the current message. Emit a unit if both source and target
# were collected; otherwise the file is rejected as lacking source text.
118 if subsection == 0x01:
119
120 pos = pos+1
121 if not source is None and not target is None:
122 newunit = self.addsourceunit(source)
123 newunit.target = target
124 source = target = None
125 else:
126 raise ValueError("Old .qm format with no source defined")
127 continue
128
# All other tags: skip the tag byte and read the next 4 bytes as a signed
# big-endian length. For tag 0x05 this value is not used as a length (those
# same 4 bytes are taken as the hash below).
129 pos = pos+1
130 length, = struct.unpack(">l", input[pos:pos+4])
# Tag 0x03: translation text, stored as UTF-16 big-endian. A length of -1 is
# treated as "no translation" and yields an empty target.
131 if subsection == 0x03:
132 if length != -1:
133 raw, = struct.unpack(">%ds" % length, input[pos+4:pos+4+length])
134 string, templen = codecs.utf_16_be_decode(raw)
# A second or later translation within the same message is appended to the
# existing multistring (presumably plural forms -- TODO confirm).
135 if target:
136 target.strings.append(string)
137 else:
138 target = multistring(string)
139 pos = pos+4+length
140 else:
141 target = ""
142 pos = pos+4
143
# Tag 0x06: source text, decoded as ISO-8859-1.
144 elif subsection == 0x06:
145 source = input[pos+4:pos+4+length].decode('iso-8859-1')
146
147 pos = pos+4+length
# Tag 0x07: context, decoded as ISO-8859-1.
# NOTE(review): `context` is not attached to the unit anywhere in the visible body.
148 elif subsection == 0x07:
149 context = input[pos+4:pos+4+length].decode('iso-8859-1')
150
151 pos = pos+4+length
# Tag 0x08: comment, kept as raw bytes (not decoded) and likewise unused below.
152 elif subsection == 0x08:
153 comment = input[pos+4:pos+4+length]
154
155 pos = pos+4+length
# Tag 0x05: fixed-size 4-byte hash with no length prefix.
# NOTE(review): the local name `hash` shadows the builtin and is never read.
156 elif subsection == 0x05:
157 hash = input[pos:pos+4]
158
159 pos = pos+4
# Any other tag is unimplemented (0x02 and 0x04 are named explicitly). A
# diagnostic goes to stderr and parsing stops; units already added are kept,
# the remaining messages are not read.
160 else:
161 if subsection == 0x02:
162 subsection_name = "SourceText16"
163 elif subsection == 0x04:
164 subsection_name = "Context16"
165 else:
166 subsection_name = "Unkown"
167 print >> sys.stderr, "Unimplemented: %s %s" % (subsection, subsection_name)
168 return
169