Package translate :: Package storage :: Module mo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.mo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007 Zuza Software Foundation 
  5  # 
  6  # the function "__str__" was derived from Python v2.4 
  7  #       (Tools/i18n/msgfmt.py - function "generate"): 
  8  #   Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> 
  9  #   Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. 
 10  #   All rights reserved. 
 11  #   original license: Python Software Foundation (version 2) 
 12  #  
 13  # 
 14  # This file is part of translate. 
 15  # 
 16  # translate is free software; you can redistribute it and/or modify 
 17  # it under the terms of the GNU General Public License as published by 
 18  # the Free Software Foundation; either version 2 of the License, or 
 19  # (at your option) any later version. 
 20  #  
 21  # translate is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 24  # GNU General Public License for more details. 
 25  # 
 26  # You should have received a copy of the GNU General Public License 
 27  # along with translate; if not, write to the Free Software 
 28  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 29  # 
 30   
 31  """Module for parsing Gettext .mo files for translation. 
 32   
 33  The coding of .mo files was produced from documentation in Gettext 0.16 and  
 34  from observation and testing of existing .mo files in the wild. 
 35   
 36  The class does not implement any of the hashing components of Gettext.  This
 37  will probably make the output file slower in some instances. 
 38  """ 
 39   
 40  from translate.storage import base 
 41  from translate.storage import po 
 42  from translate.misc.multistring import multistring 
 43  import struct 
 44  import array 
 45  import re 
 46   
 47  MO_MAGIC_NUMBER = 0x950412deL 
 48   
def mounpack(filename='messages.mo'):
    """Helper to unpack Gettext MO files into a Python string.

    Reads the whole file and prints its content as a sequence of ``\\xNN``
    escapes (one per byte) followed by a newline.

    :param filename: path of the MO file to dump
    """
    # MO files are binary: open in binary mode so that no newline translation
    # is applied, and make sure the handle is released even if reading fails.
    handle = open(filename, 'rb')
    try:
        data = handle.read()
    finally:
        handle.close()
    # Iterating over the data yields one-character strings on Python 2 and
    # integers on Python 3; normalise both to integer byte values.
    codes = []
    for byte in data:
        if not isinstance(byte, int):
            byte = ord(byte)
        codes.append(byte)
    print("\\x%02x" * len(codes) % tuple(codes))
55
def my_swap4(result):
    """Reverse the order of the four low bytes of ``result``.

    Only the lowest 32 bits of the argument are considered; the lowest byte
    ends up as the highest byte of the returned value and vice versa.
    """
    swapped = 0
    # Pick the bytes from least to most significant and push each one in at
    # the bottom, so the first byte taken ends up at the top.
    for shift in (0, 8, 16, 24):
        swapped = (swapped << 8) | ((result >> shift) & 0xff)
    return swapped
63
def hashpjw(str_param):
    """Compute the hash value used to place a key in the MO hash table.

    :param str_param: the key to hash; either a string (each character is
        converted with ``ord``) or a sequence yielding integer byte values
        (such as ``bytes`` on Python 3)
    :return: the hash value as a non-negative integer
    """
    HASHWORDBITS = 32
    # Mask selecting the four most significant bits of a 32-bit word.
    top_nibble_mask = 0xf << (HASHWORDBITS - 4)
    hval = 0
    for char in str_param:
        if not isinstance(char, int):
            char = ord(char)
        hval = (hval << 4) + char
        g = hval & top_nibble_mask
        if g != 0:
            # Fold the top four bits back into the low bits and clear them,
            # which keeps the value within 32 bits.
            hval ^= g >> (HASHWORDBITS - 8)
            hval ^= g
    return hval
77
def get_next_prime_number(start):
    """Find the smallest prime number that is greater or equal to ``start``.

    :param start: integer at which the search begins
    :return: ``start`` itself if it is prime, otherwise the next prime
    """
    def is_prime(num):
        """Return True if the integer ``num`` is a prime number."""
        if num < 2:
            return False
        if num < 4:
            # 2 and 3
            return True
        if num % 2 == 0:
            return False
        # Trial division by odd numbers; a composite number always has a
        # divisor that is not larger than its square root.
        divider = 3
        while divider * divider <= num:
            if num % divider == 0:
                return False
            divider += 2
        return True

    candidate = start
    while not is_prime(candidate):
        candidate += 1
    return candidate
class mounit(base.TranslationUnit):
    """A single translation message stored in a .mo file."""

    def __init__(self, source=None):
        """Create a unit with empty context and msgid-comment lists."""
        self.msgidcomments = []
        self.msgctxt = []
        super(mounit, self).__init__(source)

    def getcontext(self):
        """Return the message context as one string (None if it is unset)."""
        # Still need to handle KDE comments
        context_parts = self.msgctxt
        if context_parts is not None:
            return "".join(context_parts)
        return None

    def isheader(self):
        """Tell whether this unit is the header entry (its source is empty)."""
        return self.source == ""

    def istranslatable(self):
        """Tell whether this unit has a non-empty source to translate."""
        if self.source:
            return True
        return False
119
class mofile(base.TranslationStore):
    """A class representing a .mo file."""
    UnitClass = mounit
    Name = "Gettext MO file"
    Mimetypes = ["application/x-gettext-catalog", "application/x-mo"]
    Extensions = ["mo", "gmo"]

    def __init__(self, inputfile=None, unitclass=mounit):
        """Create a store, optionally loading ``inputfile``.

        :param inputfile: MO data handed to ``parsestring``; skipped if None
        :param unitclass: class used for the units of this store
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.filename = ''
        if inputfile is not None:
            # NOTE(review): this relies on the inherited parsestring filling
            # *this* instance - confirm against base.TranslationStore.
            self.parsestring(inputfile)

    def __str__(self):
        """Output a string representation of the MO data file.

        Units without a target are left out.  The result consists of the
        seven-integer header, the key and value index tables, the hash table
        and finally the NUL-terminated keys and values.  Integers are written
        in native byte order with a fixed width of four bytes.
        """
        # check the header of this file for the copyright note of this function

        # hash_size should be the smallest prime number that is greater
        # or equal (4 / 3 * N) - where N is the number of keys/units.
        # see gettext-0.17:gettext-tools/src/write-mo.c:406
        hash_size = get_next_prime_number(int((len(self.units) * 4) / 3))
        if hash_size <= 2:
            hash_size = 3
        hash_table = [0] * hash_size

        def add_to_hash_table(string, i):
            """Store the one-based index of key number ``i`` in a free slot."""
            hash_value = hashpjw(string)
            hash_cursor = hash_value % hash_size
            orig_hash_cursor = hash_cursor
            increment = 1 + (hash_value % (hash_size - 2))
            while True:
                if hash_table[hash_cursor] == 0:
                    hash_table[hash_cursor] = i + 1
                    break
                # slot taken: probe the next one (double hashing)
                hash_cursor = (hash_cursor + increment) % hash_size
                assert hash_cursor != orig_hash_cursor

        MESSAGES = {}
        for unit in self.units:
            if isinstance(unit.source, multistring):
                source = "".join(unit.msgidcomments) + "\0".join(unit.source.strings)
            else:
                source = "".join(unit.msgidcomments) + unit.source
            if unit.msgctxt:
                source = "".join(unit.msgctxt) + "\x04" + source
            if isinstance(unit.target, multistring):
                target = "\0".join(unit.target.strings)
            else:
                target = unit.target
            if unit.target:
                MESSAGES[source.encode("utf-8")] = target

        # the keys are sorted in the .mo file
        keys = sorted(MESSAGES)
        offsets = []
        id_chunks = []
        str_chunks = []
        ids_length = 0
        strs_length = 0
        for i, key in enumerate(keys):
            # For each string, we need size and file offset.  Each string is NUL
            # terminated; the NUL does not count into the size.
            # TODO: We don't do any encoding detection from the PO Header
            add_to_hash_table(key, i)
            string = MESSAGES[key]  # key is already encoded for use as a dictionary key
            if isinstance(string, unicode):
                string = string.encode('utf-8')
            offsets.append((ids_length, len(key), strs_length, len(string)))
            id_chunks.extend((key, '\0'))
            str_chunks.extend((string, '\0'))
            ids_length += len(key) + 1
            strs_length += len(string) + 1
        # join once instead of growing the strings piece by piece
        ids = ''.join(id_chunks)
        strs = ''.join(str_chunks)

        # The header is 7 32-bit unsigned integers
        keystart = 7*4 + 16*len(keys) + hash_size*4
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets.extend((l1, o1 + keystart))
            voffsets.extend((l2, o2 + valuestart))
        offsets = koffsets + voffsets
        # "=" selects native byte order with standard sizes, so every integer
        # takes exactly the four bytes that the offsets above assume.
        output = struct.pack("=Iiiiiii",
                             MO_MAGIC_NUMBER,          # Magic
                             0,                        # Version
                             len(keys),                # # of entries
                             7*4,                      # start of key index
                             7*4 + len(keys)*8,        # start of value index
                             hash_size, 7*4 + 2*(len(keys)*8))  # size and offset of hash table
        # additional data is not necessary for empty mo files
        if keys:
            output = output + struct.pack("=%di" % len(offsets), *offsets)
            # An array of type "L" would use the platform's "unsigned long",
            # which is eight bytes wide on many 64-bit systems and would
            # shift all keys and values away from their recorded offsets.
            output = output + struct.pack("=%dI" % hash_size, *hash_table)
            output = output + ids
            output = output + strs
        return output

    def parse(self, input):
        """parses the given file or file source string

        :param input: the MO data as a string, or an object with a ``read``
            method; such an object is read completely and then closed
        :raises ValueError: if the magic number is not found or the version
            of the file is greater than 1
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            mosrc = input.read()
            input.close()
            input = mosrc
        # The magic number tells us the byte order used in the file.
        little, = struct.unpack("<L", input[:4])
        big, = struct.unpack(">L", input[:4])
        if little == MO_MAGIC_NUMBER:
            endian = "<"
        elif big == MO_MAGIC_NUMBER:
            endian = ">"
        else:
            raise ValueError("This is not an MO file")
        magic, version, lenkeys, startkey, startvalue, sizehash, offsethash = struct.unpack("%sLiiiiii" % endian, input[:(7*4)])
        if version > 1:
            raise ValueError("Unable to process MO files with versions > 1.  This is a %d version MO file" % version)
        encoding = 'UTF-8'
        pair_format = "%sii" % endian
        for i in range(lenkeys):
            # every index entry is a (length, offset) pair of 32-bit integers
            nextkey = startkey + (i*2*4)
            nextvalue = startvalue + (i*2*4)
            klength, koffset = struct.unpack(pair_format, input[nextkey:nextkey+(2*4)])
            vlength, voffset = struct.unpack(pair_format, input[nextvalue:nextvalue+(2*4)])
            source = input[koffset:koffset+klength]
            context = None
            if "\x04" in source:
                # The first \x04 separates the context from the message id;
                # split only once so that further ones stay in the id.
                context, source = source.split("\x04", 1)
            # Still need to handle KDE comments
            source = multistring(source.split("\0"), encoding=encoding)
            if source == "":
                # the header entry may name the charset of the file
                charset = re.search("charset=([^\\s]+)", input[voffset:voffset+vlength])
                if charset:
                    encoding = po.encodingToUse(charset.group(1))
            target = multistring(input[voffset:voffset+vlength].split("\0"), encoding=encoding)
            newunit = mounit(source)
            newunit.settarget(target)
            if context is not None:
                newunit.msgctxt.append(context)
            self.addunit(newunit)
259