Package translate :: Package storage :: Module csvl10n
[hide private]
[frames | no frames]

Source Code for Module translate.storage.csvl10n

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """classes that hold units of comma-separated values (.csv) files (csvunit) 
 23  or entire files (csvfile) for use with localisation 
 24  """ 
 25   
 26  import csv 
 27   
 28  from translate.misc import sparse 
 29  from translate.storage import base 
 30   
31 -class SimpleDictReader:
32 - def __init__(self, fileobj, fieldnames):
33 self.fieldnames = fieldnames 34 self.contents = fileobj.read() 35 self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r") 36 self.parser.stringescaping = 0 37 self.parser.quotechars = '"' 38 self.tokens = self.parser.tokenize(self.contents) 39 self.tokenpos = 0
40
41 - def __iter__(self):
42 return self
43
44 - def getvalue(self, value):
45 """returns a value, evaluating strings as neccessary""" 46 if (value.startswith("'") and value.endswith("'")) or (value.startswith('"') and value.endswith('"')): 47 return sparse.stringeval(value) 48 else: 49 return value
50
51 - def next(self):
52 lentokens = len(self.tokens) 53 while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n": 54 self.tokenpos += 1 55 if self.tokenpos >= lentokens: 56 raise StopIteration() 57 thistokens = [] 58 while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n": 59 thistokens.append(self.tokens[self.tokenpos]) 60 self.tokenpos += 1 61 while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n": 62 self.tokenpos += 1 63 fields = [] 64 # patch together fields since we can have quotes inside a field 65 currentfield = '' 66 fieldparts = 0 67 for token in thistokens: 68 if token == ',': 69 # a field is only quoted if the whole thing is quoted 70 if fieldparts == 1: 71 currentfield = self.getvalue(currentfield) 72 fields.append(currentfield) 73 currentfield = '' 74 fieldparts = 0 75 else: 76 currentfield += token 77 fieldparts += 1 78 # things after the last comma... 79 if fieldparts: 80 if fieldparts == 1: 81 currentfield = self.getvalue(currentfield) 82 fields.append(currentfield) 83 values = {} 84 for fieldnum in range(len(self.fieldnames)): 85 if fieldnum >= len(fields): 86 values[self.fieldnames[fieldnum]] = "" 87 else: 88 values[self.fieldnames[fieldnum]] = fields[fieldnum] 89 return values
90
class csvunit(base.TranslationUnit):
    """A single row of a localisation CSV file: comment, source and target."""

    # (unescaped, escaped) pairs applied to the first character of a string;
    # presumably these are the leading characters a spreadsheet would treat
    # specially -- TODO confirm
    spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]

    def __init__(self, source=None):
        """Create a unit with the given source and empty comment and target."""
        super(csvunit, self).__init__(source)
        self.comment = ""
        self.source = source
        self.target = ""

    def add_spreadsheet_escapes(self, source, target):
        """add common spreadsheet escapes to two strings

        Only a leading character is escaped; returns the (source, target)
        pair."""
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(unescaped):
                source = source.replace(unescaped, escaped, 1)
            if target.startswith(unescaped):
                target = target.replace(unescaped, escaped, 1)
        return source, target

    def remove_spreadsheet_escapes(self, source, target):
        """remove common spreadsheet escapes from two strings

        Only a leading escape sequence is removed; returns the
        (source, target) pair."""
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(escaped):
                source = source.replace(escaped, unescaped, 1)
            if target.startswith(escaped):
                target = target.replace(escaped, unescaped, 1)
        return source, target

    def fromdict(self, cedict):
        """Populate comment, source and target from a row dictionary.

        Missing keys and None values become empty strings, byte strings are
        decoded as UTF-8 and unicode values are taken as they are. Leading
        spreadsheet escapes are then stripped from source and target.
        """
        def text(key):
            value = cedict.get(key, '')
            # normalise None *before* decoding: None has no decode() method
            if value is None:
                return ''
            # already-decoded text must not be pushed through decode() again
            if isinstance(value, unicode):
                return value
            return value.decode('utf-8')

        self.comment = text('comment')
        self.source = text('source')
        self.target = text('target')
        self.source, self.target = self.remove_spreadsheet_escapes(self.source, self.target)

    def todict(self, encoding='utf-8'):
        """Return the unit as a dict with 'comment', 'source' and 'target' keys.

        Spreadsheet escapes are added to source and target, and any unicode
        value is encoded with the given encoding.
        """
        comment, source, target = self.comment, self.source, self.target
        source, target = self.add_spreadsheet_escapes(source, target)
        if isinstance(comment, unicode):
            comment = comment.encode(encoding)
        if isinstance(source, unicode):
            source = source.encode(encoding)
        if isinstance(target, unicode):
            target = target.encode(encoding)
        return {'comment': comment, 'source': source, 'target': target}
136
class csvfile(base.TranslationStore):
    """This class represents a .csv file with various lines.
    The default format contains three columns: comments, source, target"""
    UnitClass = csvunit
    Mimetypes = ['text/comma-separated-values', 'text/csv']
    Extensions = ["csv"]

    def __init__(self, inputfile=None, fieldnames=None):
        """Create a store, optionally parsing the contents of inputfile.

        fieldnames may be a sequence of column names or a single
        comma-separated string; it defaults to comment, source, target.
        inputfile, when given, is read completely and then closed.
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        if fieldnames is None:
            self.fieldnames = ['comment', 'source', 'target']
        else:
            if isinstance(fieldnames, basestring):
                fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
            self.fieldnames = fieldnames
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # close the file even when reading it fails
            try:
                csvsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(csvsrc)

    def parse(self, csvsrc):
        """Parse CSV text and add one unit per row to the store."""
        # NOTE(review): csv.StringIO is not a documented csv API; this relies
        # on the csv module exposing the StringIO name it imports -- confirm
        srcfile = csv.StringIO(csvsrc)
        reader = SimpleDictReader(srcfile, self.fieldnames)
        for row in reader:
            newce = self.UnitClass()
            newce.fromdict(row)
            self.addunit(newce)

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Return all units serialised as CSV text, one row per unit."""
        outputfile = csv.StringIO()
        # todict() always supplies comment, source and target; ignore the keys
        # that are not in fieldnames instead of letting DictWriter raise
        # ValueError when a custom field list omits one of them
        writer = csv.DictWriter(outputfile, self.fieldnames, extrasaction='ignore')
        for ce in self.units:
            writer.writerow(ce.todict())
        # getvalue() returns everything written, with no rewind needed
        return outputfile.getvalue()
if __name__ == '__main__':
    # Run as a filter: parse CSV from standard input and write the
    # re-serialised store to standard output.
    import sys
    store = csvfile()
    rawinput = sys.stdin.read()
    store.parse(rawinput)
    rendered = str(store)
    sys.stdout.write(rendered)