1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of comma-separated values (.csv) files (csvunit)
23 or entire files (csvfile) for use with localisation
24 """
25
26 import csv
27
28 from translate.misc import sparse
29 from translate.storage import base
30
def __init__(self, fileobj, fieldnames):
    """Read all of *fileobj* and split its text into tokens.

    fileobj    -- object whose read() returns the complete CSV text
    fieldnames -- column names, stored unchanged as self.fieldnames
    """
    self.fieldnames = fieldnames
    self.contents = fileobj.read()
    # Parser setup: "," and "\n" are the default tokens, "\r" is treated as
    # whitespace, '"' is the only quote character and stringescaping is 0.
    tokenizer = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
    self.parser = tokenizer
    tokenizer.stringescaping = 0
    tokenizer.quotechars = '"'
    self.tokens = tokenizer.tokenize(self.contents)
    # Position of the next unread entry in self.tokens.
    self.tokenpos = 0
40
43
50
# Body of a method whose `def` line is not part of this listing.
# NOTE(review): it raises StopIteration once the tokens are used up, so it is
# presumably the reader's iteration step -- confirm against the full file.
# It consumes the tokens of one row from self.tokens (advancing
# self.tokenpos) and returns that row as a dict keyed by self.fieldnames.
52 lentokens = len(self.tokens)
# Skip any newline tokens that precede the row.
53 while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
54 self.tokenpos += 1
# Nothing left to read: signal the end of the data.
55 if self.tokenpos >= lentokens:
56 raise StopIteration()
# Collect every token up to, but not including, the next newline token.
57 thistokens = []
58 while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
59 thistokens.append(self.tokens[self.tokenpos])
60 self.tokenpos += 1
# Consume the newline token(s) that follow the row.
61 while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
62 self.tokenpos += 1
63 fields = []
64
# Rebuild fields from the row's tokens: a "," token closes the current field,
# any other token is concatenated onto it.
65 currentfield = ''
# fieldparts counts how many tokens the current field is made of.
66 fieldparts = 0
67 for token in thistokens:
68 if token == ',':
69
# A field that consists of exactly one token is passed through
# self.getvalue() before being stored.
70 if fieldparts == 1:
71 currentfield = self.getvalue(currentfield)
72 fields.append(currentfield)
73 currentfield = ''
74 fieldparts = 0
75 else:
76 currentfield += token
77 fieldparts += 1
78
# Store the final field, which has no "," after it.
79 if fieldparts:
80 if fieldparts == 1:
81 currentfield = self.getvalue(currentfield)
82 fields.append(currentfield)
# Pair field names with values by position: names without a matching field
# get "", and fields beyond len(self.fieldnames) are not included.
83 values = {}
84 for fieldnum in range(len(self.fieldnames)):
85 if fieldnum >= len(fields):
86 values[self.fieldnames[fieldnum]] = ""
87 else:
88 values[self.fieldnames[fieldnum]] = fields[fieldnum]
89 return values
90
91 -class csvunit(base.TranslationUnit):
92 spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]
98
107
116
125
def todict(self, encoding='utf-8'):
    """Return this unit as a dict with 'comment', 'source' and 'target' keys.

    source and target are first passed through
    self.add_spreadsheet_escapes(). Any of the three values that is a
    unicode object is then encoded with *encoding*; values of any other
    type are returned as they are.
    """
    row = {'comment': self.comment}
    row['source'], row['target'] = self.add_spreadsheet_escapes(self.source, self.target)
    for key in ('comment', 'source', 'target'):
        value = row[key]
        if isinstance(value, unicode):
            row[key] = value.encode(encoding)
    return row
136
137 -class csvfile(base.TranslationStore):
138 """This class represents a .csv file with various lines.
139 The default format contains three columns: comments, source, target"""
140 UnitClass = csvunit
141 Mimetypes = ['text/comma-separated-values', 'text/csv']
142 Extensions = ["csv"]
def __init__(self, inputfile=None, fieldnames=None):
    """Create the store and, when *inputfile* is given, load its contents.

    inputfile  -- optional file-like object; it is read in full, closed,
                  and the text is handed to self.parse()
    fieldnames -- optional column names, either a list or one
                  comma-separated string; defaults to
                  ['comment', 'source', 'target']
    """
    base.TranslationStore.__init__(self, unitclass=self.UnitClass)
    self.units = []
    if fieldnames is None:
        self.fieldnames = ['comment', 'source', 'target']
    else:
        if isinstance(fieldnames, basestring):
            # "a, b ,c" -> ['a', 'b', 'c']
            fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
        self.fieldnames = fieldnames
    # Objects without a name attribute (None included) give ''.
    self.filename = getattr(inputfile, 'name', '')
    if inputfile is not None:
        # Close the file even when read() raises, so the handle is not
        # leaked on a failed load.
        try:
            csvsrc = inputfile.read()
        finally:
            inputfile.close()
        self.parse(csvsrc)
157
158 - def parse(self, csvsrc):
165
172
181
182
if __name__ == '__main__':
    # Script mode: parse CSV text from stdin into a csvfile and write the
    # store's str() form to stdout.
    import sys
    cf = csvfile()
    csvtext = sys.stdin.read()
    cf.parse(csvtext)
    rendered = str(cf)
    sys.stdout.write(rendered)
188