1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
24
25 import re
26
27 from translate.storage import base
28 from translate.lang import data
29 try:
30 from lxml import etree
31 except ImportError, e:
32 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
33
def getText(node):
    """Joins together the text of node and of all its descendants.

    The text is collected with the XPath ``string()`` function, so node is
    expected to offer an ``xpath`` method (as lxml elements do).

    Returns a unicode string; an empty one when node is None.
    """
    if node is None:
        # The previous fallback for this case read ``node.text`` and could
        # therefore only fail with AttributeError.
        return u""
    # Convert explicitly so the caller always receives the unicode type.
    return unicode(node.xpath("string()"))
44
45
47 """generate match objects for all @re_obj matches in @text."""
48 start = 0
49 max = len(text)
50 while start < max:
51 m = re_obj.search(text, start)
52 if not m: break
53 yield m
54 start = m.end()
55
# Patterns for the inline placeholders looked for in unit text.  Every
# pattern consists of one capturing group that spans the whole placeholder.
placeholders = [
    # printf-style conversion such as %d or %s
    '(%[diouxXeEfFgGcrs])',
    # one or more backslashes followed by at most one character, e.g. \n
    r'(\\+.?)',
    # positional long-hex conversion such as %1$lx.  The dollar sign must be
    # escaped: unescaped it is an end-of-string anchor and the pattern could
    # never match.
    r'(%[0-9]\$lx)',
    # any other positional conversion such as %1$s.  %N$lx is excluded here
    # so that this pattern and the previous one never match overlapping text.
    r'(%[0-9]\$(?!lx)[a-z])',
    # shortest run of text enclosed in angle brackets, e.g. <b>
    '(<.+?>)',
]
re_placeholders = [re.compile(ph) for ph in placeholders]

# Namespace URI used to build the Clark-notation names of the xml:lang and
# xml:space attributes.
XML_NS = 'http://www.w3.org/XML/1998/namespace'
69
def getXMLlang(node):
    """Gets the xml:lang attribute on node.

    The value is whatever ``node.get`` returns for the namespaced attribute
    name (presumably None when the attribute is not set).
    """
    return node.get("{%s}lang" % XML_NS)
73
75 """Sets the xml:lang attribute on node"""
76 node.set("{%s}lang" % XML_NS, lang)
77
79 """Sets the xml:space attribute on node"""
80 node.set("{%s}space" % XML_NS, value)
81
def namespaced(namespace, name):
    """Returns name in Clark notation within the given namespace.

    For example namespaced("urn:oasis:names:tc:xliff:document:1.1", "source")
    returns {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.

    When namespace is false (None or an empty string) name is returned
    unchanged.
    """
    if not namespace:
        return name
    return "{%s}%s" % (namespace, name)
93
95 """A single unit in the file.
96 Provisional work is done to make several languages possible."""
97
98
99 rootNode = ""
100
101 languageNode = ""
102
103 textNode = ""
104
105 namespace = None
106
def __init__(self, source, empty=False):
    """Creates the unit's XML element and stores the given source string.

    With empty=True nothing at all is initialised (the base class
    constructor is not called either); the caller is then expected to
    attach an existing element itself.
    """
    if not empty:
        # The element is named after the class attribute rootNode.
        self.xmlelement = etree.Element(self.rootNode)
        super(LISAunit, self).__init__(source)
115
117 """Compares two units"""
118 languageNodes = self.getlanguageNodes()
119 otherlanguageNodes = other.getlanguageNodes()
120 if len(languageNodes) != len(otherlanguageNodes):
121 return False
122 for i in range(len(languageNodes)):
123 mytext = self.getNodeText(languageNodes[i])
124 othertext = other.getNodeText(otherlanguageNodes[i])
125 if mytext != othertext:
126
127 return False
128 return True
129
def namespaced(self, name):
    """Returns name in Clark notation, using this unit's namespace.

    For example namespaced("source") in an XLIFF document might return
    {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.
    """
    # Inside the class body this name refers to the module-level helper
    # namespaced(namespace, name), not to this method.
    return namespaced(self.namespace, name)
138
140 text = data.forceunicode(text)
141 languageNodes = self.getlanguageNodes()
142 sourcelanguageNode = self.createlanguageNode(sourcelang, text, "source")
143 if len(languageNodes) > 0:
144 self.xmlelement[0] = sourcelanguageNode
145 else:
146 self.xmlelement.append(sourcelanguageNode)
147
150 source = property(getsource, setsource)
151
def settarget(self, text, lang='xx', append=False):
    """Sets the "target" string (second language entry), or appends one.

    text   -- the new target text; None removes the current target
              (unless append is set)
    lang   -- language handed to createlanguageNode for the new entry
    append -- add the entry at the end instead of replacing the second one

    Nothing happens when text equals the current target.
    """
    text = data.forceunicode(text)
    if self.gettarget() == text:
        # Unchanged value, which includes None when no target exists yet.
        return

    existing = self.getlanguageNodes()
    assert len(existing) > 0

    if text is not None:
        newnode = self.createlanguageNode(lang, text, "target")
        if not append and len(existing) != 1:
            # Goes in front of the old target, which is dropped below.
            self.xmlelement.insert(1, newnode)
        else:
            self.xmlelement.append(newnode)

    if len(existing) > 1 and not append:
        # Drop the node that held the target before this call.
        self.xmlelement.remove(existing[1])
169
171 """retrieves the "target" text (second entry), or the entry in the
172 specified language, if it exists"""
173 if lang:
174 node = self.getlanguageNode(lang=lang)
175 else:
176 node = self.getlanguageNode(lang=None, index=1)
177 return self.getNodeText(node)
178 target = property(gettarget, settarget)
179
181 """Returns a xml Element setup with given parameters to represent a
182 single language entry. Has to be overridden."""
183 return None
184
186 """Create the text node in parent containing all the ph tags"""
187 matches = _getPhMatches(text)
188 if not matches:
189 parent.text = text
190 return
191
192
193 start = matches[0].start()
194 pretext = text[:start]
195 if pretext:
196 parent.text = pretext
197 lasttag = parent
198 for i, m in enumerate(matches):
199
200 pretext = text[start:m.start()]
201
202 if pretext:
203 lasttag.tail = pretext
204
205 phnode = etree.SubElement(parent, "ph")
206 phnode.set("id", str(i+1))
207 phnode.text = m.group()
208 lasttag = phnode
209 start = m.end()
210
211 if text[start:]:
212 lasttag.tail = text[start:]
213
def getlanguageNodes(self):
    """Returns a list of all nodes that contain per language information.

    These are the direct children of self.xmlelement whose tag is the
    namespaced form of the class attribute languageNode.
    """
    language_tag = self.namespaced(self.languageNode)
    return self.xmlelement.findall(language_tag)
217
219 """Retrieves a languageNode either by language or by index"""
220 if lang is None and index is None:
221 raise KeyError("No criterea for languageNode given")
222 languageNodes = self.getlanguageNodes()
223 if lang:
224 for set in languageNodes:
225 if getXMLlang(set) == lang:
226 return set
227 else:
228 if index >= len(languageNodes):
229 return None
230 else:
231 return languageNodes[index]
232 return None
233
def getNodeText(self, languageNode):
    """Returns the text held by the given languageNode.

    None is returned when languageNode is None, or when the class defines
    a textNode and no such element exists below languageNode.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        # No dedicated text element: the language node carries the text.
        return getText(languageNode)
    # Look for the text element at any depth; only the first hit is used.
    candidates = languageNode.findall('.//%s' % self.namespaced(self.textNode))
    if not candidates:
        return None
    return getText(candidates[0])
245
247 return etree.tostring(self.xmlelement, pretty_print=True, encoding='utf-8')
248
@classmethod
def createfromxmlElement(cls, element):
    """Returns a unit of this class wrapped around an existing element.

    The unit is constructed with empty=True and element is then assigned
    to its xmlelement attribute as is.
    """
    term = cls(None, empty=True)
    term.xmlelement = element
    return term
254
256 """A class representing a file store for one of the LISA file formats."""
257 UnitClass = LISAunit
258
259 rootNode = ""
260
261 bodyNode = ""
262
263 XMLskeleton = ""
264
265 namespace = None
266
267 - def __init__(self, inputfile=None, sourcelanguage='en', targetlanguage=None, unitclass=None):
279
281 """Method to be overridden to initialise headers, etc."""
282 pass
283
def namespaced(self, name):
    """Returns name in Clark notation, using this object's namespace.

    For example namespaced("source") in an XLIFF document might return
    {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.
    """
    # Inside the class body this name refers to the module-level helper
    # namespaced(namespace, name), not to this method.
    return namespaced(self.namespace, name)
292
def initbody(self):
    """Stores the document's default namespace and its body element.

    self.namespace is taken from the root element's namespace map and
    self.body is looked up once here so later code can use it directly.
    """
    root = self.document.getroot()
    # The None key of nsmap is the un-prefixed (default) namespace.
    self.namespace = root.nsmap.get(None, None)
    # Must come after the assignment above: namespaced() reads self.namespace.
    body_path = '//%s' % self.namespaced(self.bodyNode)
    self.body = self.document.find(body_path)
297
299
300 """Adds and returns a new unit with the given string as first entry."""
301 newunit = self.UnitClass(source)
302 self.addunit(newunit)
303 return newunit
304
305 - def addunit(self, unit, new=True):
310
312 """Converts to a string containing the file's XML"""
313 return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8')
314
316 """Populates this object from the given xml string"""
317 if not hasattr(self, 'filename'):
318 self.filename = getattr(xml, 'name', '')
319 if hasattr(xml, "read"):
320 xml.seek(0)
321 posrc = xml.read()
322 xml = posrc
323 self.document = etree.fromstring(xml).getroottree()
324 self.encoding = self.document.docinfo.encoding
325 self.initbody()
326 assert self.document.getroot().tag == self.namespaced(self.rootNode)
327 termEntries = self.body.findall('.//%s' % self.namespaced(self.UnitClass.rootNode))
328 if termEntries is None:
329 return
330 for entry in termEntries:
331 term = self.UnitClass.createfromxmlElement(entry)
332 self.addunit(term, new=False)
333