1
2
3 from translate.convert import html2po
4 from translate.convert import po2html
5 from translate.convert import test_convert
6 from translate.misc import wStringIO
7
10 """Helper to convert html to po without a file."""
11 inputfile = wStringIO.StringIO(markup)
12 convertor = html2po.html2po()
13 outputpo = convertor.convertfile(inputfile, "test", False, False)
14 return outputpo
15
16 - def po2html(self, posource, htmltemplate):
23
25 """helper to check that we got the expected number of messages"""
26 actual = len(pofile.units)
27 if actual > 0:
28 if pofile.units[0].isheader():
29 actual = actual - 1
30 print pofile
31 assert actual == expected
32
34 """helper to validate a PO message"""
35 if not pofile.units[0].isheader():
36 unitnumber = unitnumber - 1
37 print 'unit source: ' + str(pofile.units[unitnumber].source) + '|'
38 print 'expected: ' + expected.encode('utf-8') + '|'
39 assert unicode(pofile.units[unitnumber].source) == unicode(expected)
40
46
51
53 """Given a snippet of php, put it into an HTML shell and see
54 if the results are as expected"""
55 self.check_single('<html><head></head><body><p><a href="'+php+'/site.html">Body text</a></p></body></html>', "Body text")
56 self.check_single('<html><head></head><body><p>More things in <a href="'+php+'/site.html">Body text</a></p></body></html>', 'More things in <a href="'+php+'/site.html">Body text</a>')
57 self.check_null('<html><head></head><body><p>'+php+'</p></body></html>')
58
60 """test to ensure that we no longer use the lang attribute"""
61 markup = '''<html lang="en"><head><title>My title</title></head><body></body></html>'''
62 pofile = self.html2po(markup)
63 self.countunits(pofile, 1)
64
65 self.compareunit(pofile, 1, "My title")
66
68 """test that we can extract the <title> tag"""
69 self.check_single("<html><head><title>My title</title></head><body></body></html>", "My title")
70
72 """Test a linebreak in the <title> tag"""
73 htmltext = '''<html>
74 <head>
75 <title>My
76 title</title>
77 </head>
78 <body>
79 </body>
80 </html>
81 '''
82 self.check_single(htmltext, "My title")
83
87
89 """test that we can extract the <p> tag"""
90 self.check_single("<html><head></head><body><p>A paragraph.</p></body></html>", "A paragraph.")
91 markup = "<p>First line.<br>Second line.</p>"
92 pofile = self.html2po(markup)
93 self.compareunit(pofile, 1, "First line.<br>Second line.")
94
96 """Test newlines within the <p> tag."""
97 htmltext = '''<html>
98 <head>
99 </head>
100 <body>
101 <p>
102 A paragraph is a section in a piece of writing, usually highlighting a
103 particular point or topic. It always begins on a new line and usually
104 with indentation, and it consists of at least one sentence.
105 </p>
106 </body>
107 </html>
108 '''
109 self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
110 markup = "<p>First\nline.<br>Second\nline.</p>"
111 pofile = self.html2po(markup)
112 self.compareunit(pofile, 1, "First line.<br>Second line.")
113
115 """test that we can extract the <div> tag"""
116 self.check_single("<html><head></head><body><div>A paragraph.</div></body></html>", "A paragraph.")
117 markup = "<div>First line.<br>Second line.</div>"
118 pofile = self.html2po(markup)
119 self.compareunit(pofile, 1, "First line.<br>Second line.")
120
122 """Test linebreaks within a <div> tag."""
123 htmltext = '''<html>
124 <head>
125 </head>
126 <body>
127 <div>
128 A paragraph is a section in a piece of writing, usually highlighting a
129 particular point or topic. It always begins on a new line and usually
130 with indentation, and it consists of at least one sentence.
131 </div>
132 </body>
133 </html>
134 '''
135 self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
136 markup = "<div>First\nline.<br>Second\nline.</div>"
137 pofile = self.html2po(markup)
138 self.compareunit(pofile, 1, "First line.<br>Second line.")
139
141 """test that we can extract the <a> tag"""
142 self.check_single('<html><head></head><body><p>A paragraph with <a href="http://translate.org.za/">hyperlink</a>.</p></body></html>', 'A paragraph with <a href="http://translate.org.za/">hyperlink</a>.')
143
145 """Test that we can extract the <a> tag with newlines in it."""
146 htmltext = '''<html>
147 <head>
148 </head>
149 <body>
150 <p>A
151 paragraph
152 with <a
153 href="http://translate.org.za/">hyperlink</a>
154 and
155 newlines.</p></body></html>
156 '''
157 self.check_single(htmltext, 'A paragraph with <a href="http://translate.org.za/">hyperlink</a> and newlines.')
158
160 """Test that we can extract the alt attribute from the <img> tag."""
161 self.check_single('''<html><head></head><body><img src="picture.png" alt="A picture"></body></html>''', "A picture")
162
164 """Test that an <img> tag without an alt attribute is ignored."""
165 htmlsource = '''<html><head></head><body><img src="images/topbar.jpg" width="750" height="80"></body></html>'''
166 self.check_null(htmlsource)
167
169 """Test that we can extract the summary attribute."""
170 self.check_single('''<html><head></head><body><table summary="Table summary"></table></body></html>''', "Table summary")
171
181
183 markup = '''<table summary="This is the summary"><caption>A caption</caption><thead><tr><th abbr="Head 1">Heading One</th><th>Heading Two</th></thead><tfoot><tr><td>Foot One</td><td>Foot Two</td></tr></tfoot><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>'''
184 pofile = self.html2po(markup)
185 self.countunits(pofile, 9)
186 self.compareunit(pofile, 1, "This is the summary")
187 self.compareunit(pofile, 2, "A caption")
188 self.compareunit(pofile, 3, "Head 1")
189 self.compareunit(pofile, 4, "Heading One")
190 self.compareunit(pofile, 5, "Heading Two")
191 self.compareunit(pofile, 6, "Foot One")
192 self.compareunit(pofile, 7, "Foot Two")
193 self.compareunit(pofile, 8, "One")
194 self.compareunit(pofile, 9, "Two")
195
197 """Test that we ignore tables that are empty.
198
199 A table is deemed empty if it has no translatable content.
200 """
201
202 self.check_null('''<html><head></head><body><table><tr><td><img src="bob.png"></td></tr></table></body></html>''')
203 self.check_null('''<html><head></head><body><table><tr><td> </td></tr></table></body></html>''')
204 self.check_null('''<html><head></head><body><table><tr><td><strong></strong></td></tr></table></body></html>''')
205
207 """Test to see if the address element is extracted"""
208 self.check_single("<body><address>My address</address></body>", "My address")
209
211 """Test to see if the h* elements are extracted"""
212 markup = "<html><head></head><body><h1>Heading One</h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6></body></html>"
213 pofile = self.html2po(markup)
214 self.countunits(pofile, 6)
215 self.compareunit(pofile, 1, "Heading One")
216 self.compareunit(pofile, 2, "Heading Two")
217 self.compareunit(pofile, 3, "Heading Three")
218 self.compareunit(pofile, 4, "Heading Four")
219 self.compareunit(pofile, 5, "Heading Five")
220 self.compareunit(pofile, 6, "Heading Six")
221
223 """Test to see if h* elements with newlines can be extracted"""
224 markup = "<html><head></head><body><h1>Heading\nOne</h1><h2>Heading\nTwo</h2><h3>Heading\nThree</h3><h4>Heading\nFour</h4><h5>Heading\nFive</h5><h6>Heading\nSix</h6></body></html>"
225 pofile = self.html2po(markup)
226 self.countunits(pofile, 6)
227 self.compareunit(pofile, 1, "Heading One")
228 self.compareunit(pofile, 2, "Heading Two")
229 self.compareunit(pofile, 3, "Heading Three")
230 self.compareunit(pofile, 4, "Heading Four")
231 self.compareunit(pofile, 5, "Heading Five")
232 self.compareunit(pofile, 6, "Heading Six")
233
235 """Test to see if the definition list title (dt) element is extracted"""
236 self.check_single("<html><head></head><body><dl><dt>Definition List Item Title</dt></dl></body></html>", "Definition List Item Title")
237
239 """Test to see if the definition list description (dd) element is extracted"""
240 self.check_single("<html><head></head><body><dl><dd>Definition List Item Description</dd></dl></body></html>", "Definition List Item Description")
241
243 """test to check that we don't double extract a span item"""
244 self.check_single("<html><head></head><body><p>You are a <span>Spanish</span> sentence.</p></body></html>", "You are a <span>Spanish</span> sentence.")
245
255
257 """check that we use the default style of msgid_comments to disambiguate duplicate messages"""
258 markup = "<html><head></head><body><p>Duplicate</p><p>Duplicate</p></body></html>"
259 pofile = self.html2po(markup)
260 self.countunits(pofile, 2)
261
262 self.compareunit(pofile, 1, "Duplicate")
263 self.compareunit(pofile, 2, "Duplicate")
264
266 """check that we reflow multiline content to make it more readable for translators"""
267 self.check_single('''<td valign="middle" width="96%"><font class="headingwhite">South
268 Africa</font></td>''', '''<font class="headingwhite">South Africa</font>''')
269
277
279 """Remove carriage returns from files in dos format."""
280 htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\r
281 <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->\r
282 <head>\r
283 <!-- InstanceBeginEditable name="doctitle" -->\r
284 <link href="fmfi.css" rel="stylesheet" type="text/css">\r
285 </head>\r
286 \r
287 <body>\r
288 <p>The rapid expansion of telecommunications infrastructure in recent\r
289 years has helped to bridge the digital divide to a limited extent.</p> \r
290 </body>\r
291 <!-- InstanceEnd --></html>\r
292 '''
293
294 self.check_single(htmlsource, 'The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.')
295
297 """Convert HTML input in iso-8859-1 correctly to unicode."""
298 htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
299 <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->
300 <head>
301 <!-- InstanceBeginEditable name="doctitle" -->
302 <title>FMFI - South Africa - CSIR Openphone - Overview</title>
303 <!-- InstanceEndEditable -->
304 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
305 <meta name="keywords" content="fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community">
306
307 <!-- InstanceBeginEditable name="head" -->
308 <!-- InstanceEndEditable -->
309 <link href="../../../fmfi.css" rel="stylesheet" type="text/css">
310 </head>
311
312 <body>
313 <p>We aim to please \x96 will you aim too, please?</p>
314 <p>South Africa\x92s language diversity can be challenging.</p>
315 </body>
316 </html>
317 '''
318 pofile = self.html2po(htmlsource)
319
320 self.countunits(pofile, 4)
321 self.compareunit(pofile, 3, u'We aim to please \x96 will you aim too, please?')
322 self.compareunit(pofile, 4, u'South Africa\x92s language diversity can be challenging.')
323
325 """Ensure that unnecessary html is stripped from the resulting unit."""
326
327 htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
328 <html>
329 <head>
330 <title>FMFI - Contact</title>
331 </head>
332 <body>
333 <table width="100%" border="0" cellpadding="0" cellspacing="0">
334 <tr align="left" valign="top">
335 <td width="150" height="556">
336 <table width="157" height="100%" border="0" cellspacing="0" id="leftmenubg-color">
337 <tr>
338 <td align="left" valign="top" height="555">
339 <table width="100%" border="0" cellspacing="0" cellpadding="2">
340 <tr align="left" valign="top" bgcolor="#660000">
341 <td width="4%"><strong></strong></td>
342 <td width="96%"><strong><font class="headingwhite">Projects</font></strong></td>
343 </tr>
344 <tr align="left" valign="top">
345 <td valign="middle" width="4%"><img src="images/arrow.gif" width="8" height="8"></td>
346 <td width="96%"><a href="index.html">Home Page</a></td>
347 </tr>
348 </table>
349 </td>
350 </tr>
351 </table></td>
352 </table>
353 </body>
354 </html>
355 '''
356 pofile = self.html2po(htmlsource)
357 self.countunits(pofile, 3)
358 self.compareunit(pofile, 2, u'Projects')
359 self.compareunit(pofile, 3, u'Home Page')
360
361
362 pofile.units[1].target = 'Projekte'
363 pofile.units[2].target = 'Tuisblad'
364 htmlresult = self.po2html(str(pofile), htmlsource).replace('\n', ' ').replace('= "', '="').replace('> <', '><')
365 snippet = '<td width="96%"><strong><font class="headingwhite">Projekte</font></strong></td>'
366 assert snippet in htmlresult
367 snippet = '<td width="96%"><a href="index.html">Tuisblad</a></td>'
368 assert snippet in htmlresult
369
371 """Test that PHP snippets don't interfere"""
372
373
374 self.check_phpsnippet('''<?=$phpvariable?>''')
375
376
377 self.check_phpsnippet('''<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>''')
378
379
380 self.check_phpsnippet(''' <? asdfghjkl qwertyuiop 1234567890!@#$%^&*()-=_+[]\{}|;':",./<>? ?> ''')
381
383 """Test multiple PHP snippets in a string to make sure they get restored properly"""
384 php1 = '''<?=$phpvariable?>'''
385 php2 = '''<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>'''
386 php3 = '''<? asdfghjklqwertyuiop1234567890!@#$%^&*()-=_+[]\{}|;':",./<>? ?>'''
387
388
389 innertext = '<a href="'+php1+'/site.html">Body text</a> and some '+php2+' more text '+php2+php3
390 htmlsource = '<html><head></head><body><p>'+innertext+'</p></body></html>'
391 self.check_single(htmlsource, innertext)
392
394
395
396 php1 = '''<? abc
397 def
398 ghi ?>'''
399
400
401 innertext = '<a href="'+php1+'/site.html">Body text</a> and some '+php1+' more text '+php1+php1
402 innertrans = '<a href="'+php1+'/site.html">Texte de corps</a> et encore de '+php1+' plus de texte '+php1+php1
403
404 htmlsource = '<html><head></head><body><p>'+innertext+'</p></body></html>'
405 transsource = '<html><head></head><body><p>'+innertrans+'</p></body></html>'
406
407 pofile = self.html2po(htmlsource)
408 pofile.units[0].target = innertrans
409 htmlresult = self.po2html(pofile, htmlsource)
410 assert htmlresult == transsource
411
413 """Tests running actual html2po commands on files"""
414 convertmodule = html2po
415 defaultoptions = {"progress": "none"}
416
423