Package translate :: Package convert :: Module html2po
[hide private]
[frames] | no frames]

Source Code for Module translate.convert.html2po

 1  #!/usr/bin/env python 
 2  # -*- coding: utf-8 -*- 
 3  # 
 4  # Copyright 2004-2006 Zuza Software Foundation 
 5  #  
 6  # This file is part of translate. 
 7  # 
 8  # translate is free software; you can redistribute it and/or modify 
 9  # it under the terms of the GNU General Public License as published by 
10  # the Free Software Foundation; either version 2 of the License, or 
11  # (at your option) any later version. 
12  #  
13  # translate is distributed in the hope that it will be useful, 
14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  # GNU General Public License for more details. 
17  # 
18  # You should have received a copy of the GNU General Public License 
19  # along with translate; if not, write to the Free Software 
20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
21  # 
22   
23  """convert HTML files to Gettext PO localization files 
24   
25  See: http://translate.sourceforge.net/wiki/toolkit/html2po for examples and  
26  usage instructions 
27  """ 
28   
29  from translate.storage import po 
30  from translate.storage import html 
31   
class html2po:
    """Converter that builds a Gettext PO store from the units of an HTML file."""

    def convertfile(self, inputfile, filename, includeheader, includeuntagged=False, duplicatestyle="msgid_comment"):
        """Parse inputfile as HTML and return the resulting po.pofile store.

        inputfile is handed to html.htmlfile for parsing, with includeuntagged
        forwarded as its includeuntaggeddata option.  filename is accepted but
        not used by this method.  When includeheader is true, a header unit
        (charset UTF-8, encoding 8bit) is added before any content units.
        Each parsed unit contributes one PO unit carrying the unit's source
        text and locations.  Duplicates are finally resolved through
        removeduplicates() using duplicatestyle.
        """
        postore = po.pofile()
        parsed = html.htmlfile(includeuntaggeddata=includeuntagged, inputfile=inputfile)

        if includeheader:
            postore.addunit(postore.makeheader(charset="UTF-8", encoding="8bit"))

        for unit in parsed.units:
            # addsourceunit returns the new PO unit; attach the locations to it directly.
            postore.addsourceunit(unit.source).addlocations(unit.getlocations())

        postore.removeduplicates(duplicatestyle)
        return postore
45
def converthtml(inputfile, outputfile, templates, includeuntagged=False, pot=False, duplicatestyle="msgctxt"):
    """Convert the HTML in inputfile to PO format and write it to outputfile.

    A PO header is requested only when outputfile.tell() reports position 0,
    i.e. nothing has been written to the output yet.  The name passed on to
    the converter is inputfile.name, or "unknown" if that attribute is
    missing.  templates and pot are accepted but not used in this function.
    Always returns 1.
    """
    converter = html2po()
    writeheader = outputfile.tell() == 0
    sourcename = getattr(inputfile, "name", "unknown")
    postore = converter.convertfile(
        inputfile,
        sourcename,
        writeheader,
        includeuntagged,
        duplicatestyle=duplicatestyle,
    )
    outputfile.write(str(postore))
    return 1
54
def main(argv=None):
    """Command-line entry point: set up the option parser and run the conversion.

    Replaces sys.stdout with a stdiotell.StdIOWrapper around it, registers
    converthtml for the html, htm and xhtml input formats (and for the None
    key), adds the -u/--untagged and duplicates options, and runs the parser
    on argv.
    """
    from translate.convert import convert
    from translate.misc import stdiotell
    import sys

    # NOTE(review): presumably wrapped so that tell() works on stdout, which
    # converthtml relies on -- confirm against stdiotell.
    sys.stdout = stdiotell.StdIOWrapper(sys.stdout)

    # Every accepted input format maps to the same (output format, converter) pair.
    formats = dict.fromkeys(("html", "htm", "xhtml", None), ("po", converthtml))

    optparser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__)
    optparser.add_option(
        "-u",
        "--untagged",
        dest="includeuntagged",
        default=False,
        action="store_true",
        help="include untagged sections",
    )
    optparser.passthrough.append("includeuntagged")
    optparser.add_duplicates_option()
    optparser.passthrough.append("pot")
    optparser.run(argv)


# Run the converter only when this module is executed as a script.
if __name__ == "__main__":
    main()