Package translate :: Package tools :: Module pocount
[hide private]
[frames] | no frames]

Source Code for Module translate.tools.pocount

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2003-2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Create string and word counts for supported localization files including: 
 23  XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc 
 24   
 25  See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and 
 26  usage instructions 
 27  """ 
 28   
 29  from translate.storage import factory 
 30  from translate.storage import statsdb 
 31  import sys 
 32  import os 
 33   
def calcstats_old(filename):
    """Compute the statistics for filename without using the stats cache.

    This is the previous implementation of calcstats() and is left for
    comparison and debugging purposes.

    The file is loaded with factory.getobject(). If that raises ValueError
    the error text is printed and an empty dictionary is returned.
    Otherwise the result is a dictionary holding the unit counts
    ("translated", "fuzzy", "untranslated", "review", "total") and the word
    counts ("translatedsourcewords", "translatedtargetwords",
    "fuzzysourcewords", "untranslatedsourcewords", "reviewsourcewords",
    "totalsourcewords").
    """
    try:
        store = factory.getobject(filename)
    except ValueError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the handler parses on every Python version.
        print(str(sys.exc_info()[1]))
        return {}

    # header units are never counted
    units = [unit for unit in store.units if not unit.isheader()]
    translated = translatedmessages(units)
    fuzzy = fuzzymessages(units)
    review = [unit for unit in units if unit.isreview()]
    untranslated = untranslatedmessages(units)

    # statsdb.wordsinunit() is indexed below as [0] for the source count
    # and [1] for the target count.
    wordcounts = {}
    for unit in units:
        wordcounts[unit] = statsdb.wordsinunit(unit)

    def countwords(elementlist, index):
        """Add up one column of wordcounts over the units in elementlist."""
        total = 0
        for unit in elementlist:
            total += wordcounts[unit][index]
        return total

    stats = {
        #units
        "translated": len(translated),
        "fuzzy": len(fuzzy),
        "untranslated": len(untranslated),
        "review": len(review),
        #words
        "translatedsourcewords": countwords(translated, 0),
        "translatedtargetwords": countwords(translated, 1),
        "fuzzysourcewords": countwords(fuzzy, 0),
        "untranslatedsourcewords": countwords(untranslated, 0),
        "reviewsourcewords": countwords(review, 0),
    }
    # review units are reported separately and are not part of the totals
    stats["total"] = (stats["translated"] + stats["fuzzy"] +
                      stats["untranslated"])
    stats["totalsourcewords"] = (stats["translatedsourcewords"] +
                                 stats["fuzzysourcewords"] +
                                 stats["untranslatedsourcewords"])
    return stats
70
def calcstats(filename):
    """Return what a new statsdb.StatsCache reports as totals for filename."""
    return statsdb.StatsCache().filetotals(filename)
74
def summarize(title, stats, CSVstyle=False):
    """Print the statistics in stats, labelled with title.

    stats must provide the keys "translated", "translatedsourcewords",
    "translatedtargetwords", "fuzzy", "fuzzysourcewords", "untranslated",
    "untranslatedsourcewords", "total", "totalsourcewords" and "review";
    "reviewsourcewords" is read only when stats["review"] is greater than 0.

    With CSVstyle true a single comma separated line is printed, otherwise
    a small table followed by an empty line. Nothing is returned.
    """
    def percent(numerator, divisor):
        """Return numerator as a whole percentage of divisor (0 if it is 0)."""
        if divisor == 0:
            return 0
        # Floor division keeps the integer result that "/" gives for ints on
        # Python 2, whatever interpreter runs this.
        return numerator * 100 // divisor

    if CSVstyle:
        # The pieces are joined with a single space because that is what the
        # former chain of "print ...," statements emitted between them.
        fields = [
            "%s, " % title,
            "%d, %d, %d," % (stats["translated"],
                             stats["translatedsourcewords"],
                             stats["translatedtargetwords"]),
            "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]),
            "%d, %d," % (stats["untranslated"],
                         stats["untranslatedsourcewords"]),
            "%d, %d" % (stats["total"], stats["totalsourcewords"]),
        ]
        if stats["review"] > 0:
            # This used to look up the misspelt key "reviewsourdcewords" and
            # so raised KeyError for every file with units under review.
            fields.append(", %d, %d" % (stats["review"],
                                        stats["reviewsourcewords"]))
        print(" ".join(fields))
        return

    totalunits = stats["total"]
    totalwords = stats["totalsourcewords"]
    # NOTE(review): the column padding inside these literals may have been
    # collapsed in the listing this was taken from - confirm against upstream.
    print(title)
    print("type strings words (source) words (translation)")
    print("translated: %5d (%3d%%) %10d (%3d%%) %15d" %
          (stats["translated"],
           percent(stats["translated"], totalunits),
           stats["translatedsourcewords"],
           percent(stats["translatedsourcewords"], totalwords),
           stats["translatedtargetwords"]))
    print("fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" %
          (stats["fuzzy"],
           percent(stats["fuzzy"], totalunits),
           stats["fuzzysourcewords"],
           percent(stats["fuzzysourcewords"], totalwords)))
    print("untranslated: %5d (%3d%%) %10d (%3d%%) n/a" %
          (stats["untranslated"],
           percent(stats["untranslated"], totalunits),
           stats["untranslatedsourcewords"],
           percent(stats["untranslatedsourcewords"], totalwords)))
    print("Total: %5d %17d %22d" %
          (totalunits, totalwords, stats["translatedtargetwords"]))
    if stats["review"] > 0:
        print("review: %5d %17d n/a" %
              (stats["review"], stats["reviewsourcewords"]))
    # print("") rather than a bare print so the blank separator line is the
    # same on Python 2 and Python 3.
    print("")
def fuzzymessages(units):
    """Return the units that are fuzzy and have a non-empty target."""
    return [unit for unit in units if unit.isfuzzy() and unit.target]
121
def translatedmessages(units):
    """Return the units for which istranslated() is true."""
    return [unit for unit in units if unit.istranslated()]
124
def untranslatedmessages(units):
    """Return the units that are neither translated nor fuzzy but have a source."""
    return [unit for unit in units
            if not unit.istranslated() and not unit.isfuzzy() and unit.source]
127
class summarizer:
    """Print statistics for a list of files and directories.

    All the work happens in the constructor: each named file is counted and
    printed, directories are walked recursively, and the running sums are
    kept in self.totals (a dictionary) and self.filecount.
    """

    def __init__(self, filenames, CSVstyle):
        """Process filenames and print their statistics.

        filenames is a list of file or directory paths; paths that do not
        exist are reported on stderr and skipped. CSVstyle selects comma
        separated output (preceded by a header row) instead of tables. A
        "TOTAL:" table is printed only for table output and only when more
        than one file was counted.
        """
        self.totals = {}
        self.filecount = 0
        self.CSVstyle = CSVstyle
        if self.CSVstyle:
            print("Filename, Translated Messages, Translated Source Words, "
                  "Translated Target Words, Fuzzy Messages, "
                  "Fuzzy Source Words, Untranslated Messages, "
                  "Untranslated Source Words, Total Message, "
                  "Total Source Words, Review Messages, Review Source Words")
        for filename in filenames:
            if not os.path.exists(filename):
                # written directly to stderr; equivalent to the former
                # "print >> sys.stderr" but valid on Python 2 and 3
                sys.stderr.write("cannot process %s: does not exist\n"
                                 % filename)
                continue
            elif os.path.isdir(filename):
                self.handledir(filename)
            else:
                self.handlefile(filename)
        if self.filecount > 1 and not self.CSVstyle:
            summarize("TOTAL:", self.totals)
            print("File count: %5d" % (self.filecount))
            print("")

    def updatetotals(self, stats):
        """Update self.totals with the statistics in stats."""
        for key in stats:
            # dict.get() replaces the deprecated has_key() test; keys seen
            # for the first time start from 0.
            self.totals[key] = self.totals.get(key, 0) + stats[key]

    def handlefile(self, filename):
        """Count one file, add it to the totals and print its statistics.

        Files for which calcstats() returns an empty result are ignored and
        do not increase self.filecount.
        """
        stats = calcstats(filename)
        if stats:
            self.updatetotals(stats)
            summarize(filename, stats, self.CSVstyle)
            self.filecount += 1

    def handlefiles(self, dirname, filenames):
        """Process the entries filenames found inside directory dirname."""
        for filename in filenames:
            pathname = os.path.join(dirname, filename)
            if os.path.isdir(pathname):
                self.handledir(pathname)
            else:
                self.handlefile(pathname)

    def handledir(self, dirname):
        """Process every entry of dirname, descending into subdirectories.

        Directories named CVS, .svn or _darcs are skipped.
        """
        path, name = os.path.split(dirname)
        if name in ["CVS", ".svn", "_darcs"]:
            return
        entries = os.listdir(dirname)
        self.handlefiles(dirname, entries)
179
def main():
    """Run the counter over the paths given on the command line.

    A "--csv" argument selects CSV output and is removed from sys.argv; all
    the remaining arguments are handed to summarizer as file names.
    """
    # TODO: make this handle command line options using optparse...
    CSVstyle = "--csv" in sys.argv
    if CSVstyle:
        sys.argv.remove("--csv")
    # psyco is optional: any failure to import or enable it is ignored
    try:
        import psyco
        psyco.full()
    except Exception:
        pass
    summarizer(sys.argv[1:], CSVstyle)

# Run the command line tool only when this module is executed as a script.
if __name__ == '__main__':
    main()