Package translate :: Package tools :: Module pogrep
[hide private]
[frames | no frames]

Source Code for Module translate.tools.pogrep

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Grep XLIFF, Gettext PO and TMX localization files 
 23   
 24  Matches are output to snippet files of the same type which can then be reviewed  
 25  and later merged using pomerge 
 26   
 27  See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and 
 28  usage instructions 
 29  """ 
 30   
 31  from translate.storage import factory 
 32  from translate.misc import optrecurse 
 33  from translate.misc.multistring import multistring 
 34  from translate.lang import data 
 35  import re 
 36  import locale 
 37   
class GrepFilter:
    """Selects translation units whose text contains (or does not contain) a search string.

    The filter can look at the source text, the target text, the notes and
    the locations of a unit, either with plain substring matching or with a
    regular expression.
    """

    def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False, invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False):
        """Configures the filter.

        @param searchstring: the text (or regular expression) to look for;
            byte strings are decoded using C{encoding}
        @param searchparts: collection of unit parts to search.  Recognised
            names are 'source' (or 'msgid'), 'target' (or 'msgstr'), 'notes'
            (or 'comment') and 'locations'.  If empty or None, the source and
            the target are searched.
        @param ignorecase: lower-case both the search string and the tested
            text before comparing
        @param useregexp: treat C{searchstring} as a regular expression
        @param invertmatch: select the strings that do NOT match
        @param accelchar: accelerator marker to strip from the tested text
            before matching
        @param encoding: encoding used to decode a non-unicode C{searchstring}
        @param includeheader: make L{filterfile} put a header unit at the
            front of non-empty results
        """
        if isinstance(searchstring, unicode):
            self.searchstring = searchstring
        else:
            self.searchstring = searchstring.decode(encoding)
        self.searchstring = data.normalize(self.searchstring)
        if searchparts:
            # For now we still support the old terminology, except for the old 'source'
            # which has a new meaning now.
            self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
            self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
            self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
            self.search_locations = 'locations' in searchparts
        else:
            self.search_source = True
            self.search_target = True
            self.search_notes = False
            self.search_locations = False
        self.ignorecase = ignorecase
        if self.ignorecase:
            # NOTE(review): when combined with useregexp this also lower-cases
            # the pattern itself, so escapes such as \S or \W change meaning.
            self.searchstring = self.searchstring.lower()
        self.useregexp = useregexp
        if self.useregexp:
            self.searchpattern = re.compile(self.searchstring)
        self.invertmatch = invertmatch
        self.accelchar = accelchar
        self.includeheader = includeheader

    def matches(self, teststr):
        """Returns True if C{teststr} is selected by the search settings.

        The text is normalized, optionally lower-cased and stripped of
        accelerator markers before it is compared; the result is negated when
        C{invertmatch} is set.
        """
        teststr = data.normalize(teststr)
        if self.ignorecase:
            teststr = teststr.lower()
        if self.accelchar:
            # The accelerator is a literal marker, not a pattern, so use plain
            # string replacement: a doubled marker becomes "#", a single
            # marker is dropped.
            teststr = teststr.replace(self.accelchar + self.accelchar, "#")
            teststr = teststr.replace(self.accelchar, "")
        if self.useregexp:
            found = self.searchpattern.search(teststr) is not None
        else:
            found = teststr.find(self.searchstring) != -1
        if self.invertmatch:
            found = not found
        return found

    def _matches_any(self, text):
        """Returns True if C{text}, or any string of a multistring, matches."""
        if isinstance(text, multistring):
            strings = text.strings
        else:
            strings = [text]
        for string in strings:
            if self.matches(string):
                return True
        return False

    def filterunit(self, unit):
        """Returns True if any of the selected parts of C{unit} matches.

        Header units are never selected.
        """
        if unit.isheader():
            return False

        if self.search_source and self._matches_any(unit.source):
            return True

        if self.search_target and self._matches_any(unit.target):
            return True

        # A failed notes match must not stop the locations from being searched.
        if self.search_notes and self.matches(unit.getnotes()):
            return True
        if self.search_locations and self.matches(u" ".join(unit.getlocations())):
            return True
        return False

    def filterfile(self, thefile):
        """Returns a new store of the same type holding only the matching units.

        The source and target languages are copied from C{thefile}.  When
        C{includeheader} is set and at least one unit matched, a header is
        placed first: the header of C{thefile} if its first unit is one,
        otherwise a freshly made header.
        """
        thenewfile = type(thefile)()
        thenewfile.setsourcelanguage(thefile.sourcelanguage)
        thenewfile.settargetlanguage(thefile.targetlanguage)
        for unit in thefile.units:
            if self.filterunit(unit):
                thenewfile.addunit(unit)
        # Compare the number of units, not the list itself, with zero.
        if self.includeheader and len(thenewfile.units) > 0:
            if thefile.units[0].isheader():
                thenewfile.units.insert(0, thefile.units[0])
            else:
                thenewfile.units.insert(0, thenewfile.makeheader())
        return thenewfile
125
class GrepOptionParser(optrecurse.RecursiveOptionParser):
    """Option parser for the grep tool: the first free argument is the search string."""

    def parse_args(self, args=None, values=None):
        """Parses the command line, mapping free-standing arguments onto
        the search string, the input and the output.

        The first positional argument always becomes the search string.  Any
        remaining positional arguments fill in whichever of --input and
        --output was not given explicitly; leftovers are reported as an error.
        """
        parsed, positional = optrecurse.optparse.OptionParser.parse_args(self, args, values)
        # some intelligence as to what reasonable people might give on the command line
        if not positional:
            self.error("At least one argument must be given for the search string")
        else:
            parsed.searchstring = positional[0]
            positional = positional[1:]
        if positional and not parsed.input:
            if parsed.output:
                # output was given explicitly, so everything left is input
                parsed.input = positional
                positional = []
            else:
                # keep the last argument back: it is the output
                parsed.input = positional[:-1]
                positional = positional[-1:]
        if positional and not parsed.output:
            parsed.output = positional[-1]
            positional = positional[:-1]
        if positional:
            self.error("You have used an invalid combination of --input, --output and freestanding args")
        # a single input is passed on as a plain value, not a one-item list
        if isinstance(parsed.input, list) and len(parsed.input) == 1:
            parsed.input = parsed.input[0]
        return (parsed, positional)

    def set_usage(self, usage=None):
        """Sets the usage string; when none is given it is assembled from
        getusagestring of every registered option, after "%prog searchstring"."""
        if usage is not None:
            super(GrepOptionParser, self).set_usage(usage)
            return
        option_usages = [self.getusagestring(option) for option in self.option_list]
        self.usage = "%prog searchstring " + " ".join(option_usages)

    def run(self):
        """Parses the arguments, attaches a GrepFilter built from them to the
        options and hands over to recursiveprocess."""
        options, _unused_args = self.parse_args()
        options.inputformats = self.inputformats
        options.outputoptions = self.outputoptions
        # the search string from the command line is decoded with the locale's encoding
        encoding = locale.getpreferredencoding()
        options.checkfilter = GrepFilter(
            options.searchstring,
            options.searchparts,
            options.ignorecase,
            options.useregexp,
            options.invertmatch,
            options.accelchar,
            encoding,
            options.includeheader,
        )
        self.usepsyco(options)
        self.recursiveprocess(options)
168
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Filters inputfile through checkfilter and writes the matches to outputfile.

    templatefile is accepted but not used here.  Returns False, writing
    nothing, when the filtered store is empty; otherwise writes the string
    form of the filtered store and returns True.
    """
    source_store = factory.getobject(inputfile)
    filtered_store = checkfilter.filterfile(source_store)
    if not filtered_store.isempty():
        outputfile.write(str(filtered_store))
        return True
    return False
177
def cmdlineparser():
    """Builds the GrepOptionParser with the supported formats and all grep options."""
    # every supported extension is written back out under the same extension
    formats = {}
    for extension in ("po", "pot", "xliff", "xlf", "xlff", "tmx"):
        formats[extension] = (extension, rungrep)
    # anything without a recognised extension is treated as PO
    formats[None] = ("po", rungrep)

    parser = GrepOptionParser(formats)

    # (option strings, keyword settings), in the order they are registered
    option_specs = [
        (("", "--search"),
         dict(dest="searchparts", action="append", type="choice",
              choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment"],
              metavar="SEARCHPARTS",
              help="searches the given parts (source, target, notes and locations)")),
        (("-I", "--ignore-case"),
         dict(dest="ignorecase", action="store_true", default=False,
              help="ignore case distinctions")),
        (("-e", "--regexp"),
         dict(dest="useregexp", action="store_true", default=False,
              help="use regular expression matching")),
        (("-v", "--invert-match"),
         dict(dest="invertmatch", action="store_true", default=False,
              help="select non-matching lines")),
        (("", "--accelerator"),
         dict(dest="accelchar", action="store", type="choice",
              choices=["&", "_", "~"],
              metavar="ACCELERATOR",
              help="ignores the given accelerator when matching")),
        (("", "--header"),
         dict(dest="includeheader", action="store_true", default=False,
              help="include a PO header in the output")),
    ]
    for option_strings, settings in option_specs:
        parser.add_option(*option_strings, **settings)

    parser.set_usage()
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
203
def main():
    """Command-line entry point: builds the option parser and runs it."""
    cmdlineparser().run()


if __name__ == '__main__':
    main()