Package translate :: Package filters :: Module checks
[hide private]
[frames | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  #  
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  #  
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  #  
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation  
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check  
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the  
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in  
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  import re 
  47   
  48  # These are some regular expressions that are compiled for use in some tests 
  49   
  50  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as 
  51  # this should capture printf types defined in other platforms. 
  52  # extended to support Python named format specifiers 
  53  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  54   
  55  # The name of the XML tag 
  56  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  57   
  58  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  59  #TODO: remove escaped strings once usage is audited 
  60  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  61   
  62  # The whole tag 
  63  tag_re = re.compile("<[^>]+>") 
  64   
def tagname(string):
    """Returns the name of the XML/HTML tag in string.

    @type string: String
    @param string: a tag, starting with its opening "<" (the pattern is
    applied with match(), so it is anchored at the start of string)
    @return: the text captured by tagname_re: the run of word characters
    and "/" that follows the "<" and any whitespace (possibly empty)
    """
    # group(1) is the captured name.  The argument of groups() is a default
    # value for unmatched groups, not a group number, so groups(1)[0] only
    # gave the right answer by accident.
    return tagname_re.match(string).group(1)
68
def intuplelist(pair, list):
    """Looks up the triple pair == (a, b, c) in list, with None as a wildcard.

    A pattern (x, y, z) taken from list matches when y equals b, x is either
    a or None, and z is either None or c.  Wildcards are therefore only
    honoured in the first and third positions.

    @param pair: the (a, b, c) triple to look for
    @param list: the patterns to search, each an (x, y, z) triple
    @return: the first matching pattern; pair itself when nothing matches or
    when pair is a bare tag name (b and c are both None)
    """
    tag, attribute, value = pair
    if (attribute, value) == (None, None):
        # This is a tagname
        return pair
    for candidate in list:
        candidate_tag, candidate_attribute, candidate_value = candidate
        if candidate_attribute != attribute:
            continue
        if candidate_tag not in (tag, None):
            continue
        if candidate_value in (None, value):
            return candidate
    return pair
83
def tagproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag strings as
    (tagname, propertyname, propertyvalue), but ignores those combinations
    specified in ignore.

    Every tag contributes a (tagname, None, None) entry followed by one
    entry per attribute that is not covered by ignore.

    @param strings: the tags to inspect, each starting with "<"
    @param ignore: (tag, property, value) patterns to leave out; None
    entries act as wildcards as implemented by intuplelist()
    @return: a list of (tagname, propertyname, propertyvalue) tuples
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for attribute, value, _dquoted, _squoted in property_re.findall(string):
            # Strip the quotes:
            value = value[1:-1]
            candidate = (tag, attribute, value)
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # Skip this attribute only.  Leaving the loop here would
                # silently drop every later attribute of the same tag.
                continue
            properties.append(candidate)
    return properties
106 107
class FilterFailure(Exception):
    """Raised by a filter to report that a check did not pass.

    The exception carries an explanation or comment: a single message, or
    several messages joined with ", ".
    """

    def __init__(self, messages):
        """
        @param messages: one unicode message or a list of unicode messages
        """
        if isinstance(messages, list):
            parts = messages
        else:
            parts = [messages]
        first = parts[0]
        assert isinstance(first, unicode) # Assumption: all of same type
        Exception.__init__(self, u", ".join(parts))
116
class SeriousFilterFailure(FilterFailure):
    """A FilterFailure for translations that might break an application.

    It signals that the filter didn't pass and that the problem is serious
    enough for the string to be marked fuzzy.
    """
# Each entry is a (tag, attribute, value) tuple naming an attribute that may
# be changed/ignored when it occurs inside tag.  The third element, when
# given, names a property value that can be ignored if present (like
# defaults, for example).
# An element that is None is a wildcard: the entry is relevant for all values
# of the tag/property given as None.  A non-None "value" means that the value
# of the attribute must be taken into account.
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [("img", "alt", None)]
class CheckerConfig(object):
    """Object representing the configuration of a checker.

    It holds the markers, word lists, punctuation and tag rules that the
    checker classes read through their config attribute.
    """

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None, validchars=None,
                 punctuation=None, endpunctuation=None, ignoretags=None,
                 canchangetags=None, criticaltests=None, credit_sources=None):
        """Builds a configuration; arguments left as None get a default.

        List arguments are copied, so the configuration never shares a list
        object with its caller.
        """
        # Init lists
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)
        # Lang data
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        # Inits with default values (self.lang was set just above)
        self.punctuation = self._init_default(data.forceunicode(punctuation), self.lang.punctuation)
        self.endpunctuation = self._init_default(data.forceunicode(endpunctuation), self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags, common_canchangetags)
        # Other data
        # TODO: allow user configuration of untranslatable words
        self.notranslatewords = dict.fromkeys([data.forceunicode(key) for key in self._init_list(notranslatewords)])
        self.musttranslatewords = dict.fromkeys([data.forceunicode(key) for key in self._init_list(musttranslatewords)])
        validchars = data.forceunicode(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(validchars)

    def _init_list(self, list):
        """initialise configuration parameters that are lists

        @type list: List
        @param list: None (we'll initialise a blank list) or a list parameter
        @rtype: List
        @return: a new list holding the given items, so that update() can
        extend it without changing the object the caller passed in
        """
        if list is None:
            return []
        # The parameter shadows the builtin, hence the comprehension.
        return [item for item in list]

    def _init_default(self, param, default):
        """initialise parameters that can have default options

        @param param: the user supplied parameter value
        @param default: default values when param is not specified
        @return: the parameter as specified by the user, or the default
        settings when param is None
        """
        if param is None:
            return default
        return param

    def update(self, otherconfig):
        """combines the info in otherconfig into this config object

        List and mapping settings are merged and punctuation strings are
        appended; ignoretags, canchangetags and credit_sources are replaced
        by the values from otherconfig.
        """
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """updates the map that eliminates valid characters

        Every character of validchars is added to validcharsmap as
        ordinal -> None.

        @return: True when validchars is None (nothing to do), otherwise
        None
        """
        if validchars is None:
            return True
        validcharsmap = dict([(ord(validchar), None) for validchar in data.forceunicode(validchars)])
        self.validcharsmap.update(validcharsmap)

    def updatetargetlanguage(self, langcode):
        """Updates the target language in the config to the given target language

        Only self.lang is replaced; self.targetlanguage is left untouched.
        """
        self.lang = factory.getlanguage(langcode)
209
def cache_results(f):
    """Decorator that caches the result of a one-argument method.

    Results are stored in the instance's results_cache dictionary under the
    key (method name, argument), so the instance must provide that
    attribute and the argument must be hashable.

    @param f: the method to wrap; it is called as f(self, param1)
    @return: a wrapper with the same name and docstring as f
    """
    # Imported locally so that the decorator does not rely on a module-level
    # import being present.
    import functools

    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key in res_cache:
            return res_cache[key]
        value = f(self, param1)
        res_cache[key] = value
        return value

    # Carry over __name__ and __doc__; without this every cached method
    # would present itself as "cached_f" with no docstring.
    return functools.wraps(f)(cached_f)
class UnitChecker(object):
    """Base checker: runs every test method that a derived class defines.

    Any callable attribute that is not defined on UnitChecker itself is
    treated as a test.  preconditions maps a test name to the names of the
    tests that are skipped when that test fails.
    """
    preconditions = {}

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
        """
        @param checkerconfig: the CheckerConfig to use (a default one when None)
        @param excludefilters: names of tests that must not be run
        @param limitfilters: when given, the only test names to consider
        @param errorhandler: called as
        errorhandler(name, source, target, exception) when a test raises an
        unexpected exception; its return value becomes the test result
        """
        self.errorhandler = errorhandler
        if checkerconfig is None:
            checkerconfig = CheckerConfig()
        self.setconfig(checkerconfig)
        # exclude functions defined in UnitChecker from being treated as tests...
        self.helperfunctions = {}
        for name in dir(UnitChecker):
            attribute = getattr(self, name)
            if callable(attribute):
                self.helperfunctions[name] = attribute
        self.defaultfilters = self.getfilters(excludefilters, limitfilters)

        self.results_cache = {}

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists

        @return: a dictionary mapping test name to bound test method
        """
        if limitfilters is None:
            # use everything available unless instructed
            limitfilters = dir(self)
        if excludefilters is None:
            excludefilters = {}
        available = {}
        for name in limitfilters:
            skip = (name in excludefilters
                    or name in self.helperfunctions
                    or name == "errorhandler")
            if skip:
                continue
            candidate = getattr(self, name, None)
            if callable(candidate):
                available[name] = candidate
        return available

    def setconfig(self, config):
        """stores config and builds the accelerator and variable prefilters
        from its accelmarkers and varmatches"""
        self.config = config
        self.accfilters = [prefilters.filteraccelerators(marker)
                           for marker in config.accelmarkers]
        self.varfilters = [prefilters.filtervariables(start, end, prefilters.varname)
                           for start, end in config.varmatches]
        self.removevarfilter = [prefilters.filtervariables(start, end, prefilters.varnone)
                                for start, end in config.varmatches]

    def setsuggestionstore(self, store):
        """Sets the store that a checker should use for evaluating suggestions."""
        self.suggestion_store = store

    @cache_results
    def filtervariables(self, str1):
        """filter out variables from str1"""
        return helpers.multifilter(str1, self.varfilters)

    @cache_results
    def removevariables(self, str1):
        """remove variables from str1"""
        return helpers.multifilter(str1, self.removevarfilter)

    @cache_results
    def filteraccelerators(self, str1):
        """filter out accelerators from str1"""
        return helpers.multifilter(str1, self.accfilters, None)

    def filteraccelerators_by_list(self, str1, acceptlist=None):
        """filter out accelerators from str1, passing acceptlist on to the filters"""
        return helpers.multifilter(str1, self.accfilters, acceptlist)

    @cache_results
    def filterwordswithpunctuation(self, str1):
        """replaces words with punctuation with their unpunctuated equivalents"""
        return prefilters.filterwordswithpunctuation(str1)

    @cache_results
    def filterxml(self, str1):
        """filter out XML from the string so only text remains"""
        return tag_re.sub("", str1)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        Note that this can raise a FilterFailure as part of normal operation"""
        return test(unit)

    def run_filters(self, unit):
        """run all the tests in this suite, return failures as testname, message_or_exception

        Tests named in preconditions run first; when one of them fails, the
        tests it lists are skipped for this unit.
        """
        self.results_cache = {}
        failures = {}
        ignores = self.config.lang.ignoretests[:]
        priority_names = list(self.preconditions.keys())
        remaining_names = [name for name in self.defaultfilters.keys()
                           if name not in self.preconditions]
        for functionname in priority_names + remaining_names:
            if functionname in ignores:
                continue
            filterfunction = getattr(self, functionname, None)
            # this filterfunction may only be defined on another checker if using TeeChecker
            if filterfunction is None:
                continue
            filtermessage = filterfunction.__doc__
            try:
                filterresult = self.run_test(filterfunction, unit)
            except FilterFailure as e:
                filterresult = False
                filtermessage = e.args[0]
            except Exception as e:
                if self.errorhandler is None:
                    raise ValueError("error in filter %s: %r, %r, %s" % \
                            (functionname, unit.source, unit.target, e))
                filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
            if filterresult:
                continue
            # we test some preconditions that aren't actually a cause for failure
            if functionname in self.defaultfilters:
                failures[functionname] = filtermessage
            if functionname in self.preconditions:
                ignores.extend(self.preconditions[functionname])
        self.results_cache = {}
        return failures
345
class TranslationChecker(UnitChecker):
    """A checker that passes source and target strings to the checks, not the
    whole unit.

    This provides some speedup and simplifies testing."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
        """Takes the same arguments as UnitChecker and passes them on unchanged."""
        super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        For a unit with plurals the test is run against every plural form
        of the target; otherwise it is run once on the cached source and
        target strings.

        Note that this can raise a FilterFailure as part of normal operation.

        @return: the result of the test
        @raise FilterFailure: when a plural form failed and at least one
        failure carried a message; the messages are combined
        """
        if not self.hasplural:
            return test(self.str1, self.str2)
        filtermessages = []
        filterresult = True
        for pluralform in unit.target.strings:
            try:
                if not test(self.str1, pluralform):
                    filterresult = False
            except FilterFailure as e:
                filterresult = False
                # Take the message straight from the exception, as
                # UnitChecker.run_filters does.  Going through str(e) fails
                # with UnicodeEncodeError as soon as it is not pure ASCII.
                filtermessages.append(e.args[0])
        if not filterresult and filtermessages:
            raise FilterFailure(filtermessages)
        return filterresult

    def run_filters(self, unit):
        """Do some optimisation by caching some data of the unit for the benefit
        of run_test()."""
        self.str1 = data.forceunicode(unit.source)
        self.str2 = data.forceunicode(unit.target)
        self.hasplural = unit.hasplural()
        return super(TranslationChecker, self).run_filters(unit)
382
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
                 checkerclasses=None, errorhandler=None, languagecode=None):
        """construct a TeeChecker from the given checkers

        @param checkerclasses: the checker classes to instantiate;
        [StandardChecker] when None
        @param languagecode: when given, the target language that is set on
        the configuration of every checker
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        self.checkers = []
        for checkerclass in checkerclasses:
            self.checkers.append(checkerclass(checkerconfig=checkerconfig,
                                              excludefilters=excludefilters,
                                              limitfilters=limitfilters,
                                              errorhandler=errorhandler))
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            # NOTE(review): the indentation of this file was lost; the hook-up
            # is assumed to belong inside "if languagecode:" - confirm.
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists

        The filters of all checkers are merged into self.combinedfilters; a
        warning is printed to stderr for every name in limitfilters that no
        checker provides.
        """
        if excludefilters is None:
            excludefilters = {}
        self.combinedfilters = {}
        for checker in self.checkers:
            self.combinedfilters.update(checker.getfilters(excludefilters, limitfilters))
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            import sys
            for filtername in limitfilters:
                if filtername in self.combinedfilters:
                    continue
                print >> sys.stderr, "warning: could not find filter %s" % filtername
        return self.combinedfilters

    def run_filters(self, unit):
        """run all the tests in the checker's suites

        @return: the failures of all checkers merged into one dictionary
        """
        combined = {}
        for checker in self.checkers:
            combined.update(checker.run_filters(unit))
        return combined

    def setsuggestionstore(self, store):
        """Sets the store that every checker should use for evaluating suggestions."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
431 432
433 -class StandardChecker(TranslationChecker):
434 """The basic test suite for source -> target translations."""
def untranslated(self, str1, str2):
    """checks whether a string has been translated at all

    The test fails when the original contains more than whitespace and the
    translation, after prefilters.removekdecomments(), is empty.
    """
    target = prefilters.removekdecomments(str2)
    source_has_text = len(str1.strip()) > 0
    target_is_empty = len(target) == 0
    return not (source_has_text and target_is_empty)
439
def unchanged(self, str1, str2):
    """checks whether a translation is basically identical to the original string

    Accelerators and variables are removed from both strings first.
    Originals shorter than two characters, all-capital originals that were
    kept as they are, and single words listed in config.notranslatewords are
    accepted.

    @raise FilterFailure: when the original is purely alphabetic and equals
    the translation apart from case
    """
    source = self.removevariables(self.filteraccelerators(str1)).strip()
    target = self.removevariables(self.filteraccelerators(str2)).strip()
    if len(source) < 2:
        return True
    if source.isupper() and source == target:
        return True
    notranslate = self.config.notranslatewords
    if notranslate:
        words = source.split()
        #why do we only test for one notranslate word?
        if len(words) == 1 and words[0] in notranslate:
            return True
    if source.isalpha() and source.lower() == target.lower():
        raise FilterFailure(u"please translate")
    return True
458
def blank(self, str1, str2):
    """checks whether a translation only contains spaces

    The test fails when the original has text and the translation is
    non-empty but consists of whitespace only.
    """
    if len(str1.strip()) == 0:
        return True
    if len(str2) == 0:
        return True
    return len(str2.strip()) != 0
464
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original string

    Lengths are measured after stripping whitespace.  The test fails when
    the translation is non-empty but shorter than a tenth of the original,
    or when it is a single character while the original is longer.
    """
    source_length = len(str1.strip())
    target_length = len(str2.strip())
    under_a_tenth = source_length > 0 and 0 < target_length < source_length * 0.1
    single_character = source_length > 1 and target_length == 1
    return not (under_a_tenth or single_character)
470
def long(self, str1, str2):
    """checks whether a translation is much longer than the original string

    Lengths are measured after stripping whitespace.  The test fails when
    the original is non-empty but shorter than a tenth of the translation,
    or when it is a single character while the translation is longer.
    """
    source_length = len(str1.strip())
    target_length = len(str2.strip())
    under_a_tenth = source_length > 0 and 0 < source_length < target_length * 0.1
    single_character = source_length == 1 and target_length > 1
    return not (under_a_tenth or single_character)
476
def escapes(self, str1, str2):
    """checks whether escaping is consistent between the two strings

    @raise SeriousFilterFailure: when helpers.countsmatch() reports a
    mismatch; the message lists the words of each string that contain a
    backslash
    """
    if helpers.countsmatch(str1, str2, ("\\", "\\\\")):
        return True

    def escaped_words(text):
        # quote every whitespace separated word that contains a backslash
        return u", ".join([u"'%s'" % word for word in text.split() if "\\" in word])

    escapes1 = escaped_words(str1)
    escapes2 = escaped_words(str2)
    raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
485
def newlines(self, str1, str2):
    """checks whether newlines are consistent between the two strings

    @raise FilterFailure: when helpers.countsmatch() reports a mismatch for
    "\\n" or "\\r"
    """
    line_endings = ("\n", "\r")
    if helpers.countsmatch(str1, str2, line_endings):
        return True
    raise FilterFailure(u"line endings in original don't match line endings in translation")
492
def tabs(self, str1, str2):
    """checks whether tabs are consistent between the two strings

    @raise SeriousFilterFailure: when helpers.countmatch() reports a
    mismatch for the tab character
    """
    if helpers.countmatch(str1, str2, "\t"):
        return True
    raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
499 500
def singlequoting(self, str1, str2):
    """checks whether singlequoting is consistent between the two strings

    Variables, accelerators and words with punctuation are filtered out of
    both strings before the quotes are compared.
    """
    prepared = []
    for text in (str1, str2):
        text = self.filtervariables(text)
        text = self.filteraccelerators(text)
        prepared.append(self.filterwordswithpunctuation(text))
    source, target = prepared
    return helpers.countsmatch(source, target, ("'", "''", "\\'"))
506
def doublequoting(self, str1, str2):
    """checks whether doublequoting is consistent between the two strings

    Variables, accelerators and XML are filtered out of both strings.  Only
    the original is additionally passed through the target language's
    punctranslate() before the quotes are compared.
    """
    source = self.filterxml(self.filteraccelerators(self.filtervariables(str1)))
    source = self.config.lang.punctranslate(source)
    target = self.filterxml(self.filteraccelerators(self.filtervariables(str2)))
    quotes = ('"', '""', '\\"', u"«", u"»")
    return helpers.countsmatch(source, target, quotes)
515
def doublespacing(self, str1, str2):
    """checks for bad double-spaces by comparing to original

    Accelerators are filtered out of both strings, after which the
    occurrences of two consecutive spaces are compared with
    helpers.countmatch().
    """
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    # Compare runs of TWO spaces.  Comparing single spaces would flag every
    # translation that merely uses a different number of words.
    return helpers.countmatch(str1, str2, "  ")
521
def puncspacing(self, str1, str2):
    """checks for bad spacing after punctuation

    Only punctuation marks that occur equally often in both strings are
    examined; for those, the number of times the mark is followed by a
    space has to agree.
    """
    if u" " not in str1:
        return True
    source = self.filteraccelerators(self.filtervariables(str1))
    source = self.config.lang.punctranslate(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    for mark in self.config.punctuation:
        occurrences = source.count(mark)
        if not occurrences or occurrences != target.count(mark):
            continue
        spaced_source = source.count(mark + " ")
        spaced_target = target.count(mark + " ")
        if spaced_source == spaced_target:
            continue
        # handle extra spaces that are because of transposed punctuation
        transposed = source.endswith(mark) != target.endswith(mark)
        if transposed and abs(spaced_source - spaced_target) == 1:
            continue
        return False
    return True
542
def printf(self, str1, str2):
    """checks whether printf format strings match

    Every format specifier found in the translation (str2) is compared with
    the specifiers of the original (str1): by position number for %1$s
    style specifiers, by key for %(key)s style specifiers and by order of
    appearance otherwise.  Finally the number of specifiers has to agree.

    @return: 1 when the format strings match, 0 when they do not
    """
    count1 = count2 = plural = None
    # self.hasplural only set by run_filters, not always available
    if 'hasplural' in self.__dict__:
        plural = self.hasplural
    for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
        count2 = var_num2 + 1
        str2key = match2.group('key')
        if match2.group('ord'):
            # positional specifier: compare with the specifier at that
            # (1-based) position in the original
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if int(match2.group('ord')) == var_num1 + 1:
                    if match2.group('fullvar') != match1.group('fullvar'):
                        return 0
        elif str2key:
            # named specifier: the same key has to exist in the original
            str1key = None
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if match1.group('key') and str2key == match1.group('key'):
                    str1key = match1.group('key')
                    # '%.0s' "placeholder" in plural will match anything
                    if plural and match2.group('fullvar') == '.0s':
                        continue
                    if match1.group('fullvar') != match2.group('fullvar'):
                        return 0
            if str1key == None:
                return 0
        else:
            # plain specifier: compare with the one at the same index
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                # '%.0s' "placeholder" in plural will match anything
                if plural and match2.group('fullvar') == '.0s':
                    continue
                if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
                    return 0

    # the translation has no specifiers at all, so the original may not
    # have any either
    if count2 is None:
        if list(printf_pat.finditer(str1)):
            return 0

    if (count1 or count2) and (count1 != count2):
        return 0
    return 1
587
def accelerators(self, str1, str2):
    """checks whether accelerators are consistent between the two strings

    Variables are filtered out first.  For every marker in
    config.accelmarkers the accelerators of the original are counted
    against the source language's valid accelerator characters and those of
    the translation against the target language's.

    @raise SeriousFilterFailure: on a mismatch, when "accelerators" is
    listed in config.criticaltests
    @raise FilterFailure: on a mismatch otherwise
    """
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    problems = []
    for marker in self.config.accelmarkers:
        count_in_source = decoration.countaccelerators(marker, self.config.sourcelang.validaccel)
        count_in_target = decoration.countaccelerators(marker, self.config.lang.validaccel)
        found1, invalid1 = count_in_source(source)
        found2, invalid2 = count_in_target(target)
        list_in_target = decoration.getaccelerators(marker, self.config.lang.validaccel)
        accels2, bad2 = list_in_target(target)
        if found1 == found2:
            continue
        if found1 == 1 and found2 == 0:
            if invalid2 == 1:
                problems.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (marker, bad2[0]))
            else:
                problems.append(u"accelerator %s is missing from translation" % marker)
        elif found1 == 0:
            problems.append(u"accelerator %s does not occur in original and should not be in translation" % marker)
        elif found1 == 1 and found2 > found1:
            problems.append(u"accelerator %s is repeated in translation" % marker)
        else:
            problems.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (marker, found1, found2))
    if not problems:
        return True
    if "accelerators" in self.config.criticaltests:
        raise SeriousFilterFailure(problems)
    raise FilterFailure(problems)
619 620 # def acceleratedvariables(self, str1, str2): 621 # """checks that no variables are accelerated""" 622 # messages = [] 623 # for accelerator in self.config.accelmarkers: 624 # for variablestart, variableend in self.config.varmatches: 625 # error = accelerator + variablestart 626 # if str1.find(error) >= 0: 627 # messages.append(u"original has an accelerated variable") 628 # if str2.find(error) >= 0: 629 # messages.append(u"translation has an accelerated variable") 630 # if messages: 631 # raise FilterFailure(messages) 632 # return True 633
def variables(self, str1, str2):
    """checks whether variables of various forms are consistent between the two strings

    Each (startmarker, endmarker) pair in config.varmatches is used to
    extract the variables of both strings; variables that occur more often
    in one string than in the other are reported.

    @raise SeriousFilterFailure: when variables of the original are missing
    from the translation
    @raise FilterFailure: when only the translation has extra variables
    """
    messages = []
    mismatch1, mismatch2 = [], []
    varnames1, varnames2 = [], []
    for startmarker, endmarker in self.config.varmatches:
        varchecker = decoration.getvariables(startmarker, endmarker)
        # redecorate puts the markers back around a bare variable name so
        # that the message shows the variable as it appears in the text
        if startmarker and endmarker:
            if isinstance(endmarker, int):
                # an int endmarker cannot be re-attached as text
                # (presumably it is a length - confirm in decoration)
                redecorate = lambda var: startmarker + var
            else:
                redecorate = lambda var: startmarker + var + endmarker
        elif startmarker:
            redecorate = lambda var: startmarker + var
        else:
            redecorate = lambda var: var
        vars1 = varchecker(str1)
        vars2 = varchecker(str2)
        if vars1 != vars2:
            # we use counts to compare so we can handle multiple variables
            vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
            # filter variable names we've already seen, so they aren't matched by more than one filter...
            vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
            varnames1.extend(vars1)
            varnames2.extend(vars2)
            vars1 = map(redecorate, vars1)
            vars2 = map(redecorate, vars2)
            mismatch1.extend(vars1)
            mismatch2.extend(vars2)
    # variables missing from the translation take precedence in the message
    if mismatch1:
        messages.append(u"do not translate: %s" % ", ".join(mismatch1))
    elif mismatch2:
        messages.append(u"translation contains variables not in original: %s" % ", ".join(mismatch2))
    if messages and mismatch1:
        raise SeriousFilterFailure(messages)
    elif messages:
        raise FilterFailure(messages)
    return True
672
def functions(self, str1, str2):
    """checks that function names are not translated

    The comparison is delegated to helpers.funcmatch() with
    decoration.getfunctions and the configured punctuation.
    """
    punctuation = self.config.punctuation
    return helpers.funcmatch(str1, str2, decoration.getfunctions, punctuation)
676
def emails(self, str1, str2):
    """checks that emails are not translated

    The comparison is delegated to helpers.funcmatch() with
    decoration.getemails.
    """
    extractor = decoration.getemails
    return helpers.funcmatch(str1, str2, extractor)
680
def urls(self, str1, str2):
    """checks that URLs are not translated

    The comparison is delegated to helpers.funcmatch() with
    decoration.geturls.
    """
    extractor = decoration.geturls
    return helpers.funcmatch(str1, str2, extractor)
684
def numbers(self, str1, str2):
    """checks whether numbers of various forms are consistent between the two strings

    The numbers extracted from the original by decoration.getnumbers() are
    handed to helpers.countsmatch() for comparison.
    """
    source_numbers = decoration.getnumbers(str1)
    return helpers.countsmatch(str1, str2, source_numbers)
688
def startwhitespace(self, str1, str2):
    """checks whether whitespace at the beginning of the strings matches

    Variables and accelerators are filtered out of both strings before
    they are compared with decoration.spacestart.
    """
    source = self.filteraccelerators(self.filtervariables(str1))
    target = self.filteraccelerators(self.filtervariables(str2))
    return helpers.funcmatch(source, target, decoration.spacestart)
694
def endwhitespace(self, str1, str2):
    """checks whether whitespace at the end of the strings matches

    Variables and accelerators are filtered out of both strings before
    they are compared with decoration.spaceend.
    """
    source = self.filteraccelerators(self.filtervariables(str1))
    target = self.filteraccelerators(self.filtervariables(str2))
    return helpers.funcmatch(source, target, decoration.spaceend)
700
def startpunc(self, str1, str2):
    """checks whether punctuation at the beginning of the strings match

    Words with punctuation, variables and accelerators are filtered out of
    both strings; only the original is also passed through the target
    language's punctranslate().
    """
    source = self.filterwordswithpunctuation(str1)
    source = self.filteraccelerators(self.filtervariables(source))
    source = self.config.lang.punctranslate(source)
    target = self.filterwordswithpunctuation(str2)
    target = self.filteraccelerators(self.filtervariables(target))
    return helpers.funcmatch(source, target, decoration.puncstart, self.config.punctuation)
707
def endpunc(self, str1, str2):
    """checks whether punctuation at the end of the strings match

    Words with punctuation, variables and accelerators are filtered out of
    both strings; only the original is also passed through the target
    language's punctranslate().  The comparison uses config.endpunctuation.
    """
    source = self.filterwordswithpunctuation(str1)
    source = self.filteraccelerators(self.filtervariables(source))
    source = self.config.lang.punctranslate(source)
    target = self.filterwordswithpunctuation(str2)
    target = self.filteraccelerators(self.filtervariables(target))
    return helpers.funcmatch(source, target, decoration.puncend, self.config.endpunctuation)
714
def purepunc(self, str1, str2):
    """checks that strings that are purely punctuation are not changed

    A purely punctuation original has to be left exactly as it is; any
    other original must not get a purely punctuation translation.
    """
    # this test is a subset of startandend
    if decoration.ispurepunctuation(str1):
        return str1 == str2
    return not decoration.ispurepunctuation(str2)
722
def brackets(self, str1, str2):
    """checks that the number of brackets in both strings match

    Variables are filtered out first; then each of [ ] { } ( ) is counted
    separately in both strings.

    @raise FilterFailure: naming the brackets that are missing from, or
    extra in, the translation
    """
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    missing = []
    extra = []
    for bracket in ("[", "]", "{", "}", "(", ")"):
        difference = target.count(bracket) - source.count(bracket)
        if difference < 0:
            missing.append("'%s'" % bracket)
        elif difference > 0:
            extra.append("'%s'" % bracket)
    messages = []
    if missing:
        messages.append(u"translation is missing %s" % ", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % ", ".join(extra))
    if messages:
        raise FilterFailure(messages)
    return True
744
def sentencecount(self, str1, str2):
    """checks that the number of sentences in both strings match

    The original is split with the source language's sentences() and the
    translation with the target language's.

    @raise FilterFailure: when the two counts differ
    """
    source_count = len(self.config.sourcelang.sentences(str1))
    target_count = len(self.config.lang.sentences(str2))
    if source_count != target_count:
        raise FilterFailure(u"The number of sentences differ: %d versus %d" % (source_count, target_count))
    return True
752
def options(self, str1, str2):
    """checks that options are not translated

    Every word of the original (after filtering variables) that starts with
    "--" and ends in an alphanumeric character is treated as an option,
    optionally followed by "=parameter".  The option itself has to appear
    unchanged in the translation, whereas its parameter is expected to be
    translated.

    @raise FilterFailure: when an option is absent from the translation, or
    when the parameter of an option occurs untranslated in it
    """
    str1 = self.filtervariables(str1)
    for word1 in str1.split():
        if word1 != "--" and word1.startswith("--") and word1[-1].isalnum():
            parts = word1.split("=")
            if not parts[0] in str2:
                raise FilterFailure(u"The option %s does not occur or is translated in the translation." % parts[0])
            if len(parts) > 1 and parts[1] in str2:
                # parts[0] is the option, parts[1] its parameter; the two
                # used to be swapped in this message
                raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": parts[0]})
    return True
764
765 - def startcaps(self, str1, str2):
766 """checks that the message starts with the correct capitalisation""" 767 str1 = self.filteraccelerators(str1) 768 str2 = self.filteraccelerators(str2) 769 if len(str1) > 1 and len(str2) > 1: 770 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2) 771 if len(str1) == 0 and len(str2) == 0: 772 return True 773 if len(str1) == 0 or len(str2) == 0: 774 return False 775 return True
776
777 - def simplecaps(self, str1, str2):
778 """checks the capitalisation of two strings isn't wildly different""" 779 str1 = self.removevariables(str1) 780 str2 = self.removevariables(str2) 781 # TODO: review this. The 'I' is specific to English, so it probably serves 782 # no purpose to get sourcelang.sentenceend 783 str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, " i ", str1) 784 capitals1 = helpers.filtercount(str1, unicode.isupper) 785 capitals2 = helpers.filtercount(str2, unicode.isupper) 786 alpha1 = helpers.filtercount(str1, unicode.isalpha) 787 alpha2 = helpers.filtercount(str2, unicode.isalpha) 788 # Capture the all caps case 789 if capitals1 == alpha1: 790 return capitals2 == alpha2 791 # some heuristic tests to try and see that the style of capitals is vaguely the same 792 if capitals1 == 0 or capitals1 == 1: 793 return capitals2 == capitals1 794 elif capitals1 < len(str1) / 10: 795 return capitals2 <= len(str2) / 8 796 elif len(str1) < 10: 797 return abs(capitals1 - capitals2) < 3 798 elif capitals1 > len(str1) * 6 / 10: 799 return capitals2 > len(str2) * 6 / 10 800 else: 801 return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
802
803 - def acronyms(self, str1, str2):
804 """checks that acronyms that appear are unchanged""" 805 acronyms = [] 806 allowed = [] 807 for startmatch, endmatch in self.config.varmatches: 808 allowed += decoration.getvariables(startmatch, endmatch)(str1) 809 allowed += self.config.musttranslatewords.keys() 810 str1 = self.filteraccelerators(self.filtervariables(str1)) 811 iter = self.config.lang.word_iter(str1) 812 str2 = self.filteraccelerators(self.filtervariables(str2)) 813 #TODO: strip XML? - should provide better error messsages 814 # see mail/chrome/messanger/smime.properties.po 815 #TODO: consider limiting the word length for recognising acronyms to 816 #something like 5/6 characters 817 for word in iter: 818 if word.isupper() and len(word) > 1 and word not in allowed: 819 if str2.find(word) == -1: 820 acronyms.append(word) 821 if acronyms: 822 raise FilterFailure("acronyms should not be translated: " + ", ".join(acronyms)) 823 return True
824
825 - def doublewords(self, str1, str2):
826 """checks for repeated words in the translation""" 827 lastword = "" 828 without_newlines = "\n".join(str2.split("\n")) 829 words = self.filteraccelerators(self.removevariables(without_newlines)).replace(".", "").lower().split() 830 for word in words: 831 if word == lastword: 832 raise FilterFailure("The word '%s' is repeated" % word) 833 lastword = word 834 return True
835
836 - def notranslatewords(self, str1, str2):
837 """checks that words configured as untranslatable appear in the translation too""" 838 if not self.config.notranslatewords: 839 return True 840 str1 = self.filtervariables(str1) 841 str2 = self.filtervariables(str2) 842 #The above is full of strange quotes and things in utf-8 encoding. 843 #single apostrophe perhaps problematic in words like "doesn't" 844 for seperator in self.config.punctuation: 845 str1 = str1.replace(seperator, u" ") 846 str2 = str2.replace(seperator, u" ") 847 words1 = self.filteraccelerators(str1).split() 848 words2 = self.filteraccelerators(str2).split() 849 stopwords = [word for word in words1 if word in self.config.notranslatewords and word not in words2] 850 if stopwords: 851 raise FilterFailure("do not translate: %s" % (", ".join(stopwords))) 852 return True
853
854 - def musttranslatewords(self, str1, str2):
855 """checks that words configured as definitely translatable don't appear in 856 the translation""" 857 if not self.config.musttranslatewords: 858 return True 859 str1 = self.removevariables(str1) 860 str2 = self.removevariables(str2) 861 #The above is full of strange quotes and things in utf-8 encoding. 862 #single apostrophe perhaps problematic in words like "doesn't" 863 for seperator in self.config.punctuation: 864 str1 = str1.replace(seperator, " ") 865 str2 = str2.replace(seperator, " ") 866 words1 = self.filteraccelerators(str1).split() 867 words2 = self.filteraccelerators(str2).split() 868 stopwords = [word for word in words1 if word in self.config.musttranslatewords and word in words2] 869 if stopwords: 870 raise FilterFailure("please translate: %s" % (", ".join(stopwords))) 871 return True
872
873 - def validchars(self, str1, str2):
874 """checks that only characters specified as valid appear in the translation""" 875 if not self.config.validcharsmap: 876 return True 877 invalid1 = str1.translate(self.config.validcharsmap) 878 invalid2 = str2.translate(self.config.validcharsmap) 879 invalidchars = [u"'%s' (\\u%04x)" % (invalidchar, ord(invalidchar)) for invalidchar in invalid2 if invalidchar not in invalid1] 880 if invalidchars: 881 raise FilterFailure(u"invalid chars: %s" % (u", ".join(invalidchars))) 882 return True
883
884 - def filepaths(self, str1, str2):
885 """checks that file paths have not been translated""" 886 for word1 in self.filteraccelerators(str1).split(): 887 if word1.startswith("/"): 888 if not helpers.countsmatch(str1, str2, (word1,)): 889 return False 890 return True
891
892 - def xmltags(self, str1, str2):
893 """checks that XML/HTML tags have not been translated""" 894 tags1 = tag_re.findall(str1) 895 if len(tags1) > 0: 896 if (len(tags1[0]) == len(str1)) and not "=" in tags1[0]: 897 return True 898 tags2 = tag_re.findall(str2) 899 properties1 = tagproperties(tags1, self.config.ignoretags) 900 properties2 = tagproperties(tags2, self.config.ignoretags) 901 filtered1 = [] 902 filtered2 = [] 903 for property1 in properties1: 904 filtered1 += [intuplelist(property1, self.config.canchangetags)] 905 for property2 in properties2: 906 filtered2 += [intuplelist(property2, self.config.canchangetags)] 907 908 #TODO: consider the consequences of different ordering of attributes/tags 909 if filtered1 != filtered2: 910 return False 911 else: 912 # No tags in str1, let's just check that none were added in str2. This 913 # might be useful for fuzzy strings wrongly unfuzzied, for example. 914 tags2 = tag_re.findall(str2) 915 if len(tags2) > 0: 916 return False 917 return True
918
919 - def kdecomments(self, str1, str2):
920 """checks to ensure that no KDE style comments appear in the translation""" 921 return str2.find("\n_:") == -1 and not str2.startswith("_:")
922
923 - def compendiumconflicts(self, str1, str2):
924 """checks for Gettext compendium conflicts (#-#-#-#-#)""" 925 return str2.find("#-#-#-#-#") == -1
926
927 - def simpleplurals(self, str1, str2):
928 """checks for English style plural(s) for you to review""" 929 def numberofpatterns(string, patterns): 930 number = 0 931 for pattern in patterns: 932 number += len(re.findall(pattern, string)) 933 return number
934 935 sourcepatterns = ["\(s\)"] 936 targetpatterns = ["\(s\)"] 937 sourcecount = numberofpatterns(str1, sourcepatterns) 938 targetcount = numberofpatterns(str2, targetpatterns) 939 if self.config.lang.nplurals == 1: 940 return not targetcount 941 return sourcecount == targetcount
942
943 - def spellcheck(self, str1, str2):
944 """checks words that don't pass a spell check""" 945 if not self.config.targetlanguage: 946 return True 947 if not spelling.available: 948 return True 949 str1 = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel) 950 str2 = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel) 951 ignore1 = [] 952 messages = [] 953 for word, index, suggestions in spelling.check(str1, lang="en"): 954 ignore1.append(word) 955 for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage): 956 if word in self.config.notranslatewords: 957 continue 958 if word in ignore1: 959 continue 960 # hack to ignore hyphenisation rules 961 if word in suggestions: 962 continue 963 messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions))) 964 if messages: 965 raise FilterFailure(messages) 966 return True
967
968 - def credits(self, str1, str2):
969 """checks for messages containing translation credits instead of normal translations.""" 970 return not str1 in self.config.credit_sources
971 972 # If the precondition filter is run and fails then the other tests listed are ignored 973 preconditions = {"untranslated": ("simplecaps", "variables", "startcaps", 974 "accelerators", "brackets", "endpunc", 975 "acronyms", "xmltags", "startpunc", 976 "endwhitespace", "startwhitespace", 977 "escapes", "doublequoting", "singlequoting", 978 "filepaths", "purepunc", "doublespacing", 979 "sentencecount", "numbers", "isfuzzy", 980 "isreview", "notranslatewords", "musttranslatewords", 981 "emails", "simpleplurals", "urls", "printf", 982 "tabs", "newlines", "functions", "options", 983 "blank", "nplurals"), 984 "blank": ("simplecaps", "variables", "startcaps", 985 "accelerators", "brackets", "endpunc", 986 "acronyms", "xmltags", "startpunc", 987 "endwhitespace", "startwhitespace", 988 "escapes", "doublequoting", "singlequoting", 989 "filepaths", "purepunc", "doublespacing", 990 "sentencecount", "numbers", "isfuzzy", 991 "isreview", "notranslatewords", "musttranslatewords", 992 "emails", "simpleplurals", "urls", "printf", 993 "tabs", "newlines", "functions", "options"), 994 "credits": ("simplecaps", "variables", "startcaps", 995 "accelerators", "brackets", "endpunc", 996 "acronyms", "xmltags", "startpunc", 997 "escapes", "doublequoting", "singlequoting", 998 "filepaths", "doublespacing", 999 "sentencecount", "numbers", 1000 "emails", "simpleplurals", "urls", "printf", 1001 "tabs", "newlines", "functions", "options"), 1002 "purepunc": ("startcaps", "options"), 1003 "startcaps": ("simplecaps",), 1004 "endwhitespace": ("endpunc",), 1005 "startwhitespace":("startpunc",), 1006 "unchanged": ("doublewords",), 1007 "compendiumconflicts": ("accelerators", "brackets", "escapes", 1008 "numbers", "startpunc", "long", "variables", 1009 "startcaps", "sentencecount", "simplecaps", 1010 "doublespacing", "endpunc", "xmltags", 1011 "startwhitespace", "endwhitespace", 1012 "singlequoting", "doublequoting", 1013 "filepaths", "purepunc", "doublewords", "printf") } 1014 1015 # code to 
actually run the tests (use unittest?) 1016 1017 openofficeconfig = CheckerConfig( 1018 accelmarkers = ["~"], 1019 varmatches = [("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"), ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0), ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)], 1020 ignoretags = [("alt", "xml-lang", None), ("ahelp", "visibility", "visible"), ("img", "width", None), ("img", "height", None)], 1021 canchangetags = [("link", "name", None)] 1022 ) 1023
class OpenOfficeChecker(StandardChecker):
    """StandardChecker with openofficeconfig merged into its checker config."""

    def __init__(self, **kwargs):
        """Merge openofficeconfig into the C{checkerconfig} keyword argument
        (creating an empty CheckerConfig when none is given) and pass all
        keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        # update() is applied to the caller's object when one was passed in.
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)

# Checker settings merged into the config by MozillaChecker.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1),
        ("$", "$"), ("$", None), ("#", 1),
        ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"]
)

class MozillaChecker(StandardChecker):
    """StandardChecker with mozillaconfig merged into its checker config."""

    def __init__(self, **kwargs):
        """Merge mozillaconfig into the C{checkerconfig} keyword argument
        (creating an empty CheckerConfig when none is given) and pass all
        keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        # update() is applied to the caller's object when one was passed in.
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

# Checker settings merged into the config by GnomeChecker.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"]
)

class GnomeChecker(StandardChecker):
    """StandardChecker with gnomeconfig merged into its checker config."""

    def __init__(self, **kwargs):
        """Merge gnomeconfig into the C{checkerconfig} keyword argument
        (creating an empty CheckerConfig when none is given) and pass all
        keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        # update() is applied to the caller's object when one was passed in.
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

# Checker settings merged into the config by KdeChecker.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[
        u"Your names",
        u"Your emails",
        u"ROLES_OF_TRANSLATORS",
    ]
)

class KdeChecker(StandardChecker):
    """StandardChecker with kdeconfig merged into its checker config."""

    def __init__(self, **kwargs):
        """Merge kdeconfig into the C{checkerconfig} keyword argument
        (creating an empty CheckerConfig when none is given) and pass all
        keyword arguments on to StandardChecker."""
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        # update() is applied to the caller's object when one was passed in.
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)

# Checker settings merged into the config by CCLicenseChecker.
cclicenseconfig = CheckerConfig(
    varmatches=[("@", "@")]
)

class CCLicenseChecker(StandardChecker):
    """StandardChecker with cclicenseconfig merged into its checker config."""

    def __init__(self, **kwargs):
        """Merge cclicenseconfig into the C{checkerconfig} keyword argument
        (creating an empty CheckerConfig when none is given) and pass all
        keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        # update() is applied to the caller's object when one was passed in.
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)

# Maps a project name to the checker class used for it. "kde" and "wx" share
# KdeChecker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
}


class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        Passes (returns True) when C{unit.isfuzzy()} is false.
        """
        return not unit.isfuzzy()

    def isreview(self, unit):
        """Check if the unit has been marked review.

        Passes (returns True) when C{unit.isreview()} is false.
        """
        return not unit.isreview()

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural translations.

        Units without plurals always pass. For plural units the number of
        target strings must equal C{self.config.lang.nplurals}.
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if expected > 0:
            return len(unit.target.strings) == expected
        return True

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this unit.

        Suggestions come from C{self.suggestion_store} (units with the same
        source) when one is set; otherwise, for XLIFF units, from
        C{unit.getalttrans()}. Returns True when no suggestion is found.
        """
        # Ensures the attribute exists, defaulting to None.
        self.suggestion_store = getattr(self, 'suggestion_store', None)
        suggestions = []
        if self.suggestion_store:
            source = unit.source
            suggestions = [candidate for candidate in self.suggestion_store.units
                           if candidate.source == source]
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        return not suggestions
1130 1131
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Both strings are converted with C{data.forceunicode}, wrapped in a
    TranslationUnit and run through a StandardChecker that excludes the
    filters named in C{ignorelist}. Each failure is printed, and the failures
    returned by C{run_filters} are returned to the caller.
    """
    from translate.storage import base
    source = data.forceunicode(str1)
    target = data.forceunicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        report = "failure: %s: %s\n %r\n %r" % (test, failures[test], source, target)
        print(report)
    return failures
1144
def batchruntests(pairs):
    """runs test on a batch of string pairs

    C{pairs} is a sequence of (source, target) string pairs. Each pair is run
    through C{runtests}, then a blank line and a summary of how many pairs
    passed are printed.
    """
    numpairs = len(pairs)
    passed = 0
    for str1, str2 in pairs:
        # runtests returns the failures it found, so a pair has passed only
        # when that result is empty.
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))

if __name__ == '__main__':
    # Sample (source, target) pairs run through the standard checks when the
    # module is executed as a script.
    testset = [
        (r"simple",
         r"somple"),
        (r"\this equals \that",
         r"does \this equal \that?"),
        (r"this \'equals\' that",
         r"this 'equals' that"),
        (r" start and end! they must match.",
         r"start and end! they must match."),
        (r"check for matching %variables marked like %this",
         r"%this %variable is marked"),
        (r"check for mismatching %variables marked like %this",
         r"%that %variable is marked"),
        (r"check for mismatching %variables% too",
         r"how many %variable% are marked"),
        (r"%% %%",
         r"%%"),
        (r"Row: %1, Column: %2",
         r"Mothalo: %1, Kholomo: %2"),
        (r"simple lowercase",
         r"it is all lowercase"),
        (r"simple lowercase",
         r"It Is All Lowercase"),
        (r"Simple First Letter Capitals",
         r"First Letters"),
        (r"SIMPLE CAPITALS",
         r"First Letters"),
        (r"SIMPLE CAPITALS",
         r"ALL CAPITALS"),
        (r"forgot to translate",
         r" "),
    ]
    batchruntests(testset)