Package translate :: Package filters :: Module decoration
[hide private]
[frames | no frames]

Source Code for Module translate.filters.decoration

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2004-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """functions to get decorative/informative text out of strings...""" 
 23   
 24  import re 
 25  import unicodedata 
 26  from translate.lang import data 
 27   
def spacestart(str1):
    """returns all the whitespace from the start of the string

    @type str1: String
    @param str1: The string to examine
    @rtype: String
    @return: The leading run of characters for which C{isspace()} is true;
        empty if str1 does not start with whitespace, and equal to str1 if
        str1 consists only of whitespace
    """
    # Find where the leading whitespace stops, then slice once rather than
    # growing a new string one character at a time.
    end = len(str1)
    for position, char in enumerate(str1):
        if not char.isspace():
            end = position
            break
    return str1[:end]
35
def spaceend(str1):
    """returns all the whitespace from the end of the string

    @type str1: String
    @param str1: The string to examine
    @rtype: String
    @return: The trailing run of characters for which C{isspace()} is true;
        empty if str1 does not end with whitespace, and equal to str1 if
        str1 consists only of whitespace
    """
    # Walk backwards to the first non-whitespace character and slice once;
    # prepending character by character copies the result on every step.
    start = len(str1)
    while start > 0 and str1[start - 1].isspace():
        start -= 1
    return str1[start:]
44
def puncstart(str1, punctuation):
    """returns all the punctuation from the start of the string

    Whitespace is treated like punctuation, so it is included in the result
    when it appears within the leading run.

    @type str1: String
    @param str1: The string to examine
    @type punctuation: String
    @param punctuation: The characters that count as punctuation
    @rtype: String
    @return: The leading run of characters that are either in punctuation or
        whitespace; empty if str1 starts with any other character
    """
    # Locate the first character that is neither punctuation nor whitespace
    # and slice once instead of concatenating per character.
    end = len(str1)
    for position, char in enumerate(str1):
        if char not in punctuation and not char.isspace():
            end = position
            break
    return str1[:end]
52
def puncend(str1, punctuation):
    """returns all the punctuation from the end of the string

    Whitespace is treated like punctuation, so it is included in the result
    when it appears within the trailing run.

    @type str1: String
    @param str1: The string to examine
    @type punctuation: String
    @param punctuation: The characters that count as punctuation
    @rtype: String
    @return: The trailing run of characters that are either in punctuation or
        whitespace; empty if str1 ends with any other character
    """
    # Scan backwards to the last character that is neither punctuation nor
    # whitespace, then slice once (prepending in a loop is quadratic).
    start = len(str1)
    while start > 0:
        char = str1[start - 1]
        if char not in punctuation and not char.isspace():
            break
        start -= 1
    return str1[start:]
61
def ispurepunctuation(str1):
    """checks whether the string contains no alphanumeric characters

    @type str1: String
    @param str1: The string to examine
    @return: False if any character of str1 is alphanumeric; otherwise the
        length of str1, which is truthy for a non-empty string and 0 for
        an empty one
    """
    if any(char.isalnum() for char in str1):
        return False
    # The length doubles as the result so that an empty string is falsy.
    return len(str1)
68
def isvalidaccelerator(accelerator, acceptlist=None):
    """returns whether the given accelerator character is valid

    When an acceptlist is supplied, the accelerator is valid only if it occurs
    in the acceptlist after the list has been passed through
    C{data.normalize}. Without an acceptlist the older rules apply: after
    underscores are removed the accelerator must be alphanumeric, and its
    Unicode decomposition must not consist of more than one part.

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as accelerators
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    assert isinstance(accelerator, unicode)
    assert isinstance(acceptlist, unicode) or acceptlist is None
    if not accelerator:
        return False

    if acceptlist is not None:
        return accelerator in data.normalize(acceptlist)

    # Old code path - ensures that we don't get a large number of regressions
    candidate = accelerator.replace("_", "")
    if not candidate.isalnum():
        return False

    # Accelerators on characters with diacritics are unwanted, so check
    # whether the character decomposes into several code points.
    parts = unicodedata.decomposition(candidate)
    # Drop extra information such as <compat> tags before counting parts.
    parts = re.sub("<[^>]+>", "", parts).strip()
    return " " not in parts
100
def findaccelerators(str1, accelmarker, acceptlist=None):
    """returns all the accelerators and locations in str1 marked with a given marker

    The single character following each occurrence of accelmarker is taken as
    the accelerator and classified with isvalidaccelerator().

    @param str1: The string to search
    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed on to isvalidaccelerator()
    @return: A pair of lists (valid, invalid); every entry is a tuple of the
        marker's position in str1 and the accelerator character
    """
    valid = []
    invalid = []
    markerlength = len(accelmarker)
    searchfrom = 0
    while True:
        markerpos = str1.find(accelmarker, searchfrom)
        if markerpos < 0:
            break
        # we assume accelerators are single characters
        charpos = markerpos + markerlength
        if charpos + 1 > len(str1):
            # The marker is the last thing in the string: nothing follows it.
            break
        candidate = str1[charpos:charpos + 1]
        searchfrom = charpos + 1
        if isvalidaccelerator(candidate, acceptlist):
            valid.append((markerpos, candidate))
        else:
            invalid.append((markerpos, candidate))
    return valid, invalid
121
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=None):
    """returns all the variables and locations in str1 marked with a given marker

    The end of a variable is determined by endmarker:
      - None: the variable runs until the first character that is neither
        alphanumeric nor an underscore (or to the end of the string), and
        must be at least one character long
      - an integer: the variable is exactly that many characters long
      - a string: the variable runs up to the next occurrence of endmarker;
        if another startmarker occurs in between, the variable starts after
        the last such startmarker

    Only variables that are empty or consist solely of alphanumeric
    characters, underscores and periods are reported.

    @type str1: String
    @param str1: The string to search
    @type startmarker: String
    @param startmarker: The marker that introduces a variable
    @param endmarker: None, an integer length, or a terminating marker string
    @param ignorelist: Variable names that must not be reported
    @return: A list of (position of the start marker, variable name) tuples
    """
    if ignorelist is None:
        # Avoid a shared mutable default argument.
        ignorelist = []
    variables = []
    currentpos = 0
    while True:
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        variable = None
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            # No end marker: any character that is not alphanumeric or an
            # underscore terminates the variable. Default to the end of the
            # string so that "no terminator found" cannot be confused with
            # "terminator found directly after the marker"; the latter used
            # to swallow the rest of the string and hide later variables.
            endmatch = len(str1)
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == '_'):
                    endmatch = n
                    break
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # An integer endmarker means a fixed-length variable string
            # (usually endmarker == 1).
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # Search backwards in case there's an intervening startmarker
            # (if not it's OK)...
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1:
                startmatch2 = start2
                start2 += len(startmarker)
                if start2 != currentpos:
                    currentpos = start2
                    startmatch = startmatch2
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
166
def getaccelerators(accelmarker, acceptlist=None):
    """returns a function that gets a list of accelerators marked using accelmarker

    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed on to findaccelerators()
    @return: A function taking a string and returning a pair of lists: the
        valid accelerator characters and the invalid ones
    """
    def getmarkedaccelerators(str1):
        """returns the valid and invalid accelerators in str1, without their positions"""
        valid, invalid = findaccelerators(str1, accelmarker, acceptlist)
        # Each entry is (position, accelerator); keep only the character.
        validchars = [entry[1] for entry in valid]
        invalidchars = [entry[1] for entry in invalid]
        return validchars, invalidchars
    return getmarkedaccelerators

def getvariables(startmarker, endmarker):
    """returns a function that gets a list of variables marked using startmarker and endmarker

    @param startmarker: The marker that introduces a variable
    @param endmarker: Passed on to findmarkedvariables() to determine where
        a variable ends
    @return: A function taking a string and returning the list of variable
        names found in it
    """
    def getmarkedvariables(str1):
        """returns the names of all the variables in str1, without their positions"""
        located = findmarkedvariables(str1, startmarker, endmarker)
        return [name for _position, name in located]
    return getmarkedvariables

def getnumbers(str1):
    """returns any numbers that are in the string

    A number starts at a digit and continues over digits, periods and degree
    signs. Periods are only kept when another digit follows them, so a
    trailing full stop is not part of the number.

    @type str1: unicode
    @param str1: The string to scan
    @rtype: list
    @return: The numbers found, as strings, in order of appearance
    """
    # TODO: handle locale-based periods e.g. 2,5 for Afrikaans
    assert isinstance(str1, unicode)
    degreesign = u'\xb0'
    found = []
    current = ""
    # Periods seen since the last digit; only emitted if a digit follows.
    pendingperiods = ""
    insidenumber = False
    for char in str1:
        if char.isdigit():
            insidenumber = True
        elif insidenumber and char != '.' and char != degreesign:
            # Any other character ends the number in progress.
            insidenumber = False
            if current:
                found.append(current)
            current = ""
        if not insidenumber:
            pendingperiods = ""
        elif char == degreesign:
            current += char
        elif char == '.':
            pendingperiods += char
        else:
            current += pendingperiods + char
            pendingperiods = ""
    if insidenumber and current:
        found.append(current)
    return found
218
def getfunctions(str1, punctuation):
    """returns the functions() that are in a string, while ignoring the trailing
    punctuation in the given parameter

    @type str1: String
    @param str1: The string to search; it is split on whitespace
    @type punctuation: String
    @param punctuation: Characters to strip from the end of each word;
        parentheses in it are disregarded
    @rtype: list
    @return: The words that end in "()" once trailing punctuation is removed
    """
    # Parentheses must survive the strip, otherwise "()" could never match.
    strippable = punctuation.replace("(", "").replace(")", "")
    functions = []
    for word in str1.split():
        candidate = word.rstrip(strippable)
        if candidate.endswith("()"):
            functions.append(candidate)
    return functions
224
def getemails(str1):
    """returns the email addresses that are in a string

    An address is matched as a run of word characters, periods and hyphens on
    either side of an "@" sign, so punctuation from that set directly after
    an address is included in the match.

    @type str1: String
    @param str1: The string to search
    @rtype: list
    @return: The matched addresses in order of appearance
    """
    # Raw string: the regex escapes are not valid Python string escapes.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
228
def geturls(str1):
    """returns the URIs in a string

    Matches text starting with "http:", "https:", "www." or "ftp:" followed
    by a run of characters permitted by the pattern.

    @type str1: String
    @param str1: The string to search
    @rtype: list
    @return: The matched URIs in order of appearance
    """
    # Raw strings: the regex escapes are not valid Python string escapes.
    URLPAT = r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|' + \
             r'ftp:[\w/\.:;+\-~\%#?=&,]+'
    return re.findall(URLPAT, str1)
234
def countaccelerators(accelmarker, acceptlist=None):
    """returns a function that counts the number of accelerators marked with the given marker

    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed on to findaccelerators()
    @return: A function taking a string and returning a tuple of the number
        of valid accelerators and the number of invalid ones
    """
    def countmarkedaccelerators(str1):
        """returns the number of valid and invalid accelerators marked in str1"""
        valid, invalid = findaccelerators(str1, accelmarker, acceptlist)
        validcount = len(valid)
        invalidcount = len(invalid)
        return validcount, invalidcount
    return countmarkedaccelerators
