1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Functions to get decorative/informative text out of strings."""
23
24 import re
25 import unicodedata
26 from translate.lang import data
27
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def spacestart(str1):
    """returns all the whitespace from the start of the string

    @type str1: String
    @param str1: The string to inspect
    @rtype: String
    @return: The leading run of whitespace characters (empty if there is none)
    """
    # lstrip() removes exactly the leading whitespace, so the difference in
    # length is the size of the prefix we want
    return str1[:len(str1) - len(str1.lstrip())]
35
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def spaceend(str1):
    """returns all the whitespace from the end of the string

    @type str1: String
    @param str1: The string to inspect
    @rtype: String
    @return: The trailing run of whitespace characters (empty if there is none)
    """
    # everything after the right-stripped text is the trailing whitespace
    return str1[len(str1.rstrip()):]
44
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def puncstart(str1, punctuation):
    """returns all the punctuation from the start of the string

    Whitespace is treated like punctuation, so it is included in the result.

    @type str1: String
    @param str1: The string to inspect
    @param punctuation: The characters to treat as punctuation (anything
                        supporting the "in" operator)
    @rtype: String
    @return: The leading run of punctuation/whitespace characters
    """
    for pos, c in enumerate(str1):
        if c not in punctuation and not c.isspace():
            # first ordinary character: everything before it is the prefix
            return str1[:pos]
    # the whole string (possibly empty) is punctuation/whitespace
    return str1
52
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def puncend(str1, punctuation):
    """returns all the punctuation from the end of the string

    Whitespace is treated like punctuation, so it is included in the result.

    @type str1: String
    @param str1: The string to inspect
    @param punctuation: The characters to treat as punctuation (anything
                        supporting the "in" operator)
    @rtype: String
    @return: The trailing run of punctuation/whitespace characters
    """
    # walk backwards; pos is the index just past the character being tested
    for pos in range(len(str1), 0, -1):
        c = str1[pos - 1]
        if c not in punctuation and not c.isspace():
            return str1[pos:]
    # the whole string (possibly empty) is punctuation/whitespace
    return str1
61
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def ispurepunctuation(str1):
    """checks whether the string is entirely punctuation

    @type str1: String
    @param str1: The string to inspect
    @rtype: Boolean
    @return: True if the string is non-empty and contains no alphanumeric
             characters, False otherwise
    """
    # The previous code returned len(str1) on success, i.e. an int that was
    # only truthy for non-empty input; return a real boolean with the same
    # truth value instead.
    return bool(str1) and not any(c.isalnum() for c in str1)
68
# NOTE(review): the def line is missing from the reviewed listing. The name
# comes from the call in findaccelerators; the acceptlist=None default is
# assumed from the "acceptlist is None" handling below -- confirm.
def isvalidaccelerator(accelerator, acceptlist=None):
    """returns whether the given accelerator character is valid

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as accelerators
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    # type(u"") is "unicode" on Python 2 and "str" on Python 3, so the type
    # checks work on both without naming the Python-2-only builtin
    text_type = type(u"")
    assert isinstance(accelerator, text_type)
    assert acceptlist is None or isinstance(acceptlist, text_type)
    if not accelerator:
        return False
    if acceptlist is not None:
        # an explicit list decides on its own (after data.normalize)
        acceptlist = data.normalize(acceptlist)
        return accelerator in acceptlist

    # no explicit list: underscores are dropped and what remains must be
    # alphanumeric
    accelerator = accelerator.replace("_", "")
    if not accelerator.isalnum():
        return False

    # Reject characters that decompose into several code points (for example
    # a base letter plus a combining accent).
    decomposition = unicodedata.decomposition(accelerator)
    # strip formatting tags such as <compat> before counting code points
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    return decomposition.count(" ") == 0
100
# NOTE(review): the def line is missing from the reviewed listing. The name
# and argument order come from the calls elsewhere in this file; the
# acceptlist=None default is assumed -- confirm.
def findaccelerators(str1, accelmarker, acceptlist=None):
    """returns all the accelerators and locations in str1 marked with a given marker

    @type str1: String
    @param str1: The string to search
    @type accelmarker: String
    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed to isvalidaccelerator for every candidate
    @rtype: Tuple
    @return: (accelerators, badaccelerators); each is a list of
             (position of the marker, character following the marker)
             tuples, split by whether isvalidaccelerator accepted the
             character
    """
    accelerators = []
    badaccelerators = []
    markerlen = len(accelmarker)
    searchfrom = 0
    while True:
        accelstart = str1.find(accelmarker, searchfrom)
        if accelstart < 0:
            break
        accelpos = accelstart + markerlen
        if accelpos >= len(str1):
            # the marker is the last thing in the string: nothing follows it
            break
        accelerator = str1[accelpos:accelpos + 1]
        # resume after the accelerator so it is never re-read as a marker
        searchfrom = accelpos + 1
        if isvalidaccelerator(accelerator, acceptlist):
            accelerators.append((accelstart, accelerator))
        else:
            badaccelerators.append((accelstart, accelerator))
    return accelerators, badaccelerators
121
# NOTE(review): the def line is missing from the reviewed listing. The name
# and the first three parameters come from the call in this file; that call
# omits ignorelist, so it must have a default -- None (meaning "ignore
# nothing") is used here; confirm against the file.
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=None):
    """returns all the variables and locations in str1 marked with a given marker

    @type str1: String
    @param str1: The string to search
    @type startmarker: String
    @param startmarker: The text that introduces a variable
    @param endmarker: How a variable ends: None means it runs until the first
                      character that is neither alphanumeric nor "_"; an
                      integer means a fixed number of characters; a string
                      is a closing marker
    @param ignorelist: Variable names to leave out of the result
    @rtype: List
    @return: (position of the start marker, variable name without markers)
             tuples, in order of appearance
    """
    if ignorelist is None:
        ignorelist = ()
    variables = []
    markerlen = len(startmarker)
    currentpos = 0
    while True:
        startmatch = str1.find(startmarker, currentpos)
        if startmatch < 0:
            break
        currentpos = startmatch + markerlen
        if endmarker is None:
            # The variable is the run of identifier characters after the
            # marker; with no terminator it extends to the end of the string.
            endmatch = len(str1)
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == '_'):
                    endmatch = n
                    break
            # An empty run (marker directly followed by a non-identifier
            # character) is not a variable.  The scan simply continues after
            # the marker; previously this case was confused with "no
            # terminator found" and swallowed the rest of the string, hiding
            # any later variables.
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            else:
                variable = None
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # fixed-length variable
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # if another start marker sits between this one and the end
            # marker, the variable starts at the innermost one
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1 and start2 + markerlen != currentpos:
                startmatch = start2
                currentpos = start2 + markerlen
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            # accept empty names and names made of alphanumerics, "_" and "."
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
166
# NOTE(review): the def line is missing from the reviewed listing; the name,
# signature and acceptlist=None default are reconstructed -- confirm.
def getaccelerators(accelmarker, acceptlist=None):
    """returns a function that gets a list of accelerators marked using accelmarker

    @type accelmarker: String
    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed through to findaccelerators on every call
    @rtype: Function
    @return: A function taking a string and returning the tuple
             (accelerators, badaccelerators), each a list of characters
    """
    def getmarkedaccelerators(str1):
        """returns all the accelerators in str1 marked with a given marker"""
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        # findaccelerators yields (position, character); keep the characters
        accelerators = [accelerator for _, accelerator in acclocs]
        badaccelerators = [accelerator for _, accelerator in badlocs]
        return accelerators, badaccelerators
    return getmarkedaccelerators
176
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def getvariables(startmarker, endmarker):
    """returns a function that gets a list of variables marked using startmarker and endmarker

    @param startmarker: Passed through to findmarkedvariables on every call
    @param endmarker: Passed through to findmarkedvariables on every call
    @rtype: Function
    @return: A function taking a string and returning the list of variable
             names found in it
    """
    def getmarkedvariables(str1):
        """returns all the variables in str1 marked with a given marker"""
        varlocs = findmarkedvariables(str1, startmarker, endmarker)
        # findmarkedvariables yields (position, name); keep the names
        return [variable for _, variable in varlocs]
    return getmarkedvariables
185
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def getnumbers(str1):
    """returns any numbers that are in the string

    A number is a run of digits that may contain periods and degree signs.
    Periods are only kept when another digit follows them, so a sentence-ending
    full stop is not part of the number.

    @type str1: Unicode string
    @param str1: The string to search
    @rtype: List
    @return: The numbers found, as strings, in order of appearance
    """
    # type(u"") is "unicode" on Python 2 and "str" on Python 3, so the type
    # check works on both without naming the Python-2-only builtin
    assert isinstance(str1, type(u""))
    numbers = []
    innumber = False
    degreesign = u'\xb0'
    lastnumber = ""
    # periods seen since the last digit; only committed if a digit follows
    carryperiod = ""
    for chr1 in str1:
        if chr1.isdigit():
            innumber = True
            lastnumber += carryperiod + chr1
            carryperiod = ""
        elif innumber and chr1 == '.':
            carryperiod += chr1
        elif innumber and chr1 == degreesign:
            lastnumber += chr1
        else:
            # any other character ends the current number (if there is one)
            if innumber and lastnumber:
                numbers.append(lastnumber)
            innumber = False
            lastnumber = ""
            carryperiod = ""
    if innumber and lastnumber:
        numbers.append(lastnumber)
    return numbers
218
224
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def getemails(str1):
    """returns the email addresses that are in a string

    @type str1: String
    @param str1: The string to search
    @rtype: List
    @return: Every non-overlapping match of word characters, "." or "-"
             on both sides of an "@"
    """
    # raw string: same pattern as before, without relying on Python leaving
    # unknown escapes such as "\w" untouched in ordinary string literals
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
228
# NOTE(review): the def line is missing from the reviewed listing; the name
# and signature are reconstructed from the body -- confirm against the file.
def geturls(str1):
    """returns the URIs in a string

    Recognises text starting with "http:", "https:", "www." or "ftp:"; the
    "ftp:" alternative accepts a slightly smaller character set (no "$",
    "(" or ")").

    @type str1: String
    @param str1: The string to search
    @rtype: List
    @return: Every non-overlapping match, in order of appearance
    """
    # raw strings: same pattern as before, without relying on Python leaving
    # unknown escapes such as "\w" untouched in ordinary string literals
    urlpat = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(urlpat, str1)
234
# NOTE(review): the def line is missing from the reviewed listing; the name,
# signature and acceptlist=None default are reconstructed -- confirm.
def countaccelerators(accelmarker, acceptlist=None):
    """returns a function that counts the number of accelerators marked with the given marker

    @type accelmarker: String
    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed through to findaccelerators on every call
    @rtype: Function
    @return: A function taking a string and returning the tuple
             (number of valid accelerators, number of invalid accelerators)
    """
    def countmarkedaccelerators(str1):
        """returns the number of valid and invalid accelerators in str1 marked with a given marker"""
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        return len(acclocs), len(badlocs)
    return countmarkedaccelerators
242