1
2 """Text wrapping and filling.
3 """
4
5
6
7
8
9 __revision__ = "$Id: textwrap.py 4103 2006-10-20 07:35:02Z dwaynebailey $"
10
11 import string, re
12
13
14
15
# Compatibility shim: on an interpreter that lacks the True/False builtins,
# merely referencing them raises NameError, and they are then bound to 1/0.
try:
    True, False
except NameError:
    (True, False) = (1, 0)

# Names exported by "from <module> import *".
# NOTE(review): dedent() is called at the bottom of this file but is not
# listed here -- confirm whether it is meant to be part of the public API.
__all__ = ['TextWrapper', 'wrap', 'fill']

# The characters this module treats as whitespace: tab, newline, vertical
# tab, form feed, carriage return and space.  The set is a literal rather
# than string.whitespace (presumably so that it does not vary with the
# locale -- confirm).  It is used to build the whitespace translation tables.
_whitespace = '\t\n\x0b\x0c\r '
32
34 """
35 Object for wrapping/filling text. The public interface consists of
36 the wrap() and fill() methods; the other methods are just there for
37 subclasses to override in order to tweak the default behaviour.
38 If you want to completely replace the main wrapping algorithm,
39 you'll probably have to override _wrap_chunks().
40
41 Several instance attributes control various aspects of wrapping:
42 width (default: 70)
43 the maximum width of wrapped lines (unless break_long_words
44 is false)
45 initial_indent (default: "")
46 string that will be prepended to the first line of wrapped
47 output. Counts towards the line's width.
48 subsequent_indent (default: "")
49 string that will be prepended to all lines save the first
50 of wrapped output; also counts towards each line's width.
51 expand_tabs (default: true)
52 Expand tabs in input text to spaces before further processing.
53 Each tab will become 1 .. 8 spaces, depending on its position in
54 its line. If false, each tab is treated as a single character.
55 drop_whitespace (default: true)
56 Drop leading and trailing whitespace from lines.
57 replace_whitespace (default: true)
58 Replace all whitespace characters in the input text by spaces
59 after tab expansion. Note that if expand_tabs is false and
60 replace_whitespace is true, every tab will be converted to a
61 single space!
62 fix_sentence_endings (default: false)
63 Ensure that sentence-ending punctuation is always followed
64 by two spaces. Off by default because the algorithm is
65 (unavoidably) imperfect.
66 break_long_words (default: true)
67 Break words longer than 'width'. If false, those words will not
68 be broken, and some lines might be longer than 'width'.
69 """
70
# Translation table for 8-bit strings: every character listed in
# _whitespace maps to a plain space.  Used by _munge_whitespace() when the
# text is a str.
whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

# The same mapping for unicode text, expressed as {ordinal: ordinal} as
# required by unicode.translate().  Note that the helper names 'uspace' and
# 'x' stay bound in this scope after the loop finishes.
unicode_whitespace_trans = {}
uspace = ord(u' ')
for x in map(ord, _whitespace):
    unicode_whitespace_trans[x] = uspace

# Pattern used by _split() to cut text into chunks.  The whole pattern is
# one capturing group, so re.split() keeps the separators in its result.
# Alternatives, in order:
#   1. a run of whitespace;
#   2. the leading part of a hyphenated word, up to and including the
#      hyphen (letters are required on both sides of the hyphen);
#   3. a run of two or more hyphens (a dash) sitting between a word
#      character or punctuation mark and a following word character.
wordsep_re = re.compile(
    r'(\s+|'
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')

# Pattern used by _fix_sentence_endings(): a lowercase letter, then one of
# '.', '!' or '?', then an optional closing quote.
# NOTE(review): the pattern is not anchored, so search() will also match a
# sentence-ending sequence in the middle of a chunk.
sentence_end_re = re.compile(r'[%s]'
                             r'[\.\!\?]'
                             r'[\"\']?'
                             % string.lowercase)
95
96
97 - def __init__(self,
98 width=70,
99 initial_indent="",
100 subsequent_indent="",
101 expand_tabs=True,
102 drop_whitespace=True,
103 replace_whitespace=True,
104 fix_sentence_endings=False,
105 break_long_words=True):
106 self.width = width
107 self.initial_indent = initial_indent
108 self.subsequent_indent = subsequent_indent
109 self.expand_tabs = expand_tabs
110 self.drop_whitespace = drop_whitespace
111 self.replace_whitespace = replace_whitespace
112 self.fix_sentence_endings = fix_sentence_endings
113 self.break_long_words = break_long_words
114
115
116
117
118
119 - def _munge_whitespace(self, text):
120 """_munge_whitespace(text : string) -> string
121
122 Munge whitespace in text: expand tabs and convert all other
123 whitespace characters to spaces. Eg. " foo\tbar\n\nbaz"
124 becomes " foo bar baz".
125 """
126 if self.expand_tabs:
127 text = text.expandtabs()
128 if self.replace_whitespace:
129 if isinstance(text, str):
130 text = text.translate(self.whitespace_trans)
131 elif isinstance(text, unicode):
132 text = text.translate(self.unicode_whitespace_trans)
133 return text
134
135
136 - def _split(self, text):
137 """_split(text : string) -> [string]
138
139 Split the text to wrap into indivisible chunks. Chunks are
140 not quite the same as words; see wrap_chunks() for full
141 details. As an example, the text
142 Look, goof-ball -- use the -b option!
143 breaks into the following chunks:
144 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
145 'use', ' ', 'the', ' ', '-b', ' ', 'option!'
146 """
147 chunks = self.wordsep_re.split(text)
148 chunks = filter(None, chunks)
149 return chunks
150
151 - def _fix_sentence_endings(self, chunks):
152 """_fix_sentence_endings(chunks : [string])
153
154 Correct for sentence endings buried in 'chunks'. Eg. when the
155 original text contains "... foo.\nBar ...", munge_whitespace()
156 and split() will convert that to [..., "foo.", " ", "Bar", ...]
157 which has one too few spaces; this method simply changes the one
158 space to two.
159 """
160 i = 0
161 pat = self.sentence_end_re
162 while i < len(chunks)-1:
163 if chunks[i+1] == " " and pat.search(chunks[i]):
164 chunks[i+1] = " "
165 i += 2
166 else:
167 i += 1
168
169 - def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
170 """_handle_long_word(chunks : [string],
171 cur_line : [string],
172 cur_len : int, width : int)
173
174 Handle a chunk of text (most likely a word, not whitespace) that
175 is too long to fit in any line.
176 """
177 space_left = max(width - cur_len, 1)
178
179
180
181 if self.break_long_words:
182 cur_line.append(reversed_chunks[-1][:space_left])
183 reversed_chunks[-1] = reversed_chunks[-1][space_left:]
184
185
186
187
188 elif not cur_line:
189 cur_line.append(reversed_chunks.pop())
190
191
192
193
194
195
196
197 - def _wrap_chunks(self, chunks):
198 """_wrap_chunks(chunks : [string]) -> [string]
199
200 Wrap a sequence of text chunks and return a list of lines of
201 length 'self.width' or less. (If 'break_long_words' is false,
202 some lines may be longer than this.) Chunks correspond roughly
203 to words and the whitespace between them: each chunk is
204 indivisible (modulo 'break_long_words'), but a line break can
205 come between any two chunks. Chunks should not have internal
206 whitespace; ie. a chunk is either all whitespace or a "word".
207 Whitespace chunks will be removed from the beginning and end of
208 lines, but apart from that whitespace is preserved.
209 """
210 lines = []
211 if self.width <= 0:
212 raise ValueError("invalid width %r (must be > 0)" % self.width)
213
214
215
216 chunks.reverse()
217
218 while chunks:
219
220
221
222 cur_line = []
223 cur_len = 0
224
225
226 if lines:
227 indent = self.subsequent_indent
228 else:
229 indent = self.initial_indent
230
231
232 width = self.width - len(indent)
233
234
235
236 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
237 del chunks[-1]
238
239 while chunks:
240 l = len(chunks[-1])
241
242
243 if cur_len + l <= width:
244 cur_line.append(chunks.pop())
245 cur_len += l
246
247
248 else:
249 break
250
251
252
253 if chunks and len(chunks[-1]) > width:
254 self._handle_long_word(chunks, cur_line, cur_len, width)
255
256
257 if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
258 del cur_line[-1]
259
260
261
262 if cur_line:
263 lines.append(indent + ''.join(cur_line))
264
265 return lines
266
267
268
269
def wrap(self, text):
    """wrap(text : string) -> [string]

    Wrap the single paragraph in 'text' and return the wrapped lines as
    a list.  The text goes through the whitespace munging step, is
    split into chunks, optionally has its sentence endings fixed (when
    'fix_sentence_endings' is set), and is finally laid out by
    _wrap_chunks().
    """
    chunks = self._split(self._munge_whitespace(text))
    if self.fix_sentence_endings:
        # Adjusts 'chunks' in place.
        self._fix_sentence_endings(chunks)
    return self._wrap_chunks(chunks)
284
def fill(self, text):
    """fill(text : string) -> string

    Wrap the single paragraph in 'text' with wrap() and return the
    resulting lines joined by newlines as one string.
    """
    wrapped_lines = self.wrap(text)
    return "\n".join(wrapped_lines)
293
294
295
296
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Convenience wrapper: builds a TextWrapper with the given 'width'
    and any further keyword arguments, and returns the result of its
    wrap() method for 'text'.  See the TextWrapper class for the
    keyword arguments that customise wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
309
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Convenience wrapper: builds a TextWrapper with the given 'width'
    and any further keyword arguments, and returns the result of its
    fill() method for 'text'.  See the TextWrapper class for the
    keyword arguments that customise wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
321
322
323
324
# Matches a line that consists solely of spaces and/or tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the leading spaces/tabs of every line that has visible content.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def _common_indent_prefix(first, second):
    """Return the longest leading run of characters shared by both indents."""
    limit = min(len(first), len(second))
    i = 0
    while i < limit and first[i] == second[i]:
        i += 1
    return first[:i]


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Tabs and spaces are both treated as whitespace, but they are not
    equal: a line indented with spaces and a line indented with a tab
    have no common leading whitespace.  Only the prefix that every
    non-blank line really shares, character for character, is removed.

    Lines that consist solely of spaces and tabs are normalised to empty
    lines and do not take part in computing the common margin.
    """
    margin = None
    text = _whitespace_only_re.sub('', text)
    for indent in _leading_whitespace_re.findall(text):
        if margin is None:
            # First non-blank line: its indent is the initial candidate.
            margin = indent
        else:
            # Shrink the candidate to what it shares with this line.
            margin = _common_indent_prefix(margin, indent)
        if not margin:
            # Nothing in common any more; no later line can change that.
            break

    if margin:
        # margin contains only spaces and tabs, so it is safe to embed
        # in the pattern without escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
375
if __name__ == "__main__":
    # Manual smoke test: print a dedented two-line sample.  The first
    # line has no indentation, so there is no common margin to remove.
    sample = "Hello there.\n This is indented."
    print(dedent(sample))
380