Package cssutils :: Package tests :: Module test_tokenize2
[hide private]
[frames | no frames]

Source Code for Module cssutils.tests.test_tokenize2

  1  # -*- coding: utf-8 -*- 
  2  """Testcases for new cssutils.tokenize.Tokenizer 
  3   
  4  TODO:: 
  5   
  6      - escape ends with explicit space but \r\n as single space 
  7      - ur'"\""': [('STRING', ur'"\""', 1, 1)], 
  8      - font-face with escaped "-" 
  9   
 10  + old tests as new ones are **not complete**! 
 11  """ 
 12  __version__ = '$Id: test_tokenize2.py 1116 2008-03-05 13:52:23Z cthedot $' 
 13   
 14  import xml.dom 
 15  import basetest 
 16  from cssutils.tokenize2 import * 
 17   
18 -class TokenizerTestCase(basetest.BaseTestCase):
    # Table-driven tests for the Tokenizer imported from cssutils.tokenize2.
    #
    # Each class-level dict below maps a CSS source string to the list of
    # tokens it is expected to produce. Every expected token is a 4-tuple
    # that looks like (type, value, line, col) -- TODO confirm the meaning
    # of the two trailing integers against the tokenizer.
    #
    # The test methods merge the tables with dict.update, so on duplicate
    # keys the table merged last wins:
    #   test_tokenize:          testsall, tests2, tests3, testsfullsheet,
    #                           testsfullsheetfalse
    #   test_tokenizefullsheet: testsall, tests2, tests3, testsfullsheet,
    #                           testsfullsheettrue
    #   test_tokenizeCSS3:      testsall, tests3
    #   test_tokenizeCSS2_1:    testsall, tests2only
19 20 testsall = { 21 # IDENT 22 u'äöü߀': [('IDENT', u'äöü߀', 1, 1)], 23 u' a ': [('S', u' ', 1, 1), 24 ('IDENT', u'a', 1, 2), 25 ('S', u' ', 1, 3)], 26 u'_a': [('IDENT', u'_a', 1, 1)], 27 u'-a': [('IDENT', u'-a', 1, 1)], 28 u'aA-_\200\377': [('IDENT', u'aA-_\200\377', 1, 1)], 29 u'a1': [('IDENT', u'a1', 1, 1)], 30 # escapes must end with S or max 6 digits: 31 u'\\44 b': [('IDENT', u'Db', 1, 1)], 32 u'\\44 b': [('IDENT', u'D', 1, 1), 33 ('S', u' ', 1, 5), 34 ('IDENT', u'b', 1, 6)], 35 u'\\44\nb': [('IDENT', u'Db', 1, 1)], 36 u'\\44\rb': [('IDENT', u'Db', 1, 1)], 37 u'\\44\fb': [('IDENT', u'Db', 1, 1)], 38 u'\\44\n*': [('IDENT', u'D', 1, 1), 39 ('CHAR', u'*', 2, 1)], 40 u'\\44 a': [('IDENT', u'D', 1, 1), 41 ('S', u' ', 1, 5), 42 ('IDENT', u'a', 1, 6)], 43 # TODO: 44 # Note that this means that a "real" space after the escape sequence 45 # must itself either be escaped or doubled: 46 u'\\44\ x': [('IDENT', u'D\\ x', 1, 1)], 47 u'\\44 ': [('IDENT', u'D', 1, 1), 48 ('S', u' ', 1, 5)], 49 50 ur'\44': [('IDENT', u'D', 1, 1)], 51 ur'\\': [('IDENT', ur'\\', 1, 1)], 52 ur'\{': [('IDENT', ur'\{', 1, 1)], 53 ur'\"': [('IDENT', ur'\"', 1, 1)], 54 ur'\(': [('IDENT', ur'\(', 1, 1)], 55 ur'\1 \22 \333 \4444 \55555 \666666 \777777 7 \7777777': [ 56 ('IDENT', u'\x01"\u0333\u4444\\55555 \\666666 \\777777 7', 1, 1), 57 ('S', ' ', 1, 43), 58 ('IDENT', '\\7777777', 1, 44)], 59 60 61 u'\\1 b': [('IDENT', u'\x01b', 1, 1)], 62 u'\\44 b': [('IDENT', u'Db', 1, 1)], 63 u'\\123 b': [('IDENT', u'\u0123b', 1, 1)], 64 u'\\1234 b': [('IDENT', u'\u1234b', 1, 1)], 65 u'\\12345 b': [('IDENT', u'\\12345 b', 1, 1)], 66 u'\\123456 b': [('IDENT', u'\\123456 b', 1, 1)], 67 u'\\1234567 b': [('IDENT', u'\\1234567', 1, 1), 68 ('S', u' ', 1, 9), 69 ('IDENT', u'b', 1, 10)], 70 u'\\{\\}\\(\\)\\[\\]\\#\\@\\.\\,': 71 [('IDENT', u'\\{\\}\\(\\)\\[\\]\\#\\@\\.\\,', 1, 1)], 72 73 # STRING 74 u' "" ': [('S', u' ', 1, 1), 75 ('STRING', u'""', 1, 2), 76 ('S', u' ', 1, 4)], 77 u' "\'" ': [('S', u' ', 1, 1), 78 
('STRING', u'"\'"', 1, 2), 79 ('S', u' ', 1, 5)], 80 u" '' ": [('S', u' ', 1, 1), 81 ('STRING', u"''", 1, 2), 82 ('S', u' ', 1, 4)], 83 u" '' ": [('S', u' ', 1, 1), 84 ('STRING', u"''", 1, 2), 85 ('S', u' ', 1, 4)], 86 u"'\\\n'": [('STRING', u"'\\\n'", 1, 1)], 87 u"'\\\n\\\n\\\n'": [('STRING', u"'\\\n\\\n\\\n'", 1, 1)], 88 u"'\\\f'": [('STRING', u"'\\\f'", 1, 1)], 89 u"'\\\r'": [('STRING', u"'\\\r'", 1, 1)], 90 u"'\\\r\n'": [('STRING', u"'\\\r\n'", 1, 1)], 91 u"'1\\\n2'": [('STRING', u"'1\\\n2'", 1, 1)], 92 93 # HASH 94 u' #a ': [('S', u' ', 1, 1), 95 ('HASH', u'#a', 1, 2), 96 ('S', u' ', 1, 4)], 97 98 u'#ccc': [('HASH', u'#ccc', 1, 1)], 99 u'#111': [('HASH', u'#111', 1, 1)], 100 u'#a1a1a1': [('HASH', u'#a1a1a1', 1, 1)], 101 u'#1a1a1a': [('HASH', u'#1a1a1a', 1, 1)], 102 103 # NUMBER, for plus see CSS3 104 u' 0 ': [('S', u' ', 1, 1), 105 ('NUMBER', u'0', 1, 2), 106 ('S', u' ', 1, 3)], 107 u' 0.1 ': [('S', u' ', 1, 1), 108 ('NUMBER', u'0.1', 1, 2), 109 ('S', u' ', 1, 5)], 110 u' .0 ': [('S', u' ', 1, 1), 111 ('NUMBER', u'.0', 1, 2), 112 ('S', u' ', 1, 4)], 113 114 u' -0 ': [('S', u' ', 1, 1), 115 ('CHAR', u'-', 1, 2), 116 ('NUMBER', u'0', 1, 3), 117 ('S', u' ', 1, 4)], 118 119 # PERCENTAGE 120 u' 0% ': [('S', u' ', 1, 1), 121 ('PERCENTAGE', u'0%', 1, 2), 122 ('S', u' ', 1, 4)], 123 u' .5% ': [('S', u' ', 1, 1), 124 ('PERCENTAGE', u'.5%', 1, 2), 125 ('S', u' ', 1, 5)], 126 127 # URI 128 u' url() ': [('S', u' ', 1, 1), 129 ('URI', u'url()', 1, 2), 130 ('S', u' ', 1, 7)], 131 u' url(a) ': [('S', u' ', 1, 1), 132 ('URI', u'url(a)', 1, 2), 133 ('S', u' ', 1, 8)], 134 u' url("a") ': [('S', u' ', 1, 1), 135 ('URI', u'url("a")', 1, 2), 136 ('S', u' ', 1, 10)], 137 u' url( a ) ': [('S', u' ', 1, 1), 138 ('URI', u'url( a )', 1, 2), 139 ('S', u' ', 1, 10)], 140 141 # UNICODE-RANGE 142 143 # CDO 144 u' <!-- ': [('S', u' ', 1, 1), 145 ('CDO', u'<!--', 1, 2), 146 ('S', u' ', 1, 6)], 147 u'"<!--""-->"': [('STRING', u'"<!--"', 1, 1), 148 ('STRING', u'"-->"', 1, 7)], 149 150 # CDC 
151 u' --> ': [('S', u' ', 1, 1), 152 ('CDC', u'-->', 1, 2), 153 ('S', u' ', 1, 5)], 154 155 # S 156 u' ': [('S', u' ', 1, 1)], 157 u' ': [('S', u' ', 1, 1)], 158 u'\r': [('S', u'\r', 1, 1)], 159 u'\n': [('S', u'\n', 1, 1)], 160 u'\r\n': [('S', u'\r\n', 1, 1)], 161 u'\f': [('S', u'\f', 1, 1)], 162 u'\r': [('S', u'\r', 1, 1)], 163 u'\t': [('S', u'\t', 1, 1)], 164 u'\r\n\r\n\f\t ': [('S', u'\r\n\r\n\f\t ', 1, 1)], 165 166 # COMMENT, for incomplete see later 167 u'/*x*/ ': [('COMMENT', u'/*x*/', 1, 1), 168 ('S', u' ', 1, 6)], 169 170 # FUNCTION 171 u' x( ': [('S', u' ', 1, 1), 172 ('FUNCTION', u'x(', 1, 2), 173 ('S', u' ', 1, 4)], 174 175 # INCLUDES 176 u' ~= ': [('S', u' ', 1, 1), 177 ('INCLUDES', u'~=', 1, 2), 178 ('S', u' ', 1, 4)], 179 u'~==': [('INCLUDES', u'~=', 1, 1), ('CHAR', u'=', 1, 3)], 180 181 # DASHMATCH 182 u' |= ': [('S', u' ', 1, 1), 183 ('DASHMATCH', u'|=', 1, 2), 184 ('S', u' ', 1, 4)], 185 u'|==': [('DASHMATCH', u'|=', 1, 1), ('CHAR', u'=', 1, 3)], 186 187 # CHAR 188 u' @ ': [('S', u' ', 1, 1), 189 ('CHAR', u'@', 1, 2), 190 ('S', u' ', 1, 3)], 191 192 # --- overwritten for CSS 2.1 --- 193 # LBRACE 194 u' { ': [('S', u' ', 1, 1), 195 ('CHAR', u'{', 1, 2), 196 ('S', u' ', 1, 3)], 197 # PLUS 198 u' + ': [('S', u' ', 1, 1), 199 ('CHAR', u'+', 1, 2), 200 ('S', u' ', 1, 3)], 201 # GREATER 202 u' > ': [('S', u' ', 1, 1), 203 ('CHAR', u'>', 1, 2), 204 ('S', u' ', 1, 3)], 205 # COMMA 206 u' , ': [('S', u' ', 1, 1), 207 ('CHAR', u',', 1, 2), 208 ('S', u' ', 1, 3)], 209 210 # class 211 u' . 
': [('S', u' ', 1, 1), 212 ('CHAR', u'.', 1, 2), 213 ('S', u' ', 1, 3)], 214 215 } 216 217 tests3 = { 218 # specials 219 u'c\\olor': [('IDENT', u'c\\olor', 1, 1)], 220 u'-1': [('CHAR', u'-', 1, 1), ('NUMBER', u'1', 1, 2)], 221 u'-1px': [('CHAR', u'-', 1, 1), ('DIMENSION', u'1px', 1, 2)], 222 223 # ATKEYWORD 224 u' @x ': [('S', u' ', 1, 1), 225 ('ATKEYWORD', u'@x', 1, 2), 226 ('S', u' ', 1, 4)], 227 u'@X': [('ATKEYWORD', u'@X', 1, 1)], 228 u'@\\x': [('ATKEYWORD', u'@\\x', 1, 1)], 229 # - 230 u'@1x': [('CHAR', u'@', 1, 1), 231 ('DIMENSION', u'1x', 1, 2)], 232 233 # DIMENSION 234 u' 0px ': [('S', u' ', 1, 1), 235 ('DIMENSION', u'0px', 1, 2), 236 ('S', u' ', 1, 5)], 237 u' 1s ': [('S', u' ', 1, 1), 238 ('DIMENSION', u'1s', 1, 2), 239 ('S', u' ', 1, 4)], 240 u'0.2EM': [('DIMENSION', u'0.2EM', 1, 1)], 241 u'1p\\x': [('DIMENSION', u'1p\\x', 1, 1)], 242 u'1PX': [('DIMENSION', u'1PX', 1, 1)], 243 244 # NUMBER 245 u' - 0 ': [('S', u' ', 1, 1), 246 ('CHAR', u'-', 1, 2), 247 ('S', u' ', 1, 3), 248 ('NUMBER', u'0', 1, 4), 249 ('S', u' ', 1, 5)], 250 u' + 0 ': [('S', u' ', 1, 1), 251 ('CHAR', u'+', 1, 2), 252 ('S', u' ', 1, 3), 253 ('NUMBER', u'0', 1, 4), 254 ('S', u' ', 1, 5)], 255 256 # PREFIXMATCH 257 u' ^= ': [('S', u' ', 1, 1), 258 ('PREFIXMATCH', u'^=', 1, 2), 259 ('S', u' ', 1, 4)], 260 u'^==': [('PREFIXMATCH', u'^=', 1, 1), ('CHAR', u'=', 1, 3)], 261 262 # SUFFIXMATCH 263 u' $= ': [('S', u' ', 1, 1), 264 ('SUFFIXMATCH', u'$=', 1, 2), 265 ('S', u' ', 1, 4)], 266 u'$==': [('SUFFIXMATCH', u'$=', 1, 1), ('CHAR', u'=', 1, 3)], 267 268 # SUBSTRINGMATCH 269 u' *= ': [('S', u' ', 1, 1), 270 ('SUBSTRINGMATCH', u'*=', 1, 2), 271 ('S', u' ', 1, 4)], 272 u'*==': [('SUBSTRINGMATCH', u'*=', 1, 1), ('CHAR', u'=', 1, 3)], 273 274 # BOM 275 u' \xFEFF ': [('S', u' ', 1, 1), 276 ('BOM', u'\xFEFF', 1, 2), # len=3 277 ('S', u' ', 1, 5)], 278 279 } 280 281 tests2 = { 282 # escapes work not for a-f! 
283 # IMPORT_SYM 284 u' @import ': [('S', u' ', 1, 1), 285 ('IMPORT_SYM', u'@import', 1, 2), 286 ('S', u' ', 1, 9)], 287 u'@IMPORT': [('IMPORT_SYM', u'@IMPORT', 1, 1)], 288 ur'@\i\m\p\o\r\t': [('IMPORT_SYM', ur'@\i\m\p\o\r\t', 1, 1)], 289 ur'@\I\M\P\O\R\T': [('IMPORT_SYM', ur'@\I\M\P\O\R\T', 1, 1)], 290 ur'@\49 \04d\0050\0004f\000052\54': [('IMPORT_SYM', 291 ur'@\49 \04d\0050\0004f\000052\54', 292 1, 1)], 293 ur'@\69 \06d\0070\0006f\000072\74': [('IMPORT_SYM', 294 ur'@\69 \06d\0070\0006f\000072\74', 295 1, 1)], 296 297 # PAGE_SYM 298 u' @page ': [('S', u' ', 1, 1), 299 ('PAGE_SYM', u'@page', 1, 2), 300 ('S', u' ', 1, 7)], 301 u'@PAGE': [('PAGE_SYM', u'@PAGE', 1, 1)], 302 ur'@\pa\ge': [('PAGE_SYM', ur'@\pa\ge', 1, 1)], 303 ur'@\PA\GE': [('PAGE_SYM', ur'@\PA\GE', 1, 1)], 304 ur'@\50\41\47\45': [('PAGE_SYM', ur'@\50\41\47\45', 1, 1)], 305 ur'@\70\61\67\65': [('PAGE_SYM', ur'@\70\61\67\65', 1, 1)], 306 307 # MEDIA_SYM 308 u' @media ': [('S', u' ', 1, 1), 309 ('MEDIA_SYM', u'@media', 1, 2), 310 ('S', u' ', 1, 8)], 311 u'@MEDIA': [('MEDIA_SYM', u'@MEDIA', 1, 1)], 312 ur'@\med\ia': [('MEDIA_SYM', ur'@\med\ia', 1, 1)], 313 ur'@\MED\IA': [('MEDIA_SYM', ur'@\MED\IA', 1, 1)], 314 u'@\\4d\n\\45\r\\44\t\\49\r\n\\41\f': [('MEDIA_SYM', 315 u'@\\4d\n\\45\r\\44\t\\49\r\n\\41\f', 316 1, 1)], 317 u'@\\6d\n\\65\r\\64\t\\69\r\n\\61\f': [('MEDIA_SYM', 318 u'@\\6d\n\\65\r\\64\t\\69\r\n\\61\f', 319 1, 1)], 320 321 # FONT_FACE_SYM 322 u' @font-face ': [('S', u' ', 1, 1), 323 ('FONT_FACE_SYM', u'@font-face', 1, 2), 324 ('S', u' ', 1, 12)], 325 u'@FONT-FACE': [('FONT_FACE_SYM', u'@FONT-FACE', 1, 1)], 326 ur'@f\o\n\t\-face': [('FONT_FACE_SYM', ur'@f\o\n\t\-face', 1, 1)], 327 ur'@F\O\N\T\-FACE': [('FONT_FACE_SYM', ur'@F\O\N\T\-FACE', 1, 1)], 328 # TODO: "-" as hex! 
329 ur'@\46\4f\4e\54\-\46\41\43\45': [('FONT_FACE_SYM', 330 ur'@\46\4f\4e\54\-\46\41\43\45', 1, 1)], 331 ur'@\66\6f\6e\74\-\66\61\63\65': [('FONT_FACE_SYM', 332 ur'@\66\6f\6e\74\-\66\61\63\65', 1, 1)], 333 334 # CHARSET_SYM only if "@charset "! 335 u' @charset ': [('S', u' ', 1, 1), 336 ('CHARSET_SYM', u'@charset ', 1, 2), 337 ('S', u' ', 1, 11)], 338 u'@charset': [('ATKEYWORD', u'@charset', 1, 1)], # no ending S 339 u'@CHARSET ': [('ATKEYWORD', u'@CHARSET', 1, 1),# uppercase 340 ('S', u' ', 1, 9)], 341 u'@cha\\rset ': [('ATKEYWORD', u'@cha\\rset', 1, 1), # not literal 342 ('S', u' ', 1, 10)], 343 344 # NAMESPACE_SYM 345 u' @namespace ': [('S', u' ', 1, 1), 346 ('NAMESPACE_SYM', u'@namespace', 1, 2), 347 ('S', u' ', 1, 12)], 348 ur'@NAMESPACE': [('NAMESPACE_SYM', ur'@NAMESPACE', 1, 1)], 349 ur'@\na\me\s\pace': [('NAMESPACE_SYM', ur'@\na\me\s\pace', 1, 1)], 350 ur'@\NA\ME\S\PACE': [('NAMESPACE_SYM', ur'@\NA\ME\S\PACE', 1, 1)], 351 ur'@\4e\41\4d\45\53\50\41\43\45': [('NAMESPACE_SYM', 352 ur'@\4e\41\4d\45\53\50\41\43\45', 1, 1)], 353 ur'@\6e\61\6d\65\73\70\61\63\65': [('NAMESPACE_SYM', 354 ur'@\6e\61\6d\65\73\70\61\63\65', 1, 1)], 355 356 # ATKEYWORD 357 u' @unknown ': [('S', u' ', 1, 1), 358 ('ATKEYWORD', u'@unknown', 1, 2), 359 ('S', u' ', 1, 10)], 360 361 # STRING 362 # strings with linebreak in it 363 u' "\\na"\na': [('S', u' ', 1, 1), 364 ('STRING', u'"\\na"', 1, 2), 365 ('S', u'\n', 1, 7), 366 ('IDENT', u'a', 2, 1)], 367 u" '\\na'\na": [('S', u' ', 1, 1), 368 ('STRING', u"'\\na'", 1, 2), 369 ('S', u'\n', 1, 7), 370 ('IDENT', u'a', 2, 1)], 371 u' "\\r\\n\\t\\n\\ra"a': [('S', u' ', 1, 1), 372 ('STRING', u'"\\r\\n\\t\\n\\ra"', 1, 2), 373 ('IDENT', u'a', 1, 15)], 374 375 # IMPORTANT_SYM is not IDENT!!! 376 u' !important ': [('S', u' ', 1, 1), 377 ('CHAR', u'!', 1, 2), 378 ('IDENT', u'important', 1, 3), 379 ('S', u' ', 1, 12)], 380 u'! 
/*1*/ important ': [ 381 ('CHAR', u'!', 1, 1), 382 ('S', u' ', 1, 2), 383 ('COMMENT', u'/*1*/', 1, 3), 384 ('S', u' ', 1, 8), 385 ('IDENT', u'important', 1, 9), 386 ('S', u' ', 1, 18)], 387 u'! important': [('CHAR', u'!', 1, 1), 388 ('S', u' ', 1, 2), 389 ('IDENT', u'important', 1, 3)], 390 u'!\n\timportant': [('CHAR', u'!', 1, 1), 391 ('S', u'\n\t', 1, 2), 392 ('IDENT', u'important', 2, 2)], 393 u'!IMPORTANT': [('CHAR', u'!', 1, 1), 394 ('IDENT', u'IMPORTANT', 1, 2)], 395 ur'!\i\m\p\o\r\ta\n\t': [('CHAR', u'!', 1, 1), 396 ('IDENT', 397 ur'\i\m\p\o\r\ta\n\t', 1, 2)], 398 ur'!\I\M\P\O\R\Ta\N\T': [('CHAR', u'!', 1, 1), 399 ('IDENT', 400 ur'\I\M\P\O\R\Ta\N\T', 1, 2)], 401 ur'!\49\4d\50\4f\52\54\41\4e\54': [('CHAR', u'!', 1, 1), 402 ('IDENT', 403 ur'IMPORTANT', 404 1, 2)], 405 ur'!\69\6d\70\6f\72\74\61\6e\74': [('CHAR', u'!', 1, 1), 406 ('IDENT', 407 ur'important', 408 1, 2)], 409 } 410 411 # overwriting tests in testsall 412 tests2only = { 413 # LBRACE 414 u' { ': [('S', u' ', 1, 1), 415 ('LBRACE', u'{', 1, 2), 416 ('S', u' ', 1, 3)], 417 # PLUS 418 u' + ': [('S', u' ', 1, 1), 419 ('PLUS', u'+', 1, 2), 420 ('S', u' ', 1, 3)], 421 # GREATER 422 u' > ': [('S', u' ', 1, 1), 423 ('GREATER', u'>', 1, 2), 424 ('S', u' ', 1, 3)], 425 # COMMA 426 u' , ': [('S', u' ', 1, 1), 427 ('COMMA', u',', 1, 2), 428 ('S', u' ', 1, 3)], 429 # class 430 u' . 
': [('S', u' ', 1, 1), 431 ('CLASS', u'.', 1, 2), 432 ('S', u' ', 1, 3)], 433 } 434 435 testsfullsheet = { 436 # TODO: escape ends with explicit space but \r\n as single space 437 #u'\\1\r\nb': [('IDENT', u'\\1\r', 1, 1), ('IDENT', u'b', 1, 4)], 438 439 # STRING 440 ur'"\" "': [('STRING', ur'"\" "', 1, 1)], 441 u"""'\\''""": [('STRING', u"""'\\''""", 1, 1)], 442 u'''"\\""''': [('STRING', u'''"\\""''', 1, 1)], 443 u' "\na': [('S', u' ', 1, 1), 444 ('INVALID', u'"', 1, 2), 445 ('S', u'\n', 1, 3), 446 ('IDENT', u'a', 2, 1)], 447 448 # strings with linebreak in it 449 u' "\\na\na': [('S', u' ', 1, 1), 450 ('INVALID', u'"\\na', 1, 2), 451 ('S', u'\n', 1, 6), 452 ('IDENT', u'a', 2, 1)], 453 u' "\\r\\n\\t\\n\\ra\na': [('S', u' ', 1, 1), 454 ('INVALID', u'"\\r\\n\\t\\n\\ra', 1, 2), 455 ('S', u'\n', 1, 14), 456 ('IDENT', u'a', 2, 1)], 457 # URI 458 u'ur\\l(a)': [('URI', u'ur\\l(a)', 1, 1)], 459 u'url(a)': [('URI', u'url(a)', 1, 1)], 460 u'\\55r\\4c(a)': [('URI', u'UrL(a)', 1, 1)], 461 u'\\75r\\6c(a)': [('URI', u'url(a)', 1, 1)], 462 } 463 464 # tests if fullsheet=False is set on tokenizer 465 testsfullsheetfalse = { 466 # COMMENT incomplete 467 u'/*': [('CHAR', u'/', 1, 1), 468 ('CHAR', u'*', 1, 2)], 469 470 # INVALID incomplete 471 u' " ': [('S', u' ', 1, 1), 472 ('INVALID', u'" ', 1, 2)], 473 u" 'abc\"with quote\" in it": [('S', u' ', 1, 1), 474 ('INVALID', u"'abc\"with quote\" in it", 1, 2)], 475 476 # URI incomplete 477 u'url(a': [('FUNCTION', u'url(', 1, 1), 478 ('IDENT', u'a', 1, 5)], 479 u'url("a': [('FUNCTION', u'url(', 1, 1), 480 ('INVALID', u'"a', 1, 5)], 481 u"url('a": [('FUNCTION', u'url(', 1, 1), 482 ('INVALID', u"'a", 1, 5)], 483 u"UR\\l('a": [('FUNCTION', u'UR\\l(', 1, 1), 484 ('INVALID', u"'a", 1, 6)], 485 } 486 487 # tests if fullsheet=True is set on tokenizer 488 testsfullsheettrue = { 489 # COMMENT incomplete 490 u'/*': [('COMMENT', u'/**/', 1, 1)], 491 492 # INVALID incomplete => STRING 493 u' " ': [('S', u' ', 1, 1), 494 ('STRING', u'" "', 1, 2)], 495 
u" 'abc\"with quote\" in it": [('S', u' ', 1, 1), 496 ('STRING', u"'abc\"with quote\" in it'", 1, 2)], 497 498 # URI incomplete FUNC => URI 499 u'url(a': [('URI', u'url(a)', 1, 1)], 500 u'url( a': [('URI', u'url( a)', 1, 1)], 501 u'url("a': [('URI', u'url("a")', 1, 1)], 502 u'url( "a ': [('URI', u'url( "a ")', 1, 1)], 503 u"url('a": [('URI', u"url('a')", 1, 1)], 504 u'url("a"': [('URI', u'url("a")', 1, 1)], 505 u"url('a'": [('URI', u"url('a')", 1, 1)], 506 507 } 508
def setUp(self):
    """Create a default ``Tokenizer`` and store it as ``self.tokenizer``."""
    # log = cssutils.errorhandler.ErrorHandler()
    default_tokenizer = Tokenizer()
    self.tokenizer = default_tokenizer
512
513 - def test_linenumbers(self):
514 "Tokenizer line + col" 515 pass
516
def test_tokenize(self):
    """cssutils Tokenizer().tokenize()

    Tokenize every input from ``testsall``, ``tests2``, ``tests3``,
    ``testsfullsheet`` and ``testsfullsheetfalse`` using the macros and
    productions of ``cssutils.cssproductions`` and compare the produced
    tokens with the expected list.

    Each input is tokenized once. Tokens are compared pairwise first and
    the token count is checked afterwards, so a surplus or missing token
    is reported as an assertion failure.
    """
    import cssutils.cssproductions
    tokenizer = Tokenizer(cssutils.cssproductions.MACROS,
                          cssutils.cssproductions.PRODUCTIONS)

    # merge order matters: on duplicate keys the later table wins
    tests = {}
    for table in (self.testsall, self.tests2, self.tests3,
                  self.testsfullsheet, self.testsfullsheetfalse):
        tests.update(table)

    for css, expected_tokens in tests.items():
        # materialize once so both checks look at the same token run
        tokens = list(tokenizer.tokenize(css))

        # check token format
        for expected, actual in zip(expected_tokens, tokens):
            self.assertEqual(expected, actual)

        # check if all same number of tokens
        self.assertEqual(len(tokens), len(expected_tokens))
538
def test_tokenizefullsheet(self):
    """cssutils Tokenizer().tokenize(fullsheet=True)

    Tokenize every input from ``testsall``, ``tests2``, ``tests3``,
    ``testsfullsheet`` and ``testsfullsheettrue`` with ``fullsheet=True``
    and compare the produced tokens with the expected list.

    Any token beyond the expected list must have the type ``'EOF'``, and
    exactly one such extra token is allowed. Each input is tokenized
    once and both checks run against that single result.
    """
    import cssutils.cssproductions
    tokenizer = Tokenizer(cssutils.cssproductions.MACROS,
                          cssutils.cssproductions.PRODUCTIONS)

    # merge order matters: on duplicate keys the later table wins
    tests = {}
    for table in (self.testsall, self.tests2, self.tests3,
                  self.testsfullsheet, self.testsfullsheettrue):
        tests.update(table)

    for css, expected_tokens in tests.items():
        # materialize once so both checks look at the same token run
        tokens = list(tokenizer.tokenize(css, fullsheet=True))

        # check token format
        for i, actual in enumerate(tokens):
            if i < len(expected_tokens):
                self.assertEqual(expected_tokens[i], actual)
            else:
                # EOF is added
                self.assertEqual(actual[0], 'EOF')

        # check if all same number of tokens, EOF is added so -1
        self.assertEqual(len(tokens) - 1, len(expected_tokens))
566 567 568 # not really needed
def test_tokenizeCSS3(self):
    """CSS3 Tokenizer().tokenize()

    Run the ``testsall`` and ``tests3`` tables through a tokenizer built
    from the ``cssutils.css3productions`` macros and productions.
    """
    import cssutils.css3productions as css3
    tokenizer = Tokenizer(css3.MACROS, css3.PRODUCTIONS)

    # on duplicate keys the table merged last wins
    tests = {}
    for table in (self.testsall, self.tests3):
        tests.update(table)

    for css, expected_tokens in tests.items():
        # NOTE(review): only the tokens actually produced are compared;
        # the number of tokens is not checked in this test.
        for position, actual in enumerate(tokenizer.tokenize(css)):
            self.assertEqual(expected_tokens[position], actual)
582 583 # not really needed
def test_tokenizeCSS2_1(self):
    """CSS2 Tokenizer().tokenize()

    Run the ``testsall`` table, overridden by ``tests2only``, through a
    tokenizer built from the ``cssutils.css2productions`` macros and
    productions.
    """
    import cssutils.css2productions as css2
    tokenizer = Tokenizer(css2.MACROS, css2.PRODUCTIONS)

    # tests2 is left out here: tests.update(self.tests2) is disabled
    tests = {}
    for table in (self.testsall, self.tests2only):
        tests.update(table)

    for css, expected_tokens in tests.items():
        # NOTE(review): only the tokens actually produced are compared;
        # the number of tokens is not checked in this test.
        for position, actual in enumerate(tokenizer.tokenize(css)):
            self.assertEqual(expected_tokens[position], actual)
598 599 # -------------- 600
601 - def __old(self):
    # Legacy expectation table kept for reference only: the method just
    # builds the local dict testsOLD, its name does not start with "test"
    # (so unittest's default loader does not collect it), and nothing in
    # the visible source calls it.
    # The entries use a different tuple layout than the class-level
    # tables, apparently (line, col, type, value) -- TODO confirm.
    # NOTE(review): `tt` is not defined in the visible source; unless the
    # star import from cssutils.tokenize2 provides it, calling this
    # method would raise NameError.
602 603 testsOLD = { 604 u'x x1 -x .-x #_x -': [(1, 1, tt.IDENT, u'x'), 605 (1, 2, 'S', u' '), 606 (1, 3, tt.IDENT, u'x1'), 607 (1, 5, 'S', u' '), 608 (1, 6, tt.IDENT, u'-x'), 609 (1, 8, 'S', u' '), 610 (1, 9, tt.CLASS, u'.'), 611 (1, 10, tt.IDENT, u'-x'), 612 (1, 12, 'S', u' '), 613 (1, 13, tt.HASH, u'#_x'), 614 (1, 16, 'S', u' '), 615 (1, 17, 'DELIM', u'-')], 616 617 # num 618 u'1 1.1 -1 -1.1 .1 -.1 1.': [(1, 1, tt.NUMBER, u'1'), 619 (1, 2, 'S', u' '), (1, 3, tt.NUMBER, u'1.1'), 620 (1, 6, 'S', u' '), (1, 7, tt.NUMBER, u'-1'), 621 (1, 9, 'S', u' '), (1, 10, tt.NUMBER, u'-1.1'), 622 (1, 14, 'S', u' '), (1, 15, tt.NUMBER, u'0.1'), 623 (1, 17, 'S', u' '), (1, 18, tt.NUMBER, u'-0.1'), 624 (1, 21, 'S', u' '), 625 (1, 22, tt.NUMBER, u'1'), (1, 23, tt.CLASS, u'.') 626 ], 627 # CSS3 pseudo 628 u'::': [(1, 1, tt.PSEUDO_ELEMENT, u'::')], 629 630 # SPECIALS 631 u'*+>~{},': [(1, 1, tt.UNIVERSAL, u'*'), 632 (1, 2, tt.PLUS, u'+'), 633 (1, 3, tt.GREATER, u'>'), 634 (1, 4, tt.TILDE, u'~'), 635 (1, 5, tt.LBRACE, u'{'), 636 (1, 6, tt.RBRACE, u'}'), 637 (1, 7, tt.COMMA, u',')], 638 639 # DELIM 640 u'!%:&$|': [(1, 1, 'DELIM', u'!'), 641 (1, 2, 'DELIM', u'%'), 642 (1, 3, 'DELIM', u':'), 643 (1, 4, 'DELIM', u'&'), 644 (1, 5, 'DELIM', u'$'), 645 (1, 6, 'DELIM', u'|')], 646 647 648 # DIMENSION 649 u'5em': [(1, 1, tt.DIMENSION, u'5em')], 650 u' 5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'5em')], 651 u'5em ': [(1, 1, tt.DIMENSION, u'5em'), (1, 4, 'S', u' ')], 652 653 u'-5em': [(1, 1, tt.DIMENSION, u'-5em')], 654 u' -5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'-5em')], 655 u'-5em ': [(1, 1, tt.DIMENSION, u'-5em'), (1, 5, 'S', u' ')], 656 657 u'.5em': [(1, 1, tt.DIMENSION, u'0.5em')], 658 u' .5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'0.5em')], 659 u'.5em ': [(1, 1, tt.DIMENSION, u'0.5em'), (1, 5, 'S', u' ')], 660 661 u'-.5em': [(1, 1, tt.DIMENSION, u'-0.5em')], 662 u' -.5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'-0.5em')], 663 u'-.5em ': [(1, 1, tt.DIMENSION, 
u'-0.5em'), (1, 6, 'S', u' ')], 664 665 u'5em5_-': [(1, 1, tt.DIMENSION, u'5em5_-')], 666 667 u'a a5 a5a 5 5a 5a5': [(1, 1, tt.IDENT, u'a'), 668 (1, 2, 'S', u' '), 669 (1, 3, tt.IDENT, u'a5'), 670 (1, 5, 'S', u' '), 671 (1, 6, tt.IDENT, u'a5a'), 672 (1, 9, 'S', u' '), 673 (1, 10, tt.NUMBER, u'5'), 674 (1, 11, 'S', u' '), 675 (1, 12, tt.DIMENSION, u'5a'), 676 (1, 14, 'S', u' '), 677 (1, 15, tt.DIMENSION, u'5a5')], 678 679 # URI 680 u'url()': [(1, 1, tt.URI, u'url()')], 681 u'url();': [(1, 1, tt.URI, u'url()'), (1, 6, tt.SEMICOLON, ';')], 682 u'url("x")': [(1, 1, tt.URI, u'url("x")')], 683 u'url( "x")': [(1, 1, tt.URI, u'url("x")')], 684 u'url("x" )': [(1, 1, tt.URI, u'url("x")')], 685 u'url( "x" )': [(1, 1, tt.URI, u'url("x")')], 686 u' url("x")': [ 687 (1, 1, 'S', u' '), 688 (1, 2, tt.URI, u'url("x")')], 689 u'url("x") ': [ 690 (1, 1, tt.URI, u'url("x")'), 691 (1, 9, 'S', u' '), 692 ], 693 u'url(ab)': [(1, 1, tt.URI, u'url(ab)')], 694 u'url($#/ab)': [(1, 1, tt.URI, u'url($#/ab)')], 695 u'url(\1233/a/b)': [(1, 1, tt.URI, u'url(\1233/a/b)')], 696 # not URI 697 u'url("1""2")': [ 698 (1, 1, tt.FUNCTION, u'url('), 699 (1, 5, tt.STRING, u'"1"'), 700 (1, 8, tt.STRING, u'"2"'), 701 (1, 11, tt.RPARANTHESIS, u')'), 702 ], 703 u'url(a"2")': [ 704 (1, 1, tt.FUNCTION, u'url('), 705 (1, 5, tt.IDENT, u'a'), 706 (1, 6, tt.STRING, u'"2"'), 707 (1, 9, tt.RPARANTHESIS, u')'), 708 ], 709 u'url(a b)': [ 710 (1, 1, tt.FUNCTION, u'url('), 711 (1, 5, tt.IDENT, u'a'), 712 (1, 6, 'S', u' '), 713 (1, 7, tt.IDENT, u'b'), 714 (1, 8, tt.RPARANTHESIS, u')'), 715 ], 716 717 # FUNCTION 718 u' counter("x")': [ 719 (1,1, 'S', u' '), 720 (1, 2, tt.FUNCTION, u'counter('), 721 (1, 10, tt.STRING, u'"x"'), 722 (1, 13, tt.RPARANTHESIS, u')')], 723 # HASH 724 u'# #a #_a #-a #1': [ 725 (1, 1, 'DELIM', u'#'), 726 (1, 2, 'S', u' '), 727 (1, 3, tt.HASH, u'#a'), 728 (1, 5, 'S', u' '), 729 (1, 6, tt.HASH, u'#_a'), 730 (1, 9, 'S', u' '), 731 (1, 10, tt.HASH, u'#-a'), 732 (1, 13, 'S', u' '), 733 (1, 14, tt.HASH, 
u'#1') 734 ], 735 u'#1a1 ': [ 736 (1, 1, tt.HASH, u'#1a1'), 737 (1, 5, 'S', u' '), 738 ], 739 u'#1a1\n': [ 740 (1, 1, tt.HASH, u'#1a1'), 741 (1, 5, 'S', u'\n'), 742 ], 743 u'#1a1{': [ 744 (1, 1, tt.HASH, u'#1a1'), 745 (1, 5, tt.LBRACE, u'{'), 746 ], 747 u'#1a1 {': [ 748 (1, 1, tt.HASH, u'#1a1'), 749 (1, 5, 'S', u' '), 750 (1, 6, tt.LBRACE, u'{'), 751 ], 752 u'#1a1\n{': [ 753 (1, 1, tt.HASH, u'#1a1'), 754 (1, 5, 'S', u'\n'), 755 (2, 1, tt.LBRACE, u'{'), 756 ], 757 u'#1a1\n {': [ 758 (1, 1, tt.HASH, u'#1a1'), 759 (1, 5, 'S', u'\n '), 760 (2, 2, tt.LBRACE, u'{'), 761 ], 762 u'#1a1 \n{': [ 763 (1, 1, tt.HASH, u'#1a1'), 764 (1, 5, 'S', u' \n'), 765 (2, 1, tt.LBRACE, u'{'), 766 ], 767 # STRINGS with NL 768 u'"x\n': [(1,1, tt.INVALID, u'"x\n')], 769 u'"x\r': [(1,1, tt.INVALID, u'"x\r')], 770 u'"x\f': [(1,1, tt.INVALID, u'"x\f')], 771 u'"x\n ': [ 772 (1,1, tt.INVALID, u'"x\n'), 773 (2,1, 'S', u' ') 774 ] 775 776 }
# Disabled legacy checks (they belong to the old tests above):
# tests = {
#     u'/*a': xml.dom.SyntaxErr,
#     u'"a': xml.dom.SyntaxErr,
#     u"'a": xml.dom.SyntaxErr,
#     u"\\0 a": xml.dom.SyntaxErr,
#     u"\\00": xml.dom.SyntaxErr,
#     u"\\000": xml.dom.SyntaxErr,
#     u"\\0000": xml.dom.SyntaxErr,
#     u"\\00000": xml.dom.SyntaxErr,
#     u"\\000000": xml.dom.SyntaxErr,
#     u"\\0000001": xml.dom.SyntaxErr
#     }
# self.tokenizer.log.raiseExceptions = True #!!
# for css, exception in tests.items():
#     self.assertRaises(exception, self.tokenizer.tokenize, css)


if __name__ == '__main__':
    # run the test cases of this module when executed as a script
    from unittest import main
    main()