Class | CodeRay::Scanners::Ruby |
In: |
lib/coderay/scanners/ruby.rb
|
Parent: | Scanner |
This scanner is really complex, since Ruby is a complex language!
It tries to highlight 100% of all common code, and 90% of strange code.
It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.
For now, I think it's better than the scanners in VIM or Syntax, or any highlighter I was able to find, except Caleb's RubyLexer.
I hope it's also better than the rdoc/irb lexer.
# File lib/coderay/scanners/ruby.rb, line 26
#
# Scans the remaining input as Ruby source and appends tokens to +tokens+.
#
# Each appended token is a two-element array: either [text, kind] for a piece
# of scanned text, or [:open, kind] / [:close, kind] to bracket a nested
# region (strings, regexps, symbols, shell commands, heredocs and inline
# "#{...}" blocks).
#
# The scanner is a state machine. +state+ is either a Symbol (:initial,
# :def_expected, :module_expected, :undef_expected, :undef_comma_expected,
# :alias_expected) or a Patterns::StringState instance while the scanner is
# inside a string-like literal.
#
# tokens::  the collection tokens are appended to with <<; also the return value.
# options:: accepted for interface compatibility; not read in this method.
#
# Returns +tokens+.
def scan_tokens tokens, options
  # Both flags below use a three-valued protocol: a branch sets them to :set
  # to affect the NEXT token; after each token they are collapsed to
  # true/false (see "unless kind == :error" near the end of the loop).
  last_token_dot = false      # previous token was "." or "::"
  value_expected = true       # a value (regexp, heredoc, signed number, ...) may start here
  heredocs = nil              # heredoc states opened on this line, started at the next newline
  last_state = nil            # string state to return to after a "#$var" / "#@var" escape
  state = :initial
  depth = nil                 # brace nesting depth inside the current "#{...}" block
  inline_block_stack = []     # saved [state, depth, heredocs] for each open "#{...}" block

  patterns = Patterns  # avoid constant lookup

  until eos?
    match = nil
    kind = nil

    if state.instance_of? patterns::StringState
# {{{
      # Inside a string-like literal: consume plain content up to the next
      # interesting character (or to the end of input if there is none).
      match = scan_until(state.pattern) || scan_until(/\z/)
      tokens << [match, :content] unless match.empty?
      break if eos?

      if state.heredoc and self[1]  # end of heredoc
        match = getch.to_s
        match << scan_until(/$/) unless eos?
        tokens << [match, :delimiter]
        tokens << [:close, state.type]
        state = state.next_state
        next
      end

      case match = getch

      when state.delim
        if state.paren
          # Closing paren of a nested pair: only the outermost one ends the literal.
          state.paren_depth -= 1
          if state.paren_depth > 0
            tokens << [match, :nesting_delimiter]
            next
          end
        end
        tokens << [match, :delimiter]
        if state.type == :regexp and not eos?
          modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
          tokens << [modifiers, :modifier] unless modifiers.empty?
        end
        tokens << [:close, state.type]
        value_expected = false
        state = state.next_state

      when '\\'
        if state.interpreted
          if esc = scan(/ #{patterns::ESCAPE} /ox)
            tokens << [match + esc, :char]
          else
            tokens << [match, :error]
          end
        else
          # Non-interpreted literal: only the delimiter and the backslash
          # itself are treated as escapes.
          case m = getch
          when state.delim, '\\'
            tokens << [match + m, :char]
          when nil
            tokens << [match, :error]
          else
            tokens << [match + m, :content]
          end
        end

      when '#'
        case peek(1)
        when '{'
          # Start of "#{...}": remember where we were and scan normal code
          # until the matching closing brace.
          inline_block_stack << [state, depth, heredocs]
          value_expected = true
          state = :initial
          depth = 1
          tokens << [:open, :inline]
          tokens << [match + getch, :inline_delimiter]
        when '$', '@'
          tokens << [match, :escape]
          last_state = state  # scan one token as normal code, then return here
          state = :initial
        else
          raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
        end

      when state.paren
        state.paren_depth += 1
        tokens << [match, :nesting_delimiter]

      when /#{patterns::REGEXP_SYMBOLS}/ox
        tokens << [match, :function]

      else
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

      end
      next
# }}}
    else
# {{{
      if match = scan(/[ \t\f]+/)
        kind = :space
        # Do not swallow the newline while heredocs are pending: they start there.
        match << scan(/\s*/) unless eos? || heredocs
        value_expected = true if match.index(?\n)  # FIXME not quite true
        tokens << [match, kind]
        next

      elsif match = scan(/\\?\n/)
        kind = :space
        if match == "\n"
          value_expected = true  # FIXME not quite true
          state = :initial if state == :undef_comma_expected
        end
        if heredocs
          unscan  # heredoc scanning needs \n at start
          state = heredocs.shift
          tokens << [:open, state.type]
          heredocs = nil if heredocs.empty?
          next
        else
          match << scan(/\s*/) unless eos?
        end
        tokens << [match, kind]
        next

      elsif bol? && match = scan(/\#!.*/)
        tokens << [match, :doctype]
        next

      elsif match = scan(/\#.*/) or
        ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
        kind = :comment
        value_expected = true
        tokens << [match, kind]
        next

      elsif state == :initial

        # IDENTS #
        if match = scan(/#{patterns::METHOD_NAME}/o)
          if last_token_dot
            kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
          else
            kind = patterns::IDENT_KIND[match]
            if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
              kind = :constant
            elsif kind == :reserved
              state = patterns::DEF_NEW_STATE[match]
            end
          end
          ## experimental!
          value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

        elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
          kind = :ident
          value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

        # OPERATORS #
        # TODO: match (), [], {} as one single operator
        elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
          if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
            value_expected = :set
          end
          # The empty capture group is only set for "." and "::".
          last_token_dot = :set if self[1]
          kind = :operator
          unless inline_block_stack.empty?
            case match
            when '{'
              depth += 1
            when '}'
              depth -= 1
              if depth == 0  # closing brace of inline block reached
                state, depth, heredocs = inline_block_stack.pop
                heredocs = nil if heredocs && heredocs.empty?
                tokens << [match, :inline_delimiter]
                kind = :inline
                match = :close
              end
            end
          end

        elsif match = scan(/ ['"] /mx)
          tokens << [:open, :string]
          kind = :delimiter
          state = patterns::StringState.new :string, match == '"', match  # important for streaming

        elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
          kind = :instance_variable

        elsif value_expected and match = scan(/\//)
          tokens << [:open, :regexp]
          kind = :delimiter
          interpreted = true
          state = patterns::StringState.new :regexp, interpreted, match

        # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
        elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
          kind = self[1] ? :float : :integer

        elsif match = scan(/#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            # Quoted symbol (:"..." / :'...'): emit the colon, then scan the
            # rest as a string-like literal of type :symbol.
            tokens << [:open, :symbol]
            tokens << [':', :symbol]
            match = delim.chr
            kind = :delimiter
            state = patterns::StringState.new :symbol, delim == ?", match
          else
            kind = :symbol
          end

        elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
          value_expected = :set
          kind = :operator

        elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
          indented = self[1] == '-'
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          tokens << [:open, kind]
          tokens << [match, :delimiter]
          match = :close
          heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
          heredocs ||= []  # create heredocs if empty
          heredocs << heredoc

        elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
          # fetch yields the missing key to its block; the block must name it,
          # otherwise building the error message itself fails with NameError.
          kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
            raise_inspect 'Unknown fancy string: %%%p' % k, tokens
          end
          tokens << [:open, kind]
          state = patterns::StringState.new kind, interpreted, self[2]
          kind = :delimiter

        elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
          kind = :integer

        elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
          value_expected = :set
          kind = :operator

        elsif match = scan(/`/)
          if last_token_dot
            kind = :operator
          else
            tokens << [:open, :shell]
            kind = :delimiter
            state = patterns::StringState.new :shell, true, match
          end

        elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
          kind = :global_variable

        elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
          kind = :class_variable

        else
          # Nothing matched: consume one character as an error token so the
          # loop always makes progress.
          kind = :error
          match = getch

        end

      elsif state == :def_expected
        state = :initial
        if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          kind = :method
        else
          next
        end

      elsif state == :module_expected
        if match = scan(/<</)
          kind = :operator
        else
          state = :initial
          if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
            kind = :class
          else
            next
          end
        end

      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(/#{patterns::METHOD_NAME_EX}/o)
          kind = :method
        elsif match = scan(/#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            tokens << [:open, :symbol]
            tokens << [':', :symbol]
            match = delim.chr
            kind = :delimiter
            state = patterns::StringState.new :symbol, delim == ?", match
            # After the quoted symbol closes, keep looking for ", next_name".
            state.next_state = :undef_comma_expected
          else
            kind = :symbol
          end
        else
          state = :initial
          next
        end

      elsif state == :alias_expected
        if match = scan(/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
          tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
          tokens << [self[2], :space]
          tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
        end
        state = :initial
        next

      elsif state == :undef_comma_expected
        if match = scan(/,/)
          kind = :operator
          state = :undef_expected
        else
          state = :initial
          next
        end

      end
# }}}

      # Collapse the one-token lookahead flags: :set (just requested by this
      # token) becomes true, anything else becomes false. Error tokens leave
      # the flags untouched.
      unless kind == :error
        value_expected = value_expected == :set
        last_token_dot = last_token_dot == :set
      end

      if $DEBUG and not kind
        raise_inspect 'Error token %p in line %d' %
          [[match, kind], line], tokens, state
      end
      raise_inspect 'Empty token', tokens unless match

      tokens << [match, kind]

      # Return to the enclosing string after the single token of a "#$var" /
      # "#@var" escape has been scanned.
      if last_state
        state = last_state
        last_state = nil
      end
    end
  end

  # End of input: close every region that is still open, innermost first.
  inline_block_stack << [state] if state.is_a? patterns::StringState
  until inline_block_stack.empty?
    this_block = inline_block_stack.pop
    # Entries saved for "#{...}" have three elements; the one pushed just
    # above for an unterminated literal has only one.
    tokens << [:close, :inline] if this_block.size > 1
    state = this_block.first
    tokens << [:close, state.type]
  end

  tokens
end