Class CodeRay::Scanners::HTML
In: lib/coderay/scanners/html.rb
Parent: Scanner

Methods

reset   scan_tokens   setup  

Included Modules

Streamable

Constants

# One or more word characters, dots, colons or hyphens; scan_tokens uses it
# to recognise attribute names.
ATTR_NAME = /[\w.:-]+/
# Alias of ATTR_NAME: an unquoted attribute value is matched with the same
# pattern as an attribute name.
ATTR_VALUE_UNQUOTED = ATTR_NAME
# A closing '>' optionally preceded by '/'.
TAG_END = /\/?>/
# A single hexadecimal digit (either letter case).
HEX = /[0-9a-fA-F]/
# Extended-syntax pattern (whitespace inside it is ignored): '&', then either
# a run of word characters or '#' followed by decimal digits or 'x' plus hex
# digits, then ';'.
ENTITY = / & (?: \w+ | \# (?: \d+ | x#{HEX}+ ) ) ; /ox
# Keyed by the opening quote character; each value matches a run of characters
# that contains none of '&', that same quote, '>' or a newline.
PLAIN_STRING_CONTENT = { "'" => /[^&'>\n]+/, '"' => /[^&">\n]+/, }

Public Instance methods

[Source]

    # File lib/coderay/scanners/html.rb, line 34
    #
    # Runs the parent class's reset (via super) and then puts the scanner
    # state machine back into the :initial state.
    # Only @state is assigned here; @plain_string_content is left as it was.
34:     def reset
35:       super
36:       @state = :initial
37:     end

Private Instance methods

[Source]

     # File lib/coderay/scanners/html.rb, line 45
     #
     # Tokenizes the remaining input with a small state machine and appends
     # the resulting tokens to +tokens+.
     #
     # Parameters:
     #   tokens  - collection that receives tokens through <<. Text tokens are
     #             appended as [text, kind]; quoted attribute values are
     #             bracketed by [:open, :string] and [:close, :string].
     #   options - only options[:keep_state] is read. When it is truthy the
     #             final state and the active string-content pattern are
     #             stored in @state / @plain_string_content.
     #
     # Returns +tokens+.
     #
     # States (the local +state+ starts from @state):
     #   :initial                - outside a tag: comments, doctype, <?xml ?>,
     #                             <? ?> / <% %> blocks, closing and opening
     #                             tags, plain text, entities; a stray '<',
     #                             '>' or '&' becomes :error.
     #   :attribute              - inside an opening tag, expecting an
     #                             attribute name or the tag end.
     #   :attribute_equal        - after an attribute name, expecting '=',
     #                             another name, or the tag end.
     #   :attribute_value        - after '=', expecting an unquoted value, an
     #                             opening quote, or the tag end.
     #   :attribute_value_string - inside a quoted value; the content pattern
     #                             is looked up in PLAIN_STRING_CONTENT by the
     #                             opening quote.
     #
     # A whitespace run is tried first on every iteration, in every state, and
     # yields a :space token without changing the state.
     #
     # The closing quote of a string pushes its :delimiter and [:close, :string]
     # tokens itself and uses +next+ to skip the shared push at the bottom of
     # the loop.
     #
     # :attribute_equal and :attribute_value_string have no else branch; if
     # none of their scans match, +kind+ stays nil and the checks after the
     # case statement are the only handling.
     #
     # Errors are reported through raise_inspect (defined outside this
     # listing): for the unreachable else in :initial, for an unknown state,
     # for a token without a kind when $DEBUG is set, and for an empty token.
     #
     # NOTE(review): the TAG_END scan in the :attribute state is written
     # without the /o flag that the other interpolated patterns use - confirm
     # whether that is intentional.
     # NOTE(review): because whitespace is consumed before the state dispatch,
     # the "\n" alternative in the :attribute_value_string error branch looks
     # reachable only through '>' - verify against the scanner tests.
 45:     def scan_tokens tokens, options
 46: 
 47:       state = @state
 48:       plain_string_content = @plain_string_content
 49: 
 50:       until eos?
 51: 
 52:         kind = nil
 53:         match = nil
 54: 
 55:         if scan(/\s+/m)
 56:           kind = :space
 57: 
 58:         else
 59: 
 60:           case state
 61: 
 62:           when :initial
 63:             if scan(/<!--.*?-->/m)
 64:               kind = :comment
 65:             elsif scan(/<!DOCTYPE.*?>/m)
 66:               kind = :doctype
 67:             elsif scan(/<\?xml.*?\?>/m)
 68:               kind = :preprocessor
 69:             elsif scan(/<\?.*?\?>|<%.*?%>/m)
 70:               kind = :comment
 71:             elsif scan(/<\/[-\w_.:]*>/m)
 72:               kind = :tag
 73:             elsif match = scan(/<[-\w_.:]+>?/m)
 74:               kind = :tag
 75:               state = :attribute unless match[-1] == ?>
 76:             elsif scan(/[^<>&]+/)
 77:               kind = :plain
 78:             elsif scan(/#{ENTITY}/ox)
 79:               kind = :entity
 80:             elsif scan(/[<>&]/)
 81:               kind = :error
 82:             else
 83:               raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
 84:             end
 85: 
 86:           when :attribute
 87:             if scan(/#{TAG_END}/)
 88:               kind = :tag
 89:               state = :initial
 90:             elsif scan(/#{ATTR_NAME}/o)
 91:               kind = :attribute_name
 92:               state = :attribute_equal
 93:             else
 94:               kind = :error
 95:               getch
 96:             end
 97: 
 98:           when :attribute_equal
 99:             if scan(/=/)
100:               kind = :operator
101:               state = :attribute_value
102:             elsif scan(/#{ATTR_NAME}/o)
103:               kind = :attribute_name
104:             elsif scan(/#{TAG_END}/o)
105:               kind = :tag
106:               state = :initial
107:             elsif scan(/./)
108:               kind = :error
109:               state = :attribute
110:             end
111: 
112:           when :attribute_value
113:             if scan(/#{ATTR_VALUE_UNQUOTED}/o)
114:               kind = :attribute_value
115:               state = :attribute
116:             elsif match = scan(/["']/)
117:               tokens << [:open, :string]
118:               state = :attribute_value_string
119:               plain_string_content = PLAIN_STRING_CONTENT[match]
120:               kind = :delimiter
121:             elsif scan(/#{TAG_END}/o)
122:               kind = :tag
123:               state = :initial
124:             else
125:               kind = :error
126:               getch
127:             end
128: 
129:           when :attribute_value_string
130:             if scan(plain_string_content)
131:               kind = :content
132:             elsif scan(/['"]/)
133:               tokens << [matched, :delimiter]
134:               tokens << [:close, :string]
135:               state = :attribute
136:               next
137:             elsif scan(/#{ENTITY}/ox)
138:               kind = :entity
139:             elsif scan(/&/)
140:               kind = :content
141:             elsif scan(/[\n>]/)
142:               tokens << [:close, :string]
143:               kind = :error
144:               state = :initial
145:             end
146: 
147:           else
148:             raise_inspect 'Unknown state: %p' % [state], tokens
149: 
150:           end
151: 
152:         end
153: 
154:         match ||= matched
155:         if $DEBUG and not kind
156:           raise_inspect 'Error token %p in line %d' %
157:             [[match, kind], line], tokens, state
158:         end
159:         raise_inspect 'Empty token', tokens unless match
160: 
161:         tokens << [match, kind]
162:       end
163: 
164:       if options[:keep_state]
165:         @state = state
166:         @plain_string_content = plain_string_content
167:       end
168: 
169:       tokens
170:     end

[Source]

    # File lib/coderay/scanners/html.rb, line 40
    #
    # Initializes the two instance variables that scan_tokens reads on entry:
    # @state starts as :initial and @plain_string_content as nil (no quoted
    # attribute value is open yet).
    # NOTE(review): presumably invoked by the Scanner parent class when the
    # scanner is created - confirm in Scanner.
40:     def setup
41:       @state = :initial
42:       @plain_string_content = nil
43:     end

[Validate]