@@ -130,22 +130,19 @@ function wrap(tree, file) {
function element(node) {
  // Decide up front whether an end tag has to be emitted: tag names listed in
  // `voids` only get a start tag token.
  var needsEndTag = voids.indexOf(node.tagName) === -1

  // The tokenizer is reset before every token handed to the parser, so state
  // left behind by earlier input cannot leak into this element.
  resetTokenizer()
  parser._processToken(startTag(node), ns.html)

  all(node.children)

  if (!needsEndTag) {
    return
  }

  resetTokenizer()
  parser._processToken(endTag(node))
}
147143
148144 function text ( node ) {
145+ resetTokenizer ( )
149146 parser . _processToken ( {
150147 type : characterToken ,
151148 chars : node . value ,
@@ -155,7 +152,7 @@ function wrap(tree, file) {
155152
156153 function doctype ( node ) {
157154 var p5 = toParse5 ( node )
158-
155+ resetTokenizer ( )
159156 parser . _processToken ( {
160157 type : doctypeToken ,
161158 name : p5 . name ,
@@ -167,6 +164,7 @@ function wrap(tree, file) {
167164 }
168165
169166 function comment ( node ) {
167+ resetTokenizer ( )
170168 parser . _processToken ( {
171169 type : commentToken ,
172170 data : node . value ,
@@ -182,35 +180,38 @@ function wrap(tree, file) {
182180 var token
183181
184182 // Reset preprocessor:
185- // See: <https://github.com/inikulin/parse5/blob/0491902 /packages/parse5/lib/tokenizer/preprocessor.js>.
183+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/preprocessor.js>.
186184 preprocessor . html = null
187- preprocessor . endOfChunkHit = false
188- preprocessor . lastChunkWritten = false
189- preprocessor . lastCharPos = - 1
190185 preprocessor . pos = - 1
186+ preprocessor . lastGapPos = - 1
187+ preprocessor . lastCharPos = - 1
188+ preprocessor . gapStack = [ ]
189+ preprocessor . skipNextNewLine = false
190+ preprocessor . lastChunkWritten = false
191+ preprocessor . endOfChunkHit = false
191192
192193 // Reset preprocessor mixin:
193- // See: <https://github.com/inikulin/parse5/blob/0491902/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js>.
194- posTracker . droppedBufferSize = 0
195- posTracker . line = line
196- posTracker . col = 1
197- posTracker . offset = 0
198- posTracker . lineStartPos = - column + 1
194+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js>.
195+ posTracker . isEol = false
196+ posTracker . lineStartPos = - column + 1 // Looks weird, but ensures we get correct positional info.
199197 posTracker . droppedBufferSize = offset
198+ posTracker . offset = 0
199+ posTracker . col = 1
200+ posTracker . line = line
200201
201202 // Reset location tracker:
202- // See: <https://github.com/inikulin/parse5/blob/0491902 /packages/parse5/lib/extensions/location-info/tokenizer-mixin.js>.
203+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js>.
203204 locationTracker . currentAttrLocation = null
204205 locationTracker . ctLoc = createParse5Location ( node )
205206
206207 // See the code for `parse` and `parseFragment`:
207- // See: <https://github.com/inikulin/parse5/blob/0491902 /packages/parse5/lib/parser/index.js#L371>.
208+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/parser/index.js#L371>.
208209 tokenizer . write ( node . value )
209210 parser . _runParsingLoop ( null )
210211
211212 // Process final characters if they’re still there after hibernating.
212213 // Similar to:
213- // See: <https://github.com/inikulin/parse5/blob/3bfa7d9 /packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95>.
214+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95>.
214215 token = tokenizer . currentCharacterToken
215216
216217 if ( token ) {
@@ -219,11 +220,26 @@ function wrap(tree, file) {
219220 token . location . endOffset = posTracker . offset + 1
220221 parser . _processToken ( token )
221222 }
223+ }
222224
function resetTokenizer() {
  // Reset tokenizer:
  // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/index.js#L218-L234>.
  //
  // Returning to the `data` state matters most:
  // * some elements, like textareas and iframes, change the state (GH-7);
  // * broken HTML in a `raw` node may be followed by a correct element (GH-11).
  //
  // The fields are written in the order they are listed here; a new object
  // (and thus new arrays) is built on every call.
  var pristine = {
    tokenQueue: [],
    state: dataState,
    returnState: '',
    charRefCode: -1,
    tempBuff: [],
    lastStartTagName: '',
    consumedAfterSnapshot: -1,
    active: false,
    currentCharacterToken: null,
    currentToken: null,
    currentAttr: null
  }

  Object.keys(pristine).forEach(function (field) {
    tokenizer[field] = pristine[field]
  })
}
229245}
0 commit comments