|
1 | | -import Parser from 'parse5/lib/parser/index.js' |
2 | | -import {pointStart, pointEnd} from 'unist-util-position' |
3 | | -import {visit} from 'unist-util-visit' |
4 | | -import {fromParse5} from 'hast-util-from-parse5' |
5 | | -import {toParse5} from 'hast-util-to-parse5' |
6 | | -import {htmlVoidElements} from 'html-void-elements' |
7 | | -import {webNamespaces} from 'web-namespaces' |
8 | | -import {zwitch} from 'zwitch' |
| 1 | +/** |
| 2 | + * @typedef {import('./lib/index.js').Options} Options |
| 3 | + */ |
9 | 4 |
|
// Names of the parse5 insertion mode, tokenizer state, and token types that
// are handed to the parser below.
// They are plain strings so they can be passed to parse5 without importing
// its internal constant tables.
const inTemplateMode = 'IN_TEMPLATE_MODE'
const dataState = 'DATA_STATE'
const characterToken = 'CHARACTER_TOKEN'
const startTagToken = 'START_TAG_TOKEN'
const endTagToken = 'END_TAG_TOKEN'
const commentToken = 'COMMENT_TOKEN'
const doctypeToken = 'DOCTYPE_TOKEN'

// Options given to every `Parser` instance: track source locations and parse
// with scripting disabled.
const parseOptions = {sourceCodeLocationInfo: true, scriptingEnabled: false}
19 | | - |
/**
 * Pass a hast tree through parse5 so that `raw` nodes (strings of HTML) are
 * parsed into real nodes.
 *
 * Every node of `tree` is fed to a single parse5 parser: regular nodes as
 * ready-made tokens, `raw` nodes as text that is run through the tokenizer.
 * The resulting parse5 tree is converted back with `fromParse5`.
 *
 * @param {Object} tree
 *   Node to process.
 * @param {Object} [file]
 *   File passed on to `fromParse5`.
 *   An object without a `contents` field in this position is treated as
 *   `options` instead.
 * @param {Object} [options]
 *   Configuration.
 * @param {Array<string>} [options.passThrough]
 *   Node types that are carried through the parser untouched (their
 *   children, if any, are still processed).
 * @returns {Object}
 *   The converted tree; when `tree` is not a `root` and the result has
 *   exactly one child, that child is returned instead.
 * @throws {Error}
 *   When a node with a type that has no handler is found.
 */
export function raw(tree, file, options) {
  const parser = new Parser(parseOptions)
  // Dispatch on `node.type`; types without a handler end up in `unknown`.
  const one = zwitch('type', {
    handlers: {
      root,
      element,
      text,
      comment,
      doctype,
      raw: handleRaw
    },
    unknown
  })
  // Whether at least one node was passed through (and needs mending later).
  let stitches = false
  // parse5 internals, assigned by `fragment()` / `document()` before any
  // handler runs.
  let tokenizer
  let preprocessor
  let posTracker
  let locationTracker

  // Support `raw(tree, options)`: a second argument without `contents` is
  // not a file.
  if (file && !('contents' in file)) {
    options = file
    file = undefined
  }

  if (options && options.passThrough) {
    for (const type of options.passThrough) {
      one.handlers[type] = stitch
    }
  }

  const result = fromParse5(
    documentMode(tree) ? document() : fragment(),
    file
  )

  if (stitches) {
    visit(result, 'comment', mend)
  }

  // Unpack if possible and when not given a `root`.
  if (tree.type !== 'root' && result.children.length === 1) {
    return result.children[0]
  }

  return result

  /**
   * Swap a placeholder comment (created by `stitch`) for the node it
   * carries.
   *
   * @returns {number|undefined}
   *   The same index when a node was swapped in, which is handed back to
   *   `visit` (see the `unist-util-visit` docs for how it is interpreted).
   */
  function mend(node, index, parent) {
    if (node.value.stitch) {
      parent.children[index] = node.value.stitch
      return index
    }
  }

  /**
   * Set the parser up for fragment parsing (with a `template` element as
   * context), feed it `tree`, and return the resulting document fragment.
   */
  function fragment() {
    const context = {
      nodeName: 'template',
      tagName: 'template',
      attrs: [],
      namespaceURI: webNamespaces.html,
      childNodes: []
    }
    const mock = {
      nodeName: 'documentmock',
      tagName: 'documentmock',
      attrs: [],
      namespaceURI: webNamespaces.html,
      childNodes: []
    }
    const doc = {nodeName: '#document-fragment', childNodes: []}

    parser._bootstrap(mock, context)
    parser._pushTmplInsertionMode(inTemplateMode)
    parser._initTokenizerForFragmentParsing()
    parser._insertFakeRootElement()
    parser._resetInsertionMode()
    parser._findFormInFragmentContext()

    tokenizer = parser.tokenizer
    preprocessor = tokenizer.preprocessor
    locationTracker = tokenizer.__mixins[0]
    posTracker = locationTracker.posTracker

    one(tree)

    // The parsed nodes live in the first child of the mock document
    // (presumably the fake root element inserted above); hand them to `doc`.
    parser._adoptNodes(mock.childNodes[0], doc)

    return doc
  }

  /**
   * Set the parser up for parsing a whole document, feed it `tree`, and
   * return the resulting document.
   */
  function document() {
    const doc = parser.treeAdapter.createDocument()

    parser._bootstrap(doc, null)
    tokenizer = parser.tokenizer
    preprocessor = tokenizer.preprocessor
    locationTracker = tokenizer.__mixins[0]
    posTracker = locationTracker.posTracker

    one(tree)

    return doc
  }

  /**
   * Process a list of nodes in order; a missing list is ignored.
   */
  function all(nodes) {
    /* istanbul ignore else - invalid nodes, see rehypejs/rehype-raw#7. */
    if (nodes) {
      let index = -1

      while (++index < nodes.length) {
        one(nodes[index])
      }
    }
  }

  /**
   * Process a root: only its children produce tokens.
   */
  function root(node) {
    all(node.children)
  }

  /**
   * Process an element: a start tag token, its children, and (unless it is
   * a void element) an end tag token.
   */
  function element(node) {
    resetTokenizer()
    parser._processToken(startTag(node), webNamespaces.html)

    all(node.children)

    if (!htmlVoidElements.includes(node.tagName)) {
      resetTokenizer()
      parser._processToken(endTag(node))
    }
  }

  /**
   * Process a text node as a character token.
   */
  function text(node) {
    resetTokenizer()
    parser._processToken({
      type: characterToken,
      chars: node.value,
      location: createParse5Location(node)
    })
  }

  /**
   * Process a doctype as a doctype token, taking name and identifiers from
   * the parse5 version of the node.
   */
  function doctype(node) {
    const p5 = toParse5(node)
    resetTokenizer()
    parser._processToken({
      type: doctypeToken,
      name: p5.name,
      forceQuirks: false,
      publicId: p5.publicId,
      systemId: p5.systemId,
      location: createParse5Location(node)
    })
  }

  /**
   * Process a comment as a comment token.
   */
  function comment(node) {
    resetTokenizer()
    parser._processToken({
      type: commentToken,
      data: node.value,
      location: createParse5Location(node)
    })
  }

  /**
   * Process a `raw` node: reset the input machinery to the node's start
   * position, then run its value through the tokenizer and parser.
   */
  function handleRaw(node) {
    const start = pointStart(node)
    const line = start.line || 1
    const column = start.column || 1
    const offset = start.offset || 0

    // Reset preprocessor:
    // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/preprocessor.js>.
    preprocessor.html = null
    preprocessor.pos = -1
    preprocessor.lastGapPos = -1
    preprocessor.lastCharPos = -1
    preprocessor.gapStack = []
    preprocessor.skipNextNewLine = false
    preprocessor.lastChunkWritten = false
    preprocessor.endOfChunkHit = false

    // Reset preprocessor mixin:
    // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js>.
    posTracker.isEol = false
    posTracker.lineStartPos = -column + 1 // Looks weird, but ensures we get correct positional info.
    posTracker.droppedBufferSize = offset
    posTracker.offset = 0
    posTracker.col = 1
    posTracker.line = line

    // Reset location tracker:
    // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js>.
    locationTracker.currentAttrLocation = null
    locationTracker.ctLoc = createParse5Location(node)

    // See the code for `parse` and `parseFragment`:
    // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/parser/index.js#L371>.
    tokenizer.write(node.value)
    parser._runParsingLoop(null)

    // Process final characters if they’re still there after hibernating.
    // Similar to:
    // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95>.
    const token = tokenizer.currentCharacterToken

    if (token) {
      token.location.endLine = posTracker.line
      token.location.endCol = posTracker.col + 1
      token.location.endOffset = posTracker.offset + 1
      parser._processToken(token)
    }
  }

  /**
   * Carry a passed-through node across the parser by hiding a shallow copy
   * of it inside a placeholder comment, which `mend` later swaps back.
   */
  function stitch(node) {
    const clone = Object.assign({}, node)

    stitches = true

    // Recurse, because to somewhat handle `[<x>]</x>` (where `[]` denotes the
    // passed through node).
    if (node.children) {
      clone.children = raw(
        {type: 'root', children: node.children},
        file,
        options
      ).children
    }

    // Hack: `value` is supposed to be a string, but as none of the tools
    // (`parse5` or `hast-util-from-parse5`) looks at it, we can pass nodes
    // through.
    comment({value: {stitch: clone}})
  }

  /**
   * Put the tokenizer back in its initial (`data`) state before a token is
   * handed to the parser.
   */
  function resetTokenizer() {
    // Reset tokenizer:
    // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/index.js#L218-L234>.
    // Especially putting it back in the `data` state is useful: some elements,
    // like textareas and iframes, change the state.
    // See GH-7.
    // But also if broken HTML is in `raw`, and then a correct element is given.
    // See GH-11.
    tokenizer.tokenQueue = []
    tokenizer.state = dataState
    tokenizer.returnState = ''
    tokenizer.charRefCode = -1
    tokenizer.tempBuff = []
    tokenizer.lastStartTagName = ''
    tokenizer.consumedAfterSnapshot = -1
    tokenizer.active = false
    tokenizer.currentCharacterToken = null
    tokenizer.currentToken = null
    tokenizer.currentAttr = null
  }
}
274 | | - |
/**
 * Create a parse5 start tag token for an element.
 *
 * @param {Object} node
 *   Element; its `tagName` and `properties` are used, and its position is
 *   read by `createParse5Location`.
 * @returns {Object}
 *   Start tag token, never self-closing, whose `location` also carries a
 *   copy of itself as `location.startTag`.
 */
function startTag(node) {
  const location = createParse5Location(node)

  // The copy is made before `startTag` is attached, so it holds only the
  // six positional fields.
  location.startTag = Object.assign({}, location)

  return {
    type: startTagToken,
    tagName: node.tagName,
    selfClosing: false,
    attrs: attributes(node),
    location
  }
}
288 | | - |
/**
 * Turn the properties of an element into parse5 attributes.
 *
 * Only `tagName` and `properties` are handed to `toParse5` (children are
 * left out), and only the `attrs` of its result are kept.
 *
 * @param {Object} node
 *   Element.
 * @returns {Array}
 *   The `attrs` field of the parse5 element created by `toParse5`.
 */
function attributes(node) {
  const {tagName, properties} = node
  const bare = {tagName, type: 'element', properties}

  return toParse5(bare).attrs
}
296 | | - |
/**
 * Create a parse5 end tag token for an element.
 *
 * @param {Object} node
 *   Element; its `tagName` is used, and its position is read by
 *   `createParse5Location`.
 * @returns {Object}
 *   End tag token without attributes, whose `location` also carries a copy
 *   of itself as `location.endTag`.
 */
function endTag(node) {
  const location = createParse5Location(node)

  // The copy is made before `endTag` is attached, so it holds only the six
  // positional fields.
  location.endTag = Object.assign({}, location)

  return {
    type: endTagToken,
    tagName: node.tagName,
    attrs: [],
    location
  }
}
309 | | - |
/**
 * Handle a node whose type has no handler: always fails.
 *
 * @param {Object} node
 *   Node with an unsupported `type`.
 * @returns {never}
 * @throws {Error}
 *   Always, with the offending type in the message.
 */
function unknown(node) {
  throw new Error(`Cannot compile \`${node.type}\` node`)
}
313 | | - |
/**
 * Check whether a tree should be parsed as a whole document rather than as
 * a fragment.
 *
 * That is the case when the node itself, or the first child of a `root`,
 * is a doctype or an `html` element.
 *
 * @param {Object} node
 *   Node to check.
 * @returns {boolean}
 *   Whether `node` starts a document.
 */
function documentMode(node) {
  let head = node

  if (node.type === 'root') {
    // A `root` without a `children` array is tolerated elsewhere in this
    // file (see `all`), so it must not crash here either.
    head = node.children ? node.children[0] : undefined
  }

  return Boolean(head && (head.type === 'doctype' || head.tagName === 'html'))
}
319 | | - |
/**
 * Create a parse5 location object from the position of a node.
 *
 * The start and end points are read with `pointStart` and `pointEnd`, and
 * their `line`, `column`, and `offset` fields are copied over as they are.
 *
 * @param {Object} node
 *   Node whose position is read.
 * @returns {Object}
 *   Object with `startLine`, `startCol`, `startOffset`, `endLine`, `endCol`,
 *   and `endOffset`.
 */
function createParse5Location(node) {
  const start = pointStart(node)
  const end = pointEnd(node)

  return {
    startLine: start.line,
    startCol: start.column,
    startOffset: start.offset,
    endLine: end.line,
    endCol: end.column,
    endOffset: end.offset
  }
}
| 5 | +export {raw} from './lib/index.js' |
0 commit comments