Skip to content

Commit d310e56

Browse files
Working with HTML tags that don't need to be closed.
1 parent 51cf41e commit d310e56

File tree

5 files changed

+165
-70
lines changed

5 files changed

+165
-70
lines changed

src/dom/xml-functions.ts

Lines changed: 54 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
DOM_COMMENT_NODE,
77
DOM_DOCUMENT_FRAGMENT_NODE,
88
DOM_DOCUMENT_NODE,
9+
DOM_DOCUMENT_TYPE_NODE,
910
DOM_ELEMENT_NODE,
1011
DOM_TEXT_NODE
1112
} from '../constants';
@@ -14,49 +15,57 @@ import { XNode } from './xnode';
1415
import { XDocument } from './xdocument';
1516
import { XmlOutputOptions } from './xml-output-options';
1617

17-
// Returns the text value of a node; for nodes without children this
18-
// is the nodeValue, for nodes with children this is the concatenation
19-
// of the value of all children. Browser-specific optimizations are used by
20-
// default; they can be disabled by passing "true" in as the second parameter.
21-
export function xmlValue(node: any, disallowBrowserSpecificOptimization: boolean = false) {
18+
19+
/**
20+
* Returns the text value of a node; for nodes without children this
21+
* is the nodeValue, for nodes with children this is the concatenation
22+
* of the value of all children. Browser-specific optimizations are used by
23+
* default; they can be disabled by passing "true" in as the second parameter.
24+
* @param node The node (not necessarily an `XNode` here).
25+
* @param disallowBrowserSpecificOptimization When `true`, the browser-specific shortcuts (`innerText` / `textContent`) are skipped.
26+
* @returns The XML value as a string.
27+
*/
28+
export function xmlValue(node: any, disallowBrowserSpecificOptimization: boolean = false): string {
2229
if (!node) {
2330
return '';
2431
}
2532

2633
let ret = '';
27-
if (node.nodeType == DOM_TEXT_NODE || node.nodeType == DOM_CDATA_SECTION_NODE) {
28-
ret += node.nodeValue;
29-
} else if (node.nodeType == DOM_ATTRIBUTE_NODE) {
30-
ret += node.nodeValue;
31-
} else if (
32-
node.nodeType == DOM_ELEMENT_NODE ||
33-
node.nodeType == DOM_DOCUMENT_NODE ||
34-
node.nodeType == DOM_DOCUMENT_FRAGMENT_NODE
35-
) {
36-
if (!disallowBrowserSpecificOptimization) {
37-
// IE, Safari, Opera, and friends
38-
const innerText = node.innerText;
39-
if (innerText != undefined) {
40-
return innerText;
41-
}
42-
// Firefox
43-
const textContent = node.textContent;
44-
if (textContent != undefined) {
45-
return textContent;
34+
switch (node.nodeType) {
35+
case DOM_DOCUMENT_TYPE_NODE:
36+
return `<!DOCTYPE ${node.nodeValue}>`
37+
case DOM_TEXT_NODE:
38+
case DOM_CDATA_SECTION_NODE:
39+
case DOM_ATTRIBUTE_NODE:
40+
return node.nodeValue;
41+
case DOM_ELEMENT_NODE:
42+
case DOM_DOCUMENT_NODE:
43+
case DOM_DOCUMENT_FRAGMENT_NODE:
44+
if (!disallowBrowserSpecificOptimization) {
45+
// IE, Safari, Opera, and friends
46+
const innerText = node.innerText;
47+
if (innerText != undefined) {
48+
return innerText;
49+
}
50+
// Firefox
51+
const textContent = node.textContent;
52+
if (textContent != undefined) {
53+
return textContent;
54+
}
4655
}
47-
}
4856

49-
if (node.transformedChildNodes.length > 0) {
50-
for (let i = 0; i < node.transformedChildNodes.length; ++i) {
51-
ret += xmlValue(node.transformedChildNodes[i]);
52-
}
53-
} else {
54-
for (let i = 0; i < node.childNodes.length; ++i) {
55-
ret += xmlValue(node.childNodes[i]);
57+
if (node.transformedChildNodes.length > 0) {
58+
for (let i = 0; i < node.transformedChildNodes.length; ++i) {
59+
ret += xmlValue(node.transformedChildNodes[i]);
60+
}
61+
} else {
62+
for (let i = 0; i < node.childNodes.length; ++i) {
63+
ret += xmlValue(node.childNodes[i]);
64+
}
5665
}
57-
}
66+
67+
return ret;
5868
}
59-
return ret;
6069
}
6170

6271
// TODO: Give a better name to this.
@@ -98,14 +107,17 @@ export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolea
98107

99108
/**
100109
* Returns the representation of a node as XML text.
110+
* In general, it is not used by XSLT, which uses `xmlTransformedText` instead.
101111
* @param {XNode} node The starting node.
102112
* @param {XmlOutputOptions} options XML output options.
103113
* @returns The XML string.
114+
* @see xmlTransformedText
104115
*/
105116
export function xmlText(node: XNode, options: XmlOutputOptions = {
106117
cData: false,
107118
escape: true,
108-
selfClosingTags: true
119+
selfClosingTags: true,
120+
outputMethod: 'xml'
109121
}) {
110122
const buffer: string[] = [];
111123
xmlTextRecursive(node, buffer, options);
@@ -133,7 +145,7 @@ function xmlTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptio
133145
}
134146

135147
if (node.childNodes.length === 0) {
136-
if (options.selfClosingTags) {
148+
if (options.selfClosingTags || (options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName))) {
137149
buffer.push('/>');
138150
} else {
139151
buffer.push(`></${xmlFullNodeName(node)}>`);
@@ -163,7 +175,8 @@ export function xmlTransformedText(
163175
options: XmlOutputOptions = {
164176
cData: false,
165177
escape: true,
166-
selfClosingTags: true
178+
selfClosingTags: true,
179+
outputMethod: 'xml'
167180
}
168181
) {
169182
const buffer: string[] = [];
@@ -175,14 +188,14 @@ function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOut
175188
if (node.visited) return;
176189
const nodeType = node.transformedNodeType || node.nodeType;
177190
const nodeValue = node.transformedNodeValue || node.nodeValue;
178-
if (nodeType == DOM_TEXT_NODE) {
191+
if (nodeType === DOM_TEXT_NODE) {
179192
if (node.transformedNodeValue && node.transformedNodeValue.trim() !== '') {
180193
const finalText = node.escape && options.escape?
181194
xmlEscapeText(node.transformedNodeValue) :
182195
node.transformedNodeValue;
183196
buffer.push(finalText);
184197
}
185-
} else if (nodeType == DOM_CDATA_SECTION_NODE) {
198+
} else if (nodeType === DOM_CDATA_SECTION_NODE) {
186199
if (options.cData) {
187200
buffer.push(nodeValue);
188201
} else {
@@ -199,7 +212,7 @@ function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOut
199212
} else {
200213
xmlElementLogicMuted(node, buffer, options);
201214
}
202-
} else if (nodeType == DOM_DOCUMENT_NODE || nodeType == DOM_DOCUMENT_FRAGMENT_NODE) {
215+
} else if (nodeType === DOM_DOCUMENT_NODE || nodeType === DOM_DOCUMENT_FRAGMENT_NODE) {
203216
const childNodes = node.transformedChildNodes.concat(node.childNodes);
204217
childNodes.sort((a, b) => a.siblingPosition - b.siblingPosition);
205218

@@ -235,7 +248,7 @@ function xmlElementLogicTrivial(node: XNode, buffer: string[], options: XmlOutpu
235248
let childNodes = node.transformedChildNodes.length > 0 ? node.transformedChildNodes : node.childNodes;
236249
childNodes = childNodes.sort((a, b) => a.siblingPosition - b.siblingPosition);
237250
if (childNodes.length === 0) {
238-
if (options.selfClosingTags) {
251+
if (options.selfClosingTags || (options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName))) {
239252
buffer.push('/>');
240253
} else {
241254
buffer.push(`></${xmlFullNodeName(node)}>`);

src/dom/xml-output-options.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ export type XmlOutputOptions = {
22
cData: boolean;
33
escape: boolean;
44
selfClosingTags: boolean;
5+
outputMethod: 'xml' | 'html'
56
}

src/dom/xml-parser.ts

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ export class XmlParser {
3636
XML11_TAGNAME_REGEXP = new RegExp(`^(${XML11_NAME})`);
3737
XML11_ATTRIBUTE_REGEXP = new RegExp(XML11_ATTRIBUTE, 'g');
3838

39-
lenientHtmlTags = ['link'];
39+
lenientHtmlTags = ['hr', 'link'];
4040

4141
/**
4242
* The entry point for this parser.
@@ -46,7 +46,7 @@ export class XmlParser {
4646
* @returns A DOM document.
4747
*/
4848
xmlParse(xmlOrHtml: string): XDocument {
49-
if (xmlOrHtml.toUpperCase().includes('<!DOCTYPE HTML')) {
49+
if (xmlOrHtml.toUpperCase().startsWith('<!DOCTYPE HTML')) {
5050
return this.htmlParse(xmlOrHtml);
5151
}
5252

@@ -61,7 +61,7 @@ export class XmlParser {
6161
*/
6262
private namespaceMapAt(node: XNode): { [prefix: string]: string } {
6363
const map = {
64-
// reserved namespaces https://www.w3.org/TR/REC-xml-names/#xmlReserved
64+
// reserved namespaces: https://www.w3.org/TR/REC-xml-names/#xmlReserved
6565
xmlns: 'http://www.w3.org/2000/xmlns/',
6666
xml: 'http://www.w3.org/XML/1998/namespace'
6767
};
@@ -138,23 +138,6 @@ export class XmlParser {
138138
parent = node;
139139
stack.push(node);
140140
}
141-
142-
const namespaceMap = this.namespaceMapAt(node);
143-
if (node.prefix !== null) {
144-
if (node.prefix in namespaceMap) node.namespaceUri = namespaceMap[node.prefix];
145-
// else, prefix is undefined. do anything?
146-
} else {
147-
if ('' in namespaceMap) node.namespaceUri = namespaceMap[''];
148-
}
149-
for (let i = 0; i < node.attributes.length; ++i) {
150-
if (node.attributes[i].prefix !== null) {
151-
if (node.attributes[i].prefix in namespaceMap) {
152-
node.attributes[i].namespaceUri = namespaceMap[node.attributes[i].prefix];
153-
}
154-
// else, prefix undefined.
155-
}
156-
// elements with no prefix always have no namespace, so do nothing here.
157-
}
158141
}
159142

160143
start = i + 1;
@@ -223,8 +206,8 @@ export class XmlParser {
223206
regexAttribute = this.XML10_ATTRIBUTE_REGEXP;
224207
}
225208

226-
const xmlDoc = new XDocument();
227-
const root = xmlDoc;
209+
const xmlDocument = new XDocument();
210+
const root = xmlDocument;
228211
const stack = [];
229212

230213
let parent: XNode = root;
@@ -253,7 +236,7 @@ export class XmlParser {
253236
} else {
254237
const empty = text.match(this.regexEmpty);
255238
const tagname = regexTagname.exec(text)[1];
256-
let node = domCreateElement(xmlDoc, tagname);
239+
let node = domCreateElement(xmlDocument, tagname);
257240

258241
let attribute;
259242
while ((attribute = regexAttribute.exec(text))) {
@@ -292,22 +275,32 @@ export class XmlParser {
292275
} else if (!tag && char === '<') {
293276
let text = xml.slice(start, i);
294277
if (text && parent !== root) {
295-
domAppendChild(parent, domCreateTextNode(xmlDoc, text));
278+
domAppendChild(parent, domCreateTextNode(xmlDocument, text));
296279
}
297280
if (xml.slice(i + 1, i + 4) === '!--') {
298281
let endTagIndex = xml.slice(i + 4).indexOf('-->');
299282
if (endTagIndex) {
300-
let node = domCreateComment(xmlDoc, xml.slice(i + 4, i + endTagIndex + 4));
283+
let node = domCreateComment(xmlDocument, xml.slice(i + 4, i + endTagIndex + 4));
301284
domAppendChild(parent, node);
302285
i += endTagIndex + 6;
303286
}
304287
} else if (xml.slice(i + 1, i + 9) === '![CDATA[') {
305288
let endTagIndex = xml.slice(i + 9).indexOf(']]>');
306289
if (endTagIndex) {
307-
let node = domCreateCDATASection(xmlDoc, xml.slice(i + 9, i + endTagIndex + 9));
290+
let node = domCreateCDATASection(xmlDocument, xml.slice(i + 9, i + endTagIndex + 9));
308291
domAppendChild(parent, node);
309292
i += endTagIndex + 11;
310293
}
294+
} else if (xml.slice(i + 1, i + 9) === '!DOCTYPE') { // "!DOCTYPE" can be used in a XSLT template.
295+
let endTagIndex = xml.slice(i + 9).indexOf('>');
296+
if (endTagIndex) {
297+
const dtdValue = xml.slice(i + 9, i + endTagIndex + 9).trimStart();
298+
// TODO: Not sure if this is a good solution.
299+
// Trying to implement this: https://github.com/DesignLiquido/xslt-processor/issues/30
300+
const node = domCreateDTDSection(xmlDocument, dtdValue);
301+
domAppendChild(parent, node);
302+
i += endTagIndex + dtdValue.length + 5;
303+
}
311304
} else {
312305
tag = true;
313306
}

src/xslt/xslt.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ export class Xslt {
7474
decimalFormatSettings: XsltDecimalFormatSettings;
7575

7676
outputDocument: XDocument;
77-
outputMethod: string;
77+
outputMethod: 'xml' | 'html';
7878
outputOmitXmlDeclaration: string;
7979
version: string;
8080

@@ -129,8 +129,10 @@ export class Xslt {
129129
const transformedOutputXml = xmlTransformedText(outputDocument, {
130130
cData: false,
131131
escape: this.options.escape,
132-
selfClosingTags: this.options.selfClosingTags
132+
selfClosingTags: this.options.selfClosingTags,
133+
outputMethod: this.outputMethod
133134
});
135+
134136
return transformedOutputXml;
135137
}
136138

0 commit comments

Comments
 (0)