Skip to content

Commit a9fdabe

Browse files
- Resolving #111; (#112)
- Adding `XBrowserNode` to deal with browser-generated nodes with extra properties; - Additional comments and notes at TODO and README files.
1 parent aa9a2a3 commit a9fdabe

File tree

9 files changed

+128
-39
lines changed

9 files changed

+128
-39
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ const xslt = new Xslt(options);
8383
```
8484

8585
- `cData` (`boolean`, default `true`): resolves CDATA elements in the output. Content under CDATA is resolved as text. This overrides `escape` for CDATA content.
86-
- `escape` (`boolean`, default `true`): replaces symbols like `<`, `>`, `&` and `"` by the corresponding [XML entities](https://www.tutorialspoint.com/xml/xml_character_entities.htm).
86+
- `escape` (`boolean`, default `true`): replaces symbols like `<`, `>`, `&` and `"` with the corresponding [HTML/XML entities](https://www.tutorialspoint.com/xml/xml_character_entities.htm). Can be overridden by `disable-output-escaping`, which also does the opposite, unescaping `&lt;` and `&gt;` to `<` and `>`, respectively.
8787
- `selfClosingTags` (`boolean`, default `true`): Self-closes tags that don't have inner elements, if `true`. For instance, `<test></test>` becomes `<test />`.
8888
- `outputMethod` (`string`, default `xml`): Specifies the default output method. If `<xsl:output>` is declared in your XSLT file, this will be overridden.
8989
- `parameters` (`array`, default `[]`): external parameters that you want to use.

TODO.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
XSLT-processor TODO
22
=====
33

4-
* Rethink match algorithm, as described in https://github.com/DesignLiquido/xslt-processor/pull/62#issuecomment-1636684453;
4+
* Rethink match algorithm, as described in https://github.com/DesignLiquido/xslt-processor/pull/62#issuecomment-1636684453. There's a good number of issues open about this problem:
5+
* https://github.com/DesignLiquido/xslt-processor/issues/108
6+
* https://github.com/DesignLiquido/xslt-processor/issues/109
7+
* https://github.com/DesignLiquido/xslt-processor/issues/110
58
* XSLT validation, besides the version number;
69
* XSL:number
710
* `attribute-set`, `decimal-format`, etc. (check `src/xslt.ts`)
8-
* `/html/body//ul/li|html/body//ol/li` has `/html/body//ul/li` evaluated by this XPath implementation as "absolute", and `/html/body//ol/li` as "relative". Both should be evaluated as "absolute".
9-
* Implement `<xsl:import>` with correct template precedence.
11+
* `/html/body//ul/li|html/body//ol/li` has `/html/body//ul/li` evaluated by this XPath implementation as "absolute", and `/html/body//ol/li` as "relative". Both should be evaluated as "absolute". One idea is to rewrite the XPath logic entirely, since it is nearly impossible to debug it.
12+
* Implement `<xsl:import>` with correct template precedence.
1013

1114
Help is much appreciated. It seems to currently work for most of our purposes, but fixes and additions are always welcome!

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
"@rollup/plugin-typescript": "^11.1.1",
4949
"@types/he": "^1.2.0",
5050
"@types/jest": "^29.5.12",
51+
"@types/node-fetch": "^2.6.11",
5152
"@typescript-eslint/eslint-plugin": "^8.4.0",
5253
"@typescript-eslint/parser": "^8.4.0",
5354
"babel-jest": "^29.7.0",

src/dom/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ export * from './xdocument';
33
export * from './xml-functions';
44
export * from './xml-output-options';
55
export * from './xml-parser';
6+
export * from './xbrowser-node';
67
export * from './xnode';

src/dom/xbrowser-node.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import { XNode } from "./xnode";
2+
3+
/**
4+
* Special XNode class that retains properties from browsers like
5+
* IE, Opera, Safari, etc.
6+
*/
7+
export class XBrowserNode extends XNode {
8+
innerText?: string;
9+
textContent?: string;
10+
}

src/dom/xml-functions.ts

Lines changed: 72 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import { domGetAttributeValue } from './functions';
1414
import { XNode } from './xnode';
1515
import { XDocument } from './xdocument';
1616
import { XmlOutputOptions } from './xml-output-options';
17-
17+
import { XBrowserNode } from './xbrowser-node';
1818

1919
/**
2020
* Returns the text value of a node; for nodes without children this
@@ -25,15 +25,15 @@ import { XmlOutputOptions } from './xml-output-options';
2525
* @param disallowBrowserSpecificOptimization A boolean, to avoid browser optimization.
2626
* @returns The XML value as a string.
2727
*/
28-
export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization: boolean = false): string {
28+
export function xmlValue(node: XNode, disallowBrowserSpecificOptimization: boolean = false): string {
2929
if (!node) {
3030
return '';
3131
}
3232

3333
let ret = '';
3434
switch (node.nodeType) {
3535
case DOM_DOCUMENT_TYPE_NODE:
36-
return `<!DOCTYPE ${node.nodeValue}>`
36+
return `<!DOCTYPE ${node.nodeValue}>`;
3737
case DOM_TEXT_NODE:
3838
case DOM_CDATA_SECTION_NODE:
3939
case DOM_ATTRIBUTE_NODE:
@@ -44,19 +44,22 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization:
4444
if (!disallowBrowserSpecificOptimization) {
4545
// Only returns something if node has either `innerText` or `textContent` (not an XNode).
4646
// IE, Safari, Opera, and friends (`innerText`)
47-
const innerText = node.innerText;
48-
if (innerText != undefined) {
47+
const browserNode = node as XBrowserNode;
48+
const innerText = browserNode.innerText;
49+
if (innerText !== undefined) {
4950
return innerText;
5051
}
5152
// Firefox (`textContent`)
52-
const textContent = node.textContent;
53-
if (textContent != undefined) {
53+
const textContent = browserNode.textContent;
54+
if (textContent !== undefined) {
5455
return textContent;
5556
}
5657
}
5758

5859
if (node.transformedChildNodes.length > 0) {
59-
const transformedTextNodes = node.transformedChildNodes.filter((n: XNode) => n.nodeType !== DOM_ATTRIBUTE_NODE);
60+
const transformedTextNodes = node.transformedChildNodes.filter(
61+
(n: XNode) => n.nodeType !== DOM_ATTRIBUTE_NODE
62+
);
6063
for (let i = 0; i < transformedTextNodes.length; ++i) {
6164
ret += xmlValue(transformedTextNodes[i]);
6265
}
@@ -71,8 +74,15 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization:
7174
}
7275
}
7376

74-
// TODO: Give a better name to this.
75-
export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolean = false) {
77+
/**
78+
* The older version to obtain an XML value from a node.
79+
* For now, this form is only used to get text from attribute nodes,
80+
* and it should be removed in future versions.
81+
* @param node The attribute node.
82+
* @param disallowBrowserSpecificOptimization A boolean, to avoid browser optimization.
83+
* @returns The XML value as a string.
84+
*/
85+
export function xmlValueLegacyBehavior(node: XNode, disallowBrowserSpecificOptimization: boolean = false) {
7686
if (!node) {
7787
return '';
7888
}
@@ -91,13 +101,14 @@ export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolea
91101
case DOM_ELEMENT_NODE:
92102
if (!disallowBrowserSpecificOptimization) {
93103
// IE, Safari, Opera, and friends
94-
const innerText = node.innerText;
95-
if (innerText != undefined) {
104+
const browserNode = node as XBrowserNode;
105+
const innerText = browserNode.innerText;
106+
if (innerText !== undefined) {
96107
return innerText;
97108
}
98109
// Firefox
99-
const textContent = node.textContent;
100-
if (textContent != undefined) {
110+
const textContent = browserNode.textContent;
111+
if (textContent !== undefined) {
101112
return textContent;
102113
}
103114
}
@@ -121,17 +132,28 @@ export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolea
121132
* @returns The XML string.
122133
* @see xmlTransformedText
123134
*/
124-
export function xmlText(node: XNode, options: XmlOutputOptions = {
125-
cData: true,
126-
escape: true,
127-
selfClosingTags: true,
128-
outputMethod: 'xml'
129-
}) {
135+
export function xmlText(
136+
node: XNode,
137+
options: XmlOutputOptions = {
138+
cData: true,
139+
escape: true,
140+
selfClosingTags: true,
141+
outputMethod: 'xml'
142+
}
143+
) {
130144
const buffer: string[] = [];
131145
xmlTextRecursive(node, buffer, options);
132146
return buffer.join('');
133147
}
134148

149+
/**
150+
* The recursive logic to transform a node into XML text.
151+
* It can be considered legacy, since it does not work with transformed nodes, and
152+
* probably will be removed in the future.
153+
* @param {XNode} node The node.
154+
* @param {string[]} buffer The buffer, that will represent the transformed XML text.
155+
* @param {XmlOutputOptions} options XML output options.
156+
*/
135157
function xmlTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptions) {
136158
if (node.nodeType == DOM_TEXT_NODE) {
137159
buffer.push(xmlEscapeText(node.nodeValue));
@@ -158,7 +180,10 @@ function xmlTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptio
158180
}
159181

160182
if (node.childNodes.length === 0) {
161-
if (options.selfClosingTags || (options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName))) {
183+
if (
184+
options.selfClosingTags ||
185+
(options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName))
186+
) {
162187
buffer.push('/>');
163188
} else {
164189
buffer.push(`></${xmlFullNodeName(node)}>`);
@@ -197,15 +222,20 @@ export function xmlTransformedText(
197222
return buffer.join('');
198223
}
199224

200-
function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOutputOptions) {
225+
/**
226+
* The recursive logic to transform a node into XML text.
227+
* @param {XNode} node The node.
228+
* @param {string[]} buffer The buffer, that will represent the transformed XML text.
229+
* @param {XmlOutputOptions} options XML output options.
230+
*/
231+
function xmlTransformedTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptions) {
201232
if (node.visited) return;
202233
const nodeType = node.transformedNodeType || node.nodeType;
203234
const nodeValue = node.transformedNodeValue || node.nodeValue;
204235
if (nodeType === DOM_TEXT_NODE) {
205236
if (node.transformedNodeValue && node.transformedNodeValue.trim() !== '') {
206-
const finalText = node.escape && options.escape?
207-
xmlEscapeText(node.transformedNodeValue) :
208-
node.transformedNodeValue;
237+
const finalText =
238+
node.escape && options.escape ? xmlEscapeText(node.transformedNodeValue): xmlUnescapeText(node.transformedNodeValue);
209239
buffer.push(finalText);
210240
}
211241
} else if (nodeType === DOM_CDATA_SECTION_NODE) {
@@ -246,9 +276,9 @@ function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOut
246276
function xmlElementLogicTrivial(node: XNode, buffer: string[], options: XmlOutputOptions) {
247277
buffer.push(`<${xmlFullNodeName(node)}`);
248278

249-
let attributes = node.transformedChildNodes.filter(n => n.nodeType === DOM_ATTRIBUTE_NODE);
279+
let attributes = node.transformedChildNodes.filter((n) => n.nodeType === DOM_ATTRIBUTE_NODE);
250280
if (attributes.length === 0) {
251-
attributes = node.childNodes.filter(n => n.nodeType === DOM_ATTRIBUTE_NODE);
281+
attributes = node.childNodes.filter((n) => n.nodeType === DOM_ATTRIBUTE_NODE);
252282
}
253283

254284
for (let i = 0; i < attributes.length; ++i) {
@@ -262,9 +292,9 @@ function xmlElementLogicTrivial(node: XNode, buffer: string[], options: XmlOutpu
262292
}
263293
}
264294

265-
let childNodes = node.transformedChildNodes.filter(n => n.nodeType !== DOM_ATTRIBUTE_NODE);
295+
let childNodes = node.transformedChildNodes.filter((n) => n.nodeType !== DOM_ATTRIBUTE_NODE);
266296
if (childNodes.length === 0) {
267-
childNodes = node.childNodes.filter(n => n.nodeType !== DOM_ATTRIBUTE_NODE);
297+
childNodes = node.childNodes.filter((n) => n.nodeType !== DOM_ATTRIBUTE_NODE);
268298
}
269299

270300
childNodes = childNodes.sort((a, b) => a.siblingPosition - b.siblingPosition);
@@ -317,7 +347,17 @@ function xmlFullNodeName(node: XNode): string {
317347
}
318348

319349
/**
320-
* Escape XML special markup chracters: tag delimiter < > and entity
350+
* Replaces HTML/XML entities with their literal characters.
351+
* Currently implementing only tag delimiters.
352+
* @param text The text to be transformed.
353+
* @returns The unescaped text.
354+
*/
355+
export function xmlUnescapeText(text: string): string {
356+
return `${text}`.replace(/&lt;/g, '<').replace(/&gt;/g, '>');
357+
}
358+
359+
/**
360+
* Escape XML special markup characters: tag delimiter <, >, and entity
321361
* reference start delimiter &. The escaped string can be used in XML
322362
* text portions (i.e. between tags).
323363
* @param s The string to be escaped.
@@ -332,8 +372,8 @@ export function xmlEscapeText(s: string): string {
332372
}
333373

334374
/**
335-
* Escape XML special markup characters: tag delimiter < > entity
336-
* reference start delimiter & and quotes ". The escaped string can be
375+
* Escape XML special markup characters: tag delimiters < and >, entity
376+
* reference start delimiter &, and double quotes ("). The escaped string can be
337377
* used in double quoted XML attribute value portions (i.e. in
338378
* attributes within start tags).
339379
* @param s The string to be escaped.

src/xslt/xslt.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import {
2424
xmlGetAttribute,
2525
xmlTransformedText,
2626
xmlValue,
27-
xmlValue2
27+
xmlValueLegacyBehavior
2828
} from '../dom';
2929
import { ExprContext, XPath } from '../xpath';
3030

@@ -366,7 +366,7 @@ export class Xslt {
366366

367367
const documentFragment = domCreateDocumentFragment(this.outputDocument);
368368
await this.xsltChildNodes(context, template, documentFragment);
369-
const value = xmlValue2(documentFragment);
369+
const value = xmlValueLegacyBehavior(documentFragment);
370370

371371
if (output && output.nodeType === DOM_DOCUMENT_FRAGMENT_NODE) {
372372
domSetTransformedAttribute(output, name, value);

tests/xslt/xslt.test.tsx

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,16 @@ describe('xslt', () => {
199199
});
200200

201201
describe('xsl:text', () => {
202-
it('disable-output-escaping', async () => {
202+
// Apparently, this is not how `disable-output-escaping` works.
203+
// According to initial research, a `<!DOCTYPE html>` explicitly mentioned in
204+
// the XSLT gives an error like:
205+
// `Unable to generate the XML document using the provided XML/XSL input.
206+
// org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 70;
207+
// A DOCTYPE is not allowed in content.`
208+
// All the examples of `disable-output-escaping` usage will point out
209+
// the opposite: `&lt;!DOCTYPE html&gt;` will become `<!DOCTYPE html>`.
210+
// This test will be kept here for historical purposes.
211+
it.skip('disable-output-escaping', async () => {
203212
const xml = `<anything></anything>`;
204213
const xslt = `<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
205214
<xsl:output method="html" indent="yes" />
@@ -216,6 +225,23 @@ describe('xslt', () => {
216225
assert.equal(html, '<!DOCTYPE html>');
217226
});
218227

228+
it('disable-output-escaping, XML/HTML entities', async () => {
229+
const xml = `<anything></anything>`;
230+
const xslt = `<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
231+
<xsl:output method="html" indent="yes" />
232+
<xsl:template match="/">
233+
<xsl:text disable-output-escaping='yes'>&lt;!DOCTYPE html&gt;</xsl:text>
234+
</xsl:template>
235+
</xsl:stylesheet>`;
236+
237+
const xsltClass = new Xslt();
238+
const xmlParser = new XmlParser();
239+
const parsedXml = xmlParser.xmlParse(xml);
240+
const parsedXslt = xmlParser.xmlParse(xslt);
241+
const html = await xsltClass.xsltProcess(parsedXml, parsedXslt);
242+
assert.equal(html, '<!DOCTYPE html>');
243+
});
244+
219245
it('CDATA as JavaScript', async () => {
220246
const xml = `<XampleXml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
221247
xmlns:xsd="http://www.w3.org/2001/XMLSchema">

yarn.lock

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,6 +1886,14 @@
18861886
resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841"
18871887
integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==
18881888

1889+
"@types/node-fetch@^2.6.11":
1890+
version "2.6.11"
1891+
resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24"
1892+
integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==
1893+
dependencies:
1894+
"@types/node" "*"
1895+
form-data "^4.0.0"
1896+
18891897
"@types/node@*":
18901898
version "22.5.4"
18911899
resolved "https://registry.yarnpkg.com/@types/node/-/node-22.5.4.tgz#83f7d1f65bc2ed223bdbf57c7884f1d5a4fa84e8"

0 commit comments

Comments
 (0)