From c182e299058fa6962e7f42cca6258b06c2bc3466 Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Thu, 18 Dec 2025 18:41:13 +0000 Subject: [PATCH 1/8] - Add tokenizer + initial work on parser --- src/matchers/context/Stylesheet.ts | 22 +++- src/matchers/css/index.ts | 1 + src/matchers/css/parser.ts | 159 +++++++++++++++++++++++++++++ src/matchers/css/specificity.ts | 48 +++++++++ src/matchers/css/tokenizer.ts | 114 +++++++++++++++++++++ src/matchers/toHaveCssStyle.ts | 21 +++- 6 files changed, 359 insertions(+), 6 deletions(-) create mode 100644 src/matchers/css/index.ts create mode 100644 src/matchers/css/parser.ts create mode 100644 src/matchers/css/specificity.ts create mode 100644 src/matchers/css/tokenizer.ts diff --git a/src/matchers/context/Stylesheet.ts b/src/matchers/context/Stylesheet.ts index ad6e867..9675172 100644 --- a/src/matchers/context/Stylesheet.ts +++ b/src/matchers/context/Stylesheet.ts @@ -1,7 +1,19 @@ import css, { CssRuleAST, CssStylesheetAST } from '@adobe/css-tools'; export type StylesheetRule = { - selectors: string; + /** + * The full selector string (e.g. ".class1, .class2"). + */ + selector: string; + + /** + * The individual selector parts (e.g. [".class1", ".class2"]). + */ + selectorParts: string[]; + + /** + * The declarations within the rule (e.g. { color: "red", fontSize: "12px" }). + */ declarations: Record; } @@ -27,7 +39,8 @@ export class Stylesheet { this.rules = this.ast!.stylesheet.rules.reduce>((acc, rule) => { if (rule.type === 'rule') { const cssRule = rule as CssRuleAST; - const selectors = cssRule.selectors.join(', '); + const { selectors: selectorParts } = cssRule; + const selector = selectorParts.join(', '); const declarations: Record = rule.declarations.reduce((declAcc, decl) => { if (decl.type === 'declaration') { @@ -38,8 +51,9 @@ export class Stylesheet { return { ...acc, - [selectors]: { - selectors, + [selector]: { + selector, + selectorParts: cssRule.selectors, declarations, } }; diff --git a/src/matchers/css/index.ts b/src/matchers/css/index.ts new file mode 100644 index 0000000..c278cf4 --- /dev/null +++ b/src/matchers/css/index.ts @@ -0,0 +1 @@ +export { Parser } from "./parser"; \ No newline at end of file diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts new file mode 100644 index 0000000..8e1ea0d --- /dev/null +++ b/src/matchers/css/parser.ts @@ -0,0 +1,159 @@ +import { Token, tokenize, TokenType } from "./tokenizer"; + +class ParsingError extends Error { + constructor(message: string) { + super(`Parsing Error: ${message}`); + } +} + +class TokenStream { + public position: number; + + constructor (private tokens: Token[]) { + this.position = 0; + } + + public peek(): Token | null { + return this.tokens[this.position] || null; + } + + public consume(): Token | null { + return this.tokens[this.position++] || null; + } + + public consumeIf(type: TokenType): Token | null { + const token = this.peek(); + if (token?.type === type) { + return this.consume(); + } + return null; + } + + public consumeExpect(type: TokenType): Token { + const token = this.consume(); + + if (token?.type !== type) { + throw new ParsingError(`Expected token of type ${type}, but got ${token?.type || 'end of input'}`); + } + + return token; + } + + public setPosition(position: number) { + this.position = position; + } +} + +export class Parser { + private tokenStream: TokenStream; + private positionStack: number[]; + + constructor(private selector: string) { + this.tokenStream = new TokenStream(tokenize(selector)); + this.positionStack = []; + } + + /** + * Attempts to parse using the provided parse function. + * + * If the parse fails, the token stream is reset to its original state. + */ + private tryParse(parseFn: () => any) { + this.positionStack.push(this.tokenStream.position); + + try { + const result = parseFn(); + this.positionStack.pop(); + return result; + } catch (err) { + this.tokenStream.setPosition(this.positionStack.pop()!); + return null; + } + } + + private tryParseMultiple(...parseFns: (() => any)[]) { + for (const parseFn of parseFns) { + const result = this.tryParse(parseFn); + if (result !== null) { + return result; + } + } + + return null; + } + + private parseNumber() { + let value = ''; + let token: Token | null; + + while (token = this.tokenStream.peek()) { + if (token.type !== 'digit') { + break; + } + + const next = this.tokenStream.consume(); + value += next?.value; + } + + return Number(value); + } + + private parseName() { + let value = ''; + let token: Token | null; + + const validTokenTypes: TokenType[] = ['letter', 'digit', 'minus', 'underscore']; + value += this.tokenStream.consumeExpect('letter').value; + + while (token = this.tokenStream.peek()) { + if (!validTokenTypes.includes(token.type)) { + break; + } + + const next = this.tokenStream.consume(); + value += next?.value; + } + + return value; + } + + private parseIdentifier() { + let value = ''; + + value += this.tokenStream.consumeExpect('hash').value; + value += this.parseName(); + + return value; + } + + private parseClass() { + let value = ''; + + value += this.tokenStream.consumeExpect('period').value; + value += this.parseName(); + + return value; + } + + private parseTypeSelector() { + this.parseName(); + } + + public parse() { + const test = this.tryParseMultiple( + this.parseIdentifier.bind(this), + this.parseNumber.bind(this), + this.parseClass.bind(this), + this.parseTypeSelector.bind(this), + ); + + console.log(test); + + return test; + } +} + +// .this-is-a-class #id[attr="value"]::after:hover div > span + p ~ a:first-child + +// CSS selector that uses all possible syntax features +// div.class1.class2#id[attr="value"]:hover::after > span + p ~ a:first-child diff --git a/src/matchers/css/specificity.ts b/src/matchers/css/specificity.ts new file mode 100644 index 0000000..35dfd49 --- /dev/null +++ b/src/matchers/css/specificity.ts @@ -0,0 +1,48 @@ +type Specificity = { + inline: number; + idSelectors: number; + classSelectors: number; + typeSelectors: number; +} + +const isTypeSelector = (part: string): boolean => { + const pseudoElement = /(::[a-zA-Z]+[a-zA-Z0-9-]*)*/; + const elementSelector = /^[a-zA-Z]+[a-zA-Z0-9-]*$/; + return elementSelector.test(part) || pseudoElement.test(part); +} + +const isClassSelector = (part: string): boolean => { + const classSelector = /^\.[a-zA-Z]+[a-zA-Z0-9-]*$/; + const attributeSelector = /^\[.+\]$/; + const pseudoClassSelector = /^:[^:].*$/; + return false; +} + +export class SpecificityCalculator { + constructor(public selector: string) {} + + calculate() { + console.log(this.selector); + + const parts = this.selector.split(/\s+/); + console.log(parts); + + const specificity = parts.reduce((specificity, part) => { + if (part.startsWith('.')) { + specificity.classSelectors += 1; + } + + if (part.startsWith('#')) { + specificity.idSelectors += 1; + } + + if (isTypeSelector(part)) { + specificity.typeSelectors += 1; + } + + return specificity; + }, { inline: 0, idSelectors: 0, classSelectors: 0, typeSelectors: 0 }); + + console.log(specificity); + } +} \ No newline at end of file diff --git a/src/matchers/css/tokenizer.ts b/src/matchers/css/tokenizer.ts new file mode 100644 index 0000000..7921c59 --- /dev/null +++ b/src/matchers/css/tokenizer.ts @@ -0,0 +1,114 @@ +export type TokenType = 'letter' + | 'whitespace' + | 'digit' + | 'left_bracket' + | 'right_bracket' + | 'left_paren' + | 'right_parent' + | 'colon' + | 'period' + | 'hash' + | 'asterisk' + | 'equals' + | 'quote' + | 'tilde' + | 'left_angle_bracket' + | 'right_angle_bracket' + | 'dollar' + | 'caret' + | 'pipe' + | 'comma' + | 'plus' + | 'minus' + | 'underscore' + | 'other'; + +export type Token = { + value: string; + type: TokenType; +} + +const letterRegex = /[a-zA-Z]/; +const digitRegex = /[0-9]/; +const whitespaceRegex = /\s/; + +export const tokenize = (selector: string): Token[] => { + return selector.split('').reduce((tokens, char) => { + if (letterRegex.test(char)) { + tokens.push({ type: 'letter', value: char }); + } else if (digitRegex.test(char)) { + tokens.push({ type: 'digit', value: char }); + } else if (whitespaceRegex.test(char)) { + tokens.push({ type: 'whitespace', value: char }); + } else { + switch (char) { + case '[': + tokens.push({ type: 'left_bracket', value: char }); + break; + case ']': + tokens.push({ type: 'right_bracket', value: char }); + break; + case '(': + tokens.push({ type: 'left_paren', value: char }); + break; + case ')': + tokens.push({ type: 'right_parent', value: char }); + break; + case ':': + tokens.push({ type: 'colon', value: char }); + break; + case '.': + tokens.push({ type: 'period', value: char }); + break; + case '#': + tokens.push({ type: 'hash', value: char }); + break; + case '*': + tokens.push({ type: 'asterisk', value: char }); + break; + case '=': + tokens.push({ type: 'equals', value: char }); + break; + case '"': + case "'": + tokens.push({ type: 'quote', value: char }); + break; + case '~': + tokens.push({ type: 'tilde', value: char }); + break; + case '>': + tokens.push({ type: 'left_angle_bracket', value: char }); + break; + case '+': + tokens.push({ type: 'right_angle_bracket', value: char }); + break; + case '$': + tokens.push({ type: 'dollar', value: char }); + break; + case '^': + tokens.push({ type: 'caret', value: char }); + break; + case '|': + tokens.push({ type: 'pipe', value: char }); + break; + case ',': + tokens.push({ type: 'comma', value: char }); + break; + case '-': + tokens.push({ type: 'minus', value: char }); + break; + case '+': + tokens.push({ type: 'plus', value: char }); + break; + case '_': + tokens.push({ type: 'underscore', value: char }); + break; + default: + tokens.push({ type: 'other', value: char }); + break; + } + } + + return tokens; + }, []); +} diff --git a/src/matchers/toHaveCssStyle.ts b/src/matchers/toHaveCssStyle.ts index 29c2a9b..21de3b7 100644 --- a/src/matchers/toHaveCssStyle.ts +++ b/src/matchers/toHaveCssStyle.ts @@ -1,6 +1,8 @@ import type { MatcherFunction } from 'expect'; import { CSSModuleSnapshotsContext } from './context'; +import { SpecificityCalculator } from './css/specificity'; +import { StylesheetRule } from './context/Stylesheet'; type UnmatchedProperties = { property: string; @@ -42,7 +44,22 @@ export const toHaveCssStyle: MatcherFunction<[expectedStyles: Record actual.matches(rule.selectors)); + styleRules.forEach((rule) => { + rule.selectorParts.forEach((part) => { + const specificityCalculator = new SpecificityCalculator(part); + specificityCalculator.calculate(); + }) + }); + + const matchingStyleRules = styleRules.reduce((rules, rule) => { + const matchingParts = rule.selectorParts.filter((part) => actual.matches(part)); + + rules.push(...matchingParts.map((part) => ({ + part, + rule + }))); + return rules; + }, [] as { part: string; rule: StylesheetRule }[]); const unmatchedProperties: { property: string; value: string | number }[] = []; @@ -50,7 +67,7 @@ export const toHaveCssStyle: MatcherFunction<[expectedStyles: Record { - const isMatched = matchingStyleRules.some((rule) => { + const isMatched = matchingStyleRules.some(({ part, rule }) => { // Get only property declarations (ignore comments, etc). const propertyName = camelCaseToKebabCase(property); return rule.declarations[propertyName] && rule.declarations[propertyName] === value.toString(); From 8219db4853a9e10f27ef6ef2310164af7644d319 Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Sun, 21 Dec 2025 18:05:19 +0000 Subject: [PATCH 2/8] - Add jest support - Whitespace + combinator parsing --- jest.config.js | 21 +++++ package-lock.json | 155 ++++++++++++++++++++++++++++++++ package.json | 3 + src/matchers/css/parser.test.ts | 10 +++ src/matchers/css/parser.ts | 35 ++++++-- tsconfig.test.json | 15 ++++ 6 files changed, 230 insertions(+), 9 deletions(-) create mode 100644 jest.config.js create mode 100644 src/matchers/css/parser.test.ts create mode 100644 tsconfig.test.json diff --git a/jest.config.js b/jest.config.js new file mode 100644 index 0000000..3c6800d --- /dev/null +++ b/jest.config.js @@ -0,0 +1,21 @@ +module.exports = { + preset: 'ts-jest', + testEnvironment: 'node', + roots: ['/src'], + testMatch: [ + '**/__tests__/**/*.ts', + '**/?(*.)+(spec|test).ts' + ], + transform: { + '^.+\\.ts$': 'ts-jest' + }, + collectCoverageFrom: [ + 'src/**/*.ts', + '!src/**/*.d.ts', + '!src/**/index.ts' + ], + coverageDirectory: 'coverage', + coverageReporters: ['text', 'lcov', 'html'], + moduleFileExtensions: ['ts', 'js', 'json'], + verbose: true +}; diff --git a/package-lock.json b/package-lock.json index eda81fb..d6a6eb8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -23,6 +23,7 @@ "@typescript-eslint/parser": "^6.13.1", "eslint": "^8.54.0", "jest": "^29.7.0", + "ts-jest": "^29.1.1", "typescript": "^5.3.2" } }, @@ -2203,6 +2204,19 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, + "node_modules/bs-logger": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/bs-logger/-/bs-logger-0.2.6.tgz", + "integrity": "sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-json-stable-stringify": "2.x" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/bser": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz", @@ -3180,6 +3194,28 @@ "dev": true, "license": "MIT" }, + "node_modules/handlebars": { + "version": "4.7.8", + "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz", + "integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "minimist": "^1.2.5", + "neo-async": "^2.6.2", + "source-map": "^0.6.1", + "wordwrap": "^1.0.0" + }, + "bin": { + "handlebars": "bin/handlebars" + }, + "engines": { + "node": ">=0.4.7" + }, + "optionalDependencies": { + "uglify-js": "^3.1.4" + } + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -3496,6 +3532,7 @@ "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@jest/core": "^29.7.0", "@jest/types": "^29.6.3", @@ -4229,6 +4266,13 @@ "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==", "license": "MIT" }, + "node_modules/lodash.memoize": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/lodash.memoize/-/lodash.memoize-4.1.2.tgz", + "integrity": "sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==", + "dev": true, + "license": "MIT" + }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", @@ -4262,6 +4306,13 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, "node_modules/makeerror": { "version": "1.0.12", "resolved": "https://registry.npmjs.org/makeerror/-/makeerror-1.0.12.tgz", @@ -4329,6 +4380,16 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -4361,6 +4422,13 @@ "dev": true, "license": "MIT" }, + "node_modules/neo-async": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", + "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==", + "dev": true, + "license": "MIT" + }, "node_modules/node-addon-api": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", @@ -5363,6 +5431,72 @@ "typescript": ">=4.2.0" } }, + "node_modules/ts-jest": { + "version": "29.4.6", + "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.4.6.tgz", + "integrity": "sha512-fSpWtOO/1AjSNQguk43hb/JCo16oJDnMJf3CdEGNkqsEX3t0KX96xvyX1D7PfLCpVoKu4MfVrqUkFyblYoY4lA==", + "dev": true, + "license": "MIT", + "dependencies": { + "bs-logger": "^0.2.6", + "fast-json-stable-stringify": "^2.1.0", + "handlebars": "^4.7.8", + "json5": "^2.2.3", + "lodash.memoize": "^4.1.2", + "make-error": "^1.3.6", + "semver": "^7.7.3", + "type-fest": "^4.41.0", + "yargs-parser": "^21.1.1" + }, + "bin": { + "ts-jest": "cli.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "@babel/core": ">=7.0.0-beta.0 <8", + "@jest/transform": "^29.0.0 || ^30.0.0", + "@jest/types": "^29.0.0 || ^30.0.0", + "babel-jest": "^29.0.0 || ^30.0.0", + "jest": "^29.0.0 || ^30.0.0", + "jest-util": "^29.0.0 || ^30.0.0", + "typescript": ">=4.3 <6" + }, + "peerDependenciesMeta": { + "@babel/core": { + "optional": true + }, + "@jest/transform": { + "optional": true + }, + "@jest/types": { + "optional": true + }, + "babel-jest": { + "optional": true + }, + "esbuild": { + "optional": true + }, + "jest-util": { + "optional": true + } + } + }, + "node_modules/ts-jest/node_modules/type-fest": { + "version": "4.41.0", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", + "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", + "dev": true, + "license": "(MIT OR CC0-1.0)", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/type-check": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", @@ -5414,6 +5548,20 @@ "node": ">=14.17" } }, + "node_modules/uglify-js": { + "version": "3.19.3", + "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.19.3.tgz", + "integrity": "sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ==", + "dev": true, + "license": "BSD-2-Clause", + "optional": true, + "bin": { + "uglifyjs": "bin/uglifyjs" + }, + "engines": { + "node": ">=0.8.0" + } + }, "node_modules/undici-types": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", @@ -5518,6 +5666,13 @@ "node": ">=0.10.0" } }, + "node_modules/wordwrap": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", + "integrity": "sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==", + "dev": true, + "license": "MIT" + }, "node_modules/wrap-ansi": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", diff --git a/package.json b/package.json index e8c36a7..2a6b394 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,8 @@ "build:esm": "tsc --module esnext --outDir dist", "prepublishOnly": "npm run build", "test": "jest", + "test:watch": "jest --watch", + "test:coverage": "jest --coverage", "lint": "eslint src --ext .ts,.js,.cjs" }, "keywords": [ @@ -54,6 +56,7 @@ "@typescript-eslint/parser": "^6.13.1", "eslint": "^8.54.0", "jest": "^29.7.0", + "ts-jest": "^29.1.1", "typescript": "^5.3.2" }, "repository": { diff --git a/src/matchers/css/parser.test.ts b/src/matchers/css/parser.test.ts new file mode 100644 index 0000000..863dc79 --- /dev/null +++ b/src/matchers/css/parser.test.ts @@ -0,0 +1,10 @@ +import { Parser } from './parser'; + +describe('CSS Parser', () => { + test('test', () => { + const parser = new Parser('.test > div'); + const parsed = parser.parse(); + + expect(parsed).toBeTruthy(); + }); +}); diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts index 8e1ea0d..382c3d0 100644 --- a/src/matchers/css/parser.ts +++ b/src/matchers/css/parser.ts @@ -29,16 +29,22 @@ class TokenStream { return null; } - public consumeExpect(type: TokenType): Token { + public consumeExpect(...types: TokenType[]): Token { const token = this.consume(); - if (token?.type !== type) { - throw new ParsingError(`Expected token of type ${type}, but got ${token?.type || 'end of input'}`); + if (!token || !types.includes(token.type)) { + throw new ParsingError(`Expected token of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`); } return token; } + public eatWhitespace() { + while (this.peek()?.type === 'whitespace') { + this.consume(); + } + } + public setPosition(position: number) { this.position = position; } @@ -139,13 +145,24 @@ export class Parser { this.parseName(); } + private parseCombinator() { + this.tokenStream.eatWhitespace(); + const result = this.tryParseMultiple(this.parseIdentifier.bind(this), this.parseClass.bind(this), this.parseTypeSelector.bind(this)); + this.tokenStream.eatWhitespace(); + + const validOperators: TokenType[] = ['plus', 'left_angle_bracket', 'tilde']; + this.tokenStream.consumeExpect(...validOperators); + } + public parse() { - const test = this.tryParseMultiple( - this.parseIdentifier.bind(this), - this.parseNumber.bind(this), - this.parseClass.bind(this), - this.parseTypeSelector.bind(this), - ); + // const test = this.tryParseMultiple( + // this.parseIdentifier.bind(this), + // this.parseNumber.bind(this), + // this.parseClass.bind(this), + // this.parseTypeSelector.bind(this), + // ); + + this.parseCombinator(); console.log(test); diff --git a/tsconfig.test.json b/tsconfig.test.json new file mode 100644 index 0000000..3cf32b0 --- /dev/null +++ b/tsconfig.test.json @@ -0,0 +1,15 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "outDir": "./dist-test", + "noEmit": true + }, + "include": [ + "src/**/*", + "tests/**/*" + ], + "exclude": [ + "node_modules", + "dist" + ] +} From 4470bb8fbcef9cba41cabec89a1e4e2d31ba046c Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Tue, 23 Dec 2025 01:33:05 +0000 Subject: [PATCH 3/8] - Get parser working - Add types for AST --- src/matchers/css/ast.ts | 22 +++++ src/matchers/css/parser.test.ts | 4 +- src/matchers/css/parser.ts | 159 +++++++++++++++++++++++++++++--- src/matchers/css/tokenizer.ts | 2 +- 4 files changed, 172 insertions(+), 15 deletions(-) create mode 100644 src/matchers/css/ast.ts diff --git a/src/matchers/css/ast.ts b/src/matchers/css/ast.ts new file mode 100644 index 0000000..5a1e43b --- /dev/null +++ b/src/matchers/css/ast.ts @@ -0,0 +1,22 @@ +import { Token } from "./tokenizer"; + +interface Node { + type: string; +}; + +export interface SelectorNode extends Node { + type: 'Selector'; + value: string; +} + +export interface SelectorListNode extends Node { + type: 'SelectorList'; + selectors: SelectorNode[]; +} + +export interface CombinatorNode extends Node { + type: 'Combinator', + operator: Token; + left: Node | null; + right: Node | null; +} \ No newline at end of file diff --git a/src/matchers/css/parser.test.ts b/src/matchers/css/parser.test.ts index 863dc79..09aeb2e 100644 --- a/src/matchers/css/parser.test.ts +++ b/src/matchers/css/parser.test.ts @@ -2,9 +2,9 @@ import { Parser } from './parser'; describe('CSS Parser', () => { test('test', () => { - const parser = new Parser('.test > div'); + const parser = new Parser('.test > div + span ~ a foo +'); const parsed = parser.parse(); expect(parsed).toBeTruthy(); }); -}); +}); \ No newline at end of file diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts index 382c3d0..d108bac 100644 --- a/src/matchers/css/parser.ts +++ b/src/matchers/css/parser.ts @@ -1,3 +1,4 @@ +import { CombinatorNode, SelectorListNode, SelectorNode } from "./ast"; import { Token, tokenize, TokenType } from "./tokenizer"; class ParsingError extends Error { @@ -54,7 +55,7 @@ export class Parser { private tokenStream: TokenStream; private positionStack: number[]; - constructor(private selector: string) { + constructor(selector: string) { this.tokenStream = new TokenStream(tokenize(selector)); this.positionStack = []; } @@ -77,6 +78,9 @@ export class Parser { } } + /** + * Tries to parse using a given set of parsers, returning the first successful result. + */ private tryParseMultiple(...parseFns: (() => any)[]) { for (const parseFn of parseFns) { const result = this.tryParse(parseFn); @@ -88,7 +92,52 @@ export class Parser { return null; } - private parseNumber() { + /** + * Tries to parse using a given set of parsers, until none of them succeed. + */ + private tryParseUntil(parseFn: (() => any)) { + const results: any[] = []; + + while (true) { + const result = this.tryParse(parseFn); + + if (result === null) { + break; + } + + results.push(result); + } + + return results; + + // while (true) { + // const result = parseFns.some((fn) => { + // try { + // return fn(); + // } catch (err) { + // // Ignore errors + // return null; + // } + // }); + + // console.log(result); + + + // console.log(JSON.stringify(result)); + // break; + + // if (result === null) { + // break; + // } + + // results.push(result); + // } + } + + /** + * Parses a number from the token stream. + */ + private parseNumber(): number { let value = ''; let token: Token | null; @@ -104,7 +153,10 @@ export class Parser { return Number(value); } - private parseName() { + /** + * Tries to parse a valid CSS name (identifier) from the token stream. + */ + private parseName(): string { let value = ''; let token: Token | null; @@ -123,7 +175,10 @@ export class Parser { return value; } - private parseIdentifier() { + /** + * Parses an ID selector from the token stream. + */ + private parseIdentifier(): string { let value = ''; value += this.tokenStream.consumeExpect('hash').value; @@ -132,6 +187,9 @@ export class Parser { return value; } + /** + * Parses a class selector from the token stream. + */ private parseClass() { let value = ''; @@ -141,17 +199,92 @@ export class Parser { return value; } - private parseTypeSelector() { - this.parseName(); + /** + * Parses a single CSS selector from the token stream. + */ + private parseSelector(): SelectorNode | null { + const result = this.tryParseMultiple( + this.parseIdentifier.bind(this), + this.parseClass.bind(this), + this.parseName.bind(this), + ) + + console.log('parsed selector:', result); + + if (result === null) { + console.log(`position = ${JSON.stringify(this.tokenStream.peek())}`); + } + + if (result === null) { + return null; + } + + return { + type: 'Selector', + value: result!, + } + } + + /** + * Parses a list of CSS selectors from the token stream. + */ + private parseSelectors(): SelectorListNode { + const selectors = this.tryParseUntil(this.parseSelector.bind(this)); + + return { + type: 'SelectorList', + selectors, + }; + } + + private parseDescendantCombinator(): CombinatorNode | null { + console.log('parsing descendant combinator'); + + const left = this.parseSelectors(); + const operator = this.tokenStream.consumeExpect('whitespace'); + const right = this.parseCombinator(); + + return { + type: 'Combinator', + operator, + left, + right, + } } - private parseCombinator() { + private parseOtherCombinator(): CombinatorNode | null { + console.log('parsing other combinator'); + const left = this.parseSelectors(); + + console.log(`[parseOtherCombinator] left: ${JSON.stringify(left)}`); + console.log(`[parseOtherCombinator] position before eating whitespace: ${JSON.stringify(this.tokenStream.peek())}`); + this.tokenStream.eatWhitespace(); - const result = this.tryParseMultiple(this.parseIdentifier.bind(this), this.parseClass.bind(this), this.parseTypeSelector.bind(this)); + + const validCombinators: TokenType[] = ['left_angle_bracket', 'plus', 'tilde']; + + const operator = this.tokenStream.consumeExpect(...validCombinators); + + console.log('parsing combinator, operator:', operator); + this.tokenStream.eatWhitespace(); - const validOperators: TokenType[] = ['plus', 'left_angle_bracket', 'tilde']; - this.tokenStream.consumeExpect(...validOperators); + const right = this.parseCombinator(); + + return { + type: 'Combinator', + operator, + left, + right, + } + } + + private parseCombinator(): CombinatorNode { + return this.tryParseMultiple( + this.parseOtherCombinator.bind(this), + this.parseDescendantCombinator.bind(this), + this.parseSelectors.bind(this), + )!; } public parse() { @@ -162,9 +295,11 @@ export class Parser { // this.parseTypeSelector.bind(this), // ); - this.parseCombinator(); + // this.parseCombinator(); + + const test = this.parseCombinator(); - console.log(test); + console.log(JSON.stringify(test)); return test; } diff --git a/src/matchers/css/tokenizer.ts b/src/matchers/css/tokenizer.ts index 7921c59..bfdfb04 100644 --- a/src/matchers/css/tokenizer.ts +++ b/src/matchers/css/tokenizer.ts @@ -79,7 +79,7 @@ export const tokenize = (selector: string): Token[] => { case '>': tokens.push({ type: 'left_angle_bracket', value: char }); break; - case '+': + case '<': tokens.push({ type: 'right_angle_bracket', value: char }); break; case '$': From 032d0b05c69bd028c389d4e5bc1bcf1d5d1d5328 Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Thu, 25 Dec 2025 00:24:15 +0000 Subject: [PATCH 4/8] - Clean up files and refactoring into separate files --- src/matchers/css/ParsingError.ts | 8 + ...pecificity.ts => SpecificityCalculator.ts} | 0 src/matchers/css/TokenStream.ts | 100 ++++++++ src/matchers/css/ast.ts | 2 +- src/matchers/css/index.ts | 2 +- src/matchers/css/parser.test.ts | 4 +- src/matchers/css/parser.ts | 221 ++++++++---------- .../css/{tokenizer.ts => tokenize.ts} | 2 +- src/matchers/toHaveCssStyle.ts | 2 +- 9 files changed, 211 insertions(+), 130 deletions(-) create mode 100644 src/matchers/css/ParsingError.ts rename src/matchers/css/{specificity.ts => SpecificityCalculator.ts} (100%) create mode 100644 src/matchers/css/TokenStream.ts rename src/matchers/css/{tokenizer.ts => tokenize.ts} (98%) diff --git a/src/matchers/css/ParsingError.ts b/src/matchers/css/ParsingError.ts new file mode 100644 index 0000000..19c75cf --- /dev/null +++ b/src/matchers/css/ParsingError.ts @@ -0,0 +1,8 @@ +/** + * Represents a parsing error with a specific message. + */ +export class ParsingError extends Error { + constructor(message: string, location?: { line: number; column: number }) { + super(`Parsing Error: ${message}`); + } +} diff --git a/src/matchers/css/specificity.ts b/src/matchers/css/SpecificityCalculator.ts similarity index 100% rename from src/matchers/css/specificity.ts rename to src/matchers/css/SpecificityCalculator.ts diff --git a/src/matchers/css/TokenStream.ts b/src/matchers/css/TokenStream.ts new file mode 100644 index 0000000..62a5296 --- /dev/null +++ b/src/matchers/css/TokenStream.ts @@ -0,0 +1,100 @@ +import { ParsingError } from "./ParsingError"; +import { Token, TokenType } from "./tokenize"; + +/** + * Represents a stream of tokens for parsing, including methods to consume and peek tokens, + * as well as state and method for managing the parsing position. + */ +export class TokenStream { + public position: number; + public positionStack: number[]; + + constructor (private tokens: Token[]) { + this.position = 0; + this.positionStack = []; + } + + /** + * Peeks at the next token in the stream, without consuming it. + * @returns The next token, or null if we're at the end of the stream. + */ + public peek(): Token | null { + return this.tokens[this.position] || null; + } + + /** + * Consumes and returns the next token in the stream. + * @returns The consumed token, or null if we're at the end of the stream. + */ + public consume(): Token | null { + return this.tokens[this.position++] || null; + } + + /** + * Consumes a token if it matches the expected type. + * @returns The consumed token, or null if the next token does not match the expected type. + */ + public consumeIf(type: TokenType): Token | null { + const token = this.peek(); + if (token?.type === type) { + return this.consume(); + } + return null; + } + + /** + * Consumes a token matching one of the expected types. + * @returns The consumed token. + * @throws {ParsingError} If the next token does not match any of the expected types. + */ + public consumeExpect(...types: TokenType[]): Token { + const token = this.consume(); + + if (!token || !types.includes(token.type)) { + throw new ParsingError(`Expected token of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`); + } + + return token; + } + + /** + * Consumes a whitespace token. + */ + public eatWhitespace() { + this.consumeIf('whitespace'); + } + + /** + * Stores the current position in the position stack. + */ + public storePosition() { + this.positionStack.push(this.position); + } + + /** + * Clears the last stored position without restoring it. + */ + public clearPosition() { + this.positionStack.pop(); + } + + /** + * Restores the last stored position from the position stack. + */ + public restorePosition() { + const pos = this.positionStack.pop(); + if (pos !== undefined) { + this.position = pos; + } + } + + /** + * Expects the end of input, throwing an error if not. + * @throws {ParsingError} If the end of the input has not been reached. + */ + public expectEndOfInput() { + if (this.peek() !== null) { + throw new ParsingError(`Expected end of input, but got token of type ${this.peek()!.type}`); + } + } +} \ No newline at end of file diff --git a/src/matchers/css/ast.ts b/src/matchers/css/ast.ts index 5a1e43b..d5f180c 100644 --- a/src/matchers/css/ast.ts +++ b/src/matchers/css/ast.ts @@ -1,4 +1,4 @@ -import { Token } from "./tokenizer"; +import { Token } from "./tokenize"; interface Node { type: string; diff --git a/src/matchers/css/index.ts b/src/matchers/css/index.ts index c278cf4..31e876d 100644 --- a/src/matchers/css/index.ts +++ b/src/matchers/css/index.ts @@ -1 +1 @@ -export { Parser } from "./parser"; \ No newline at end of file +export { Parser } from "./Parser"; \ No newline at end of file diff --git a/src/matchers/css/parser.test.ts b/src/matchers/css/parser.test.ts index 09aeb2e..5e4f657 100644 --- a/src/matchers/css/parser.test.ts +++ b/src/matchers/css/parser.test.ts @@ -1,8 +1,8 @@ -import { Parser } from './parser'; +import { Parser } from './Parser'; describe('CSS Parser', () => { test('test', () => { - const parser = new Parser('.test > div + span ~ a foo +'); + const parser = new Parser('.test #shit >'); const parsed = parser.parse(); expect(parsed).toBeTruthy(); diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts index d108bac..c7ebeb9 100644 --- a/src/matchers/css/parser.ts +++ b/src/matchers/css/parser.ts @@ -1,55 +1,30 @@ import { CombinatorNode, SelectorListNode, SelectorNode } from "./ast"; -import { Token, tokenize, TokenType } from "./tokenizer"; +import { ParsingError } from "./ParsingError"; +import { Token, tokenize, TokenType } from "./tokenize"; +import { TokenStream } from "./TokenStream"; -class ParsingError extends Error { - constructor(message: string) { - super(`Parsing Error: ${message}`); - } +type TryParseResult = { + errors: ParsingError[]; + result: T | null; } -class TokenStream { - public position: number; - - constructor (private tokens: Token[]) { - this.position = 0; - } - - public peek(): Token | null { - return this.tokens[this.position] || null; - } +const isTryParseResult = (obj: any): obj is TryParseResult => { + return obj && typeof obj === 'object' && 'errors' in obj && 'result' in obj; +}; - public consume(): Token | null { - return this.tokens[this.position++] || null; - } +const unwrapResult = (tryParseResult: T | TryParseResult): TryParseResult => { + let result: T | null, errors: ParsingError[]; - public consumeIf(type: TokenType): Token | null { - const token = this.peek(); - if (token?.type === type) { - return this.consume(); - } - return null; + if (isTryParseResult(tryParseResult)) { + result = tryParseResult.result; + errors = tryParseResult.errors; + } else { + result = tryParseResult as T; + errors = []; } - public consumeExpect(...types: TokenType[]): Token { - const token = this.consume(); - - if (!token || !types.includes(token.type)) { - throw new ParsingError(`Expected token of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`); - } - - return token; - } - - public eatWhitespace() { - while (this.peek()?.type === 'whitespace') { - this.consume(); - } - } - - public setPosition(position: number) { - this.position = position; - } -} + return { result, errors }; +}; export class Parser { private tokenStream: TokenStream; @@ -65,73 +40,76 @@ export class Parser { * * If the parse fails, the token stream is reset to its original state. */ - private tryParse(parseFn: () => any) { - this.positionStack.push(this.tokenStream.position); + private tryParse(parseFn: () => T): TryParseResult { + this.tokenStream.storePosition(); try { - const result = parseFn(); - this.positionStack.pop(); - return result; - } catch (err) { - this.tokenStream.setPosition(this.positionStack.pop()!); - return null; + const { result, errors } = unwrapResult(parseFn()); + this.tokenStream.clearPosition(); + + return { + errors: [], + result, + }; + } catch (err: ParsingError | any) { + console.log(`[tryParse] caught error: ${err.message}`); + + this.tokenStream.restorePosition(); + + return { + errors: [err.message], + result: null, + }; } } /** * Tries to parse using a given set of parsers, returning the first successful result. */ - private tryParseMultiple(...parseFns: (() => any)[]) { + private tryParseMultiple(...parseFns: (() => T)[]): TryParseResult { + const totalErrors: ParsingError[] = []; + for (const parseFn of parseFns) { - const result = this.tryParse(parseFn); + const { result, errors } = this.tryParse(parseFn); + + // console.log(`[tryParseMultiple] errors=${JSON.stringify(errors)}`); + if (result !== null) { - return result; + return { result, errors: [] }; } + + totalErrors.push(...errors); } - return null; + return { result: null, errors: totalErrors }; } /** - * Tries to parse using a given set of parsers, until none of them succeed. + * Tries to parse using a given parser, until it fails. */ - private tryParseUntil(parseFn: (() => any)) { + private tryParseUntil(parseFn: (() => any)): TryParseResult { + const totalErrors: ParsingError[] = []; const results: any[] = []; + let count = 0; + while (true) { - const result = this.tryParse(parseFn); + const { result, errors } = this.tryParse(parseFn); + + console.log(`[tryParseUntil] errors=${JSON.stringify(errors)}`); if (result === null) { + totalErrors.push(...errors); break; } results.push(result); } - return results; - - // while (true) { - // const result = parseFns.some((fn) => { - // try { - // return fn(); - // } catch (err) { - // // Ignore errors - // return null; - // } - // }); - - // console.log(result); - - - // console.log(JSON.stringify(result)); - // break; - - // if (result === null) { - // break; - // } - - // results.push(result); - // } + return { + errors: results.length === 0 ? totalErrors : [], + result: results, + }; } /** @@ -202,44 +180,50 @@ export class Parser { /** * Parses a single CSS selector from the token stream. */ - private parseSelector(): SelectorNode | null { - const result = this.tryParseMultiple( + private parseSelector(): TryParseResult | null { + const { result, errors } = this.tryParseMultiple( this.parseIdentifier.bind(this), this.parseClass.bind(this), this.parseName.bind(this), ) - console.log('parsed selector:', result); - - if (result === null) { - console.log(`position = ${JSON.stringify(this.tokenStream.peek())}`); - } + // console.log(`[parseSelector] result=${JSON.stringify(result)}`); + console.log(`[parseSelector] errors=${JSON.stringify(errors)}`); if (result === null) { - return null; + return { result, errors }; } return { + errors: [], + result : { type: 'Selector', value: result!, - } + }, + }; } /** * Parses a list of CSS selectors from the token stream. */ private parseSelectors(): SelectorListNode { - const selectors = this.tryParseUntil(this.parseSelector.bind(this)); + const { result, errors } = this.tryParseUntil(this.parseSelector.bind(this)); - return { + console.log(`[parseSelectors] errors=${JSON.stringify(errors)}`); + + if (result === null || result.length === 0) { + throw new ParsingError('Expected at least one selector'); + } + + const node: SelectorListNode = { type: 'SelectorList', - selectors, - }; + selectors: result, + } + + return node; } private parseDescendantCombinator(): CombinatorNode | null { - console.log('parsing descendant combinator'); - const left = this.parseSelectors(); const operator = this.tokenStream.consumeExpect('whitespace'); const right = this.parseCombinator(); @@ -253,22 +237,13 @@ export class Parser { } private parseOtherCombinator(): CombinatorNode | null { - console.log('parsing other combinator'); const left = this.parseSelectors(); - - console.log(`[parseOtherCombinator] left: ${JSON.stringify(left)}`); - console.log(`[parseOtherCombinator] position before eating whitespace: ${JSON.stringify(this.tokenStream.peek())}`); - this.tokenStream.eatWhitespace(); - const validCombinators: TokenType[] = ['left_angle_bracket', 'plus', 'tilde']; - + const validCombinators: TokenType[] = ['left_angle_bracket', 'plus', 'tilde']; const operator = this.tokenStream.consumeExpect(...validCombinators); - console.log('parsing combinator, operator:', operator); - this.tokenStream.eatWhitespace(); - const right = this.parseCombinator(); return { @@ -279,33 +254,31 @@ export class Parser { } } - private parseCombinator(): CombinatorNode { - return this.tryParseMultiple( + private parseCombinator(): CombinatorNode | SelectorListNode { + const { result, errors } = this.tryParseMultiple( this.parseOtherCombinator.bind(this), this.parseDescendantCombinator.bind(this), this.parseSelectors.bind(this), )!; + + console.log(`[parseCombinator] result=${JSON.stringify(result)}, errors=${JSON.stringify(errors)}`); + + if (result === null) { + throw new ParsingError('Failed to parse combinator: ' + errors.map(e => e.message).join('; ')); + } + + return result; } public parse() { - // const test = this.tryParseMultiple( - // this.parseIdentifier.bind(this), - // this.parseNumber.bind(this), - // this.parseClass.bind(this), - // this.parseTypeSelector.bind(this), - // ); + const test = this.parseCombinator(); - // this.parseCombinator(); + this.tokenStream.eatWhitespace(); - const test = this.parseCombinator(); + this.tokenStream.expectEndOfInput(); console.log(JSON.stringify(test)); return test; } -} - -// .this-is-a-class #id[attr="value"]::after:hover div > span + p ~ a:first-child - -// CSS selector that uses all possible syntax features -// div.class1.class2#id[attr="value"]:hover::after > span + p ~ a:first-child +} \ No newline at end of file diff --git a/src/matchers/css/tokenizer.ts b/src/matchers/css/tokenize.ts similarity index 98% rename from src/matchers/css/tokenizer.ts rename to src/matchers/css/tokenize.ts index bfdfb04..0741b3a 100644 --- a/src/matchers/css/tokenizer.ts +++ b/src/matchers/css/tokenize.ts @@ -30,7 +30,7 @@ export type Token = { const letterRegex = /[a-zA-Z]/; const digitRegex = /[0-9]/; -const whitespaceRegex = /\s/; +const whitespaceRegex = /\s+/; export const tokenize = (selector: string): Token[] => { return selector.split('').reduce((tokens, char) => { diff --git a/src/matchers/toHaveCssStyle.ts b/src/matchers/toHaveCssStyle.ts index 21de3b7..30b3f7d 100644 --- a/src/matchers/toHaveCssStyle.ts +++ b/src/matchers/toHaveCssStyle.ts @@ -1,7 +1,7 @@ import type { MatcherFunction } from 'expect'; import { CSSModuleSnapshotsContext } from './context'; -import { SpecificityCalculator } from './css/specificity'; +import { SpecificityCalculator } from './css/SpecificityCalculator'; import { StylesheetRule } from './context/Stylesheet'; type UnmatchedProperties = { From 2b45920067749cc136e3280d5113ecbbf0e30629 Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Thu, 25 Dec 2025 00:36:57 +0000 Subject: [PATCH 5/8] - More cleaning up --- src/matchers/css/TokenStream.ts | 3 + src/matchers/css/TryParseResult.ts | 33 +++++++++ src/matchers/css/parser.ts | 110 +++++++++++------------------ 3 files changed, 79 insertions(+), 67 deletions(-) create mode 100644 src/matchers/css/TryParseResult.ts diff --git a/src/matchers/css/TokenStream.ts b/src/matchers/css/TokenStream.ts index 62a5296..8a3f068 100644 --- a/src/matchers/css/TokenStream.ts +++ b/src/matchers/css/TokenStream.ts @@ -93,6 +93,9 @@ export class TokenStream { * @throws {ParsingError} If the end of the input has not been reached. */ public expectEndOfInput() { + // Consume any trailing whitespace. + this.eatWhitespace(); + if (this.peek() !== null) { throw new ParsingError(`Expected end of input, but got token of type ${this.peek()!.type}`); } diff --git a/src/matchers/css/TryParseResult.ts b/src/matchers/css/TryParseResult.ts new file mode 100644 index 0000000..20263dc --- /dev/null +++ b/src/matchers/css/TryParseResult.ts @@ -0,0 +1,33 @@ +import { ParsingError } from "./ParsingError"; + +/** + * Represents the result of a try-parse operation, including any errors encountered, and the parsed result if successful. + */ +export type TryParseResult = { + errors: ParsingError[]; + result: T | null; +} + +/** + * Type guard to check if an object is a TryParseResult. + */ +export const isTryParseResult = (obj: any): obj is TryParseResult => { + return obj && typeof obj === 'object' && 'errors' in obj && 'result' in obj; +}; + +/** + * Unwraps a TryParseResult, returning a consistent structure. + */ +export const unwrapResult = (tryParseResult: T | TryParseResult): TryParseResult => { + let result: T | null, errors: ParsingError[]; + + if (isTryParseResult(tryParseResult)) { + result = tryParseResult.result; + errors = tryParseResult.errors; + } else { + result = tryParseResult as T; + errors = []; + } + + return { result, errors }; +}; diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts index c7ebeb9..9145fd5 100644 --- a/src/matchers/css/parser.ts +++ b/src/matchers/css/parser.ts @@ -2,43 +2,24 @@ import { CombinatorNode, SelectorListNode, SelectorNode } from "./ast"; import { ParsingError } from "./ParsingError"; import { Token, tokenize, TokenType } from "./tokenize"; import { TokenStream } from "./TokenStream"; +import { TryParseResult, unwrapResult } from "./TryParseResult"; -type TryParseResult = { - errors: ParsingError[]; - result: T | null; -} - -const isTryParseResult = (obj: any): obj is TryParseResult => { - return obj && typeof obj === 'object' && 'errors' in obj && 'result' in obj; -}; - -const unwrapResult = (tryParseResult: T | TryParseResult): TryParseResult => { - let result: T | null, errors: ParsingError[]; - - if (isTryParseResult(tryParseResult)) { - result = tryParseResult.result; - errors = tryParseResult.errors; - } else { - result = tryParseResult as T; - errors = []; - } - - return { result, errors }; -}; - +/** + * Parser for CSS selectors, implemented using combinators, utilises a TokenStream to read tokens and build an AST. + */ export class Parser { private tokenStream: TokenStream; - private positionStack: number[]; constructor(selector: string) { this.tokenStream = new TokenStream(tokenize(selector)); - this.positionStack = []; } + // === Combinator Parsing Helpers === + /** - * Attempts to parse using the provided parse function. - * - * If the parse fails, the token stream is reset to its original state. + * Attempts to parse using the provided parse function. If the parsing fails, the token stream is reset to its original state. + * @return {TryParseResult} The result of the parsing attempt, including any errors encountered. + * @typeParam T The type of the parsing result. */ private tryParse(parseFn: () => T): TryParseResult { this.tokenStream.storePosition(); @@ -52,8 +33,6 @@ export class Parser { result, }; } catch (err: ParsingError | any) { - console.log(`[tryParse] caught error: ${err.message}`); - this.tokenStream.restorePosition(); return { @@ -64,7 +43,9 @@ export class Parser { } /** - * Tries to parse using a given set of parsers, returning the first successful result. + * Tries to parse using a given set of parsers, returning the first successful result, if any. + * @return {TryParseResult} The result of the parsing attempt, including accumulated errors encountered. + * @typeParam T The type of the parsing result. */ private tryParseMultiple(...parseFns: (() => T)[]): TryParseResult { const totalErrors: ParsingError[] = []; @@ -72,8 +53,6 @@ export class Parser { for (const parseFn of parseFns) { const { result, errors } = this.tryParse(parseFn); - // console.log(`[tryParseMultiple] errors=${JSON.stringify(errors)}`); - if (result !== null) { return { result, errors: [] }; } @@ -85,7 +64,9 @@ export class Parser { } /** - * Tries to parse using a given parser, until it fails. + * Tries to repeatedly parse with the given parsing function, until it fails. + * @return {TryParseResult} The result of the parsing attempt, including accumulated errors encountered. + * @typeParam T The type of the parsing result. */ private tryParseUntil(parseFn: (() => any)): TryParseResult { const totalErrors: ParsingError[] = []; @@ -112,24 +93,26 @@ export class Parser { }; } - /** - * Parses a number from the token stream. - */ - private parseNumber(): number { - let value = ''; - let token: Token | null; - - while (token = this.tokenStream.peek()) { - if (token.type !== 'digit') { - break; - } + // /** + // * Parses a number from the token stream. + // */ + // private parseNumber(): number { + // let value = ''; + // let token: Token | null; + + // while (token = this.tokenStream.peek()) { + // if (token.type !== 'digit') { + // break; + // } - const next = this.tokenStream.consume(); - value += next?.value; - } + // const next = this.tokenStream.consume(); + // value += next?.value; + // } - return Number(value); - } + // return Number(value); + // } + + // === Selectors === /** * Tries to parse a valid CSS name (identifier) from the token stream. @@ -187,9 +170,6 @@ export class Parser { this.parseName.bind(this), ) - // console.log(`[parseSelector] result=${JSON.stringify(result)}`); - console.log(`[parseSelector] errors=${JSON.stringify(errors)}`); - if (result === null) { return { result, errors }; } @@ -204,13 +184,10 @@ export class Parser { } /** - * Parses a list of CSS selectors from the token stream. + * Parses a compound CSS selector (one or more selectors not separated by a combinator) from the token stream. */ - private parseSelectors(): SelectorListNode { - const { result, errors } = this.tryParseUntil(this.parseSelector.bind(this)); - - console.log(`[parseSelectors] errors=${JSON.stringify(errors)}`); - + private parseCompoundSelector(): SelectorListNode { + const { result } = this.tryParseUntil(this.parseSelector.bind(this)); if (result === null || result.length === 0) { throw new ParsingError('Expected at least one selector'); } @@ -223,8 +200,10 @@ export class Parser { return node; } + // === Combinators === + private parseDescendantCombinator(): CombinatorNode | null { - const left = this.parseSelectors(); + const left = this.parseCompoundSelector(); const operator = this.tokenStream.consumeExpect('whitespace'); const right = this.parseCombinator(); @@ -237,7 +216,7 @@ export class Parser { } private parseOtherCombinator(): CombinatorNode | null { - const left = this.parseSelectors(); + const left = this.parseCompoundSelector(); this.tokenStream.eatWhitespace(); const validCombinators: TokenType[] = ['left_angle_bracket', 'plus', 'tilde']; @@ -258,11 +237,9 @@ export class Parser { const { result, errors } = this.tryParseMultiple( this.parseOtherCombinator.bind(this), this.parseDescendantCombinator.bind(this), - this.parseSelectors.bind(this), + this.parseCompoundSelector.bind(this), )!; - console.log(`[parseCombinator] result=${JSON.stringify(result)}, errors=${JSON.stringify(errors)}`); - if (result === null) { throw new ParsingError('Failed to parse combinator: ' + errors.map(e => e.message).join('; ')); } @@ -270,11 +247,10 @@ export class Parser { return result; } + // == Entry Point == + public parse() { const test = this.parseCombinator(); - - this.tokenStream.eatWhitespace(); - this.tokenStream.expectEndOfInput(); console.log(JSON.stringify(test)); From 2fd8449df9f7be9fcd7073651fcc20c1e0b8eb37 Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Fri, 26 Dec 2025 01:19:37 +0000 Subject: [PATCH 6/8] - Switch to parsing ast nodes in all parser fns - Docs - Add unwrap + throw util - Add additional types and utils to the AST & TokenStream --- src/matchers/css/TokenStream.ts | 4 + src/matchers/css/TryParseResult.ts | 18 +++ src/matchers/css/ast.ts | 21 +++- src/matchers/css/parser.test.ts | 15 ++- src/matchers/css/parser.ts | 172 ++++++++++++++++++++--------- 5 files changed, 173 insertions(+), 57 deletions(-) diff --git a/src/matchers/css/TokenStream.ts b/src/matchers/css/TokenStream.ts index 8a3f068..e536f9d 100644 --- a/src/matchers/css/TokenStream.ts +++ b/src/matchers/css/TokenStream.ts @@ -100,4 +100,8 @@ export class TokenStream { throw new ParsingError(`Expected end of input, but got token of type ${this.peek()!.type}`); } } + + public peekRemainder(): string { + return this.tokens.slice(this.position).map(t => t.value).join(''); + } } \ No newline at end of file diff --git a/src/matchers/css/TryParseResult.ts b/src/matchers/css/TryParseResult.ts index 20263dc..6efc958 100644 --- a/src/matchers/css/TryParseResult.ts +++ b/src/matchers/css/TryParseResult.ts @@ -31,3 +31,21 @@ export const unwrapResult = (tryParseResult: T | TryParseResult): TryParse return { result, errors }; }; + +/** + * Unwraps a TryParseResult or throws an error if parsing failed. + * @param tryParseResult The TryParseResult to unwrap. + * @param errorMessage Optional custom error message to use if parsing failed. + * @returns The parsed result if successful. + * @throws {ParsingError} If parsing failed, with accumulated error messages. + */ +export const unwrapResultOrThrow = (tryParseResult: T | TryParseResult, errorMessage?: string): T => { + const { result, errors } = unwrapResult(tryParseResult); + + if (result === null || (Array.isArray(result) && result.length === 0)) { + const errorMessages = errors.map(err => err.message).join('; '); + throw new ParsingError(errorMessage ?? errorMessages); + } + + return result; +}; diff --git a/src/matchers/css/ast.ts b/src/matchers/css/ast.ts index d5f180c..215fa86 100644 --- a/src/matchers/css/ast.ts +++ b/src/matchers/css/ast.ts @@ -9,8 +9,25 @@ export interface SelectorNode extends Node { value: string; } -export interface SelectorListNode extends Node { - type: 'SelectorList'; +export interface StringNode extends Node { + type: 'String'; + value: string; +} + +export interface Expression extends Node { + type: 'Expression', + attribute: SelectorNode; + operator: string; + value: SelectorNode | StringNode; +} + +export interface AttributeSelectorNode extends Node { + type: 'AttributeSelector'; + expression: Expression; +} + +export interface CompoundSelectorNode extends Node { + type: 'CompoundSelector'; selectors: SelectorNode[]; } diff --git a/src/matchers/css/parser.test.ts b/src/matchers/css/parser.test.ts index 5e4f657..abe9f51 100644 --- a/src/matchers/css/parser.test.ts +++ b/src/matchers/css/parser.test.ts @@ -1,10 +1,17 @@ import { Parser } from './Parser'; describe('CSS Parser', () => { - test('test', () => { - const parser = new Parser('.test #shit >'); - const parsed = parser.parse(); + describe('complex combinator', () => { + test('test successful', () => { + const parser = new Parser('#test[data-testid="value"] > p.example[data-role="main"]'); + const parsed = parser.parse(); + expect(parsed).toBeDefined(); + }); - expect(parsed).toBeTruthy(); + test('test failure', () => { + const parser = new Parser('#test[data-testid="value" > p.example[data-role="main"]'); // Missing closing bracket + parser.parse(); + // expect(() => parser.parse()).toThrow(); + }); }); }); \ No newline at end of file diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts index 9145fd5..583e07a 100644 --- a/src/matchers/css/parser.ts +++ b/src/matchers/css/parser.ts @@ -1,8 +1,8 @@ -import { CombinatorNode, SelectorListNode, SelectorNode } from "./ast"; +import { AttributeSelectorNode, CombinatorNode, CompoundSelectorNode, Expression, SelectorNode, StringNode } from "./ast"; import { ParsingError } from "./ParsingError"; import { Token, tokenize, TokenType } from "./tokenize"; import { TokenStream } from "./TokenStream"; -import { TryParseResult, unwrapResult } from "./TryParseResult"; +import { TryParseResult, unwrapResult, unwrapResultOrThrow } from "./TryParseResult"; /** * Parser for CSS selectors, implemented using combinators, utilises a TokenStream to read tokens and build an AST. @@ -60,6 +60,8 @@ export class Parser { totalErrors.push(...errors); } + console.log('tryParseMultiple totalErrors:', totalErrors); + return { result: null, errors: totalErrors }; } @@ -77,8 +79,6 @@ export class Parser { while (true) { const { result, errors } = this.tryParse(parseFn); - console.log(`[tryParseUntil] errors=${JSON.stringify(errors)}`); - if (result === null) { totalErrors.push(...errors); break; @@ -93,6 +93,22 @@ export class Parser { }; } + /** + * Combines two parsing functions into one, returning a tuple of their results. + * @return {() => [T, U]} A new parsing function that returns a tuple of the results from the two input parsing functions. + * @typeParam T The type of the first parsing result. + * @typeParam U The type of the second parsing result. + * @throws {ParsingError} If either of the parsing functions fail. + */ + private and(parseFn1: () => T, parseFn2: () => U): () => [T, U] { + return () => { + const result1 = parseFn1(); + const result2 = parseFn2(); + + return [result1, result2]; + }; + } + // /** // * Parses a number from the token stream. // */ @@ -117,7 +133,7 @@ export class Parser { /** * Tries to parse a valid CSS name (identifier) from the token stream. */ - private parseName(): string { + private parseName(): SelectorNode { let value = ''; let token: Token | null; @@ -133,79 +149,131 @@ export class Parser { value += next?.value; } - return value; + return { + type: 'Selector', + value, + } } /** * Parses an ID selector from the token stream. */ - private parseIdentifier(): string { - let value = ''; - - value += this.tokenStream.consumeExpect('hash').value; - value += this.parseName(); + private parseIdentifier(): SelectorNode { + console.log(this.tokenStream.consumeExpect('hash')); + let { value: name } = this.parseName(); - return value; + return { + type: 'Selector', + value: `#${name}`, + } } /** * Parses a class selector from the token stream. */ - private parseClass() { + private parseClass(): SelectorNode { + this.tokenStream.consumeExpect('period'); + let { value: className } = this.parseName(); + + return { + type: 'Selector', + value: `.${className}`, + } + } + + private parseString(): StringNode { let value = ''; - value += this.tokenStream.consumeExpect('period').value; - value += this.parseName(); + value += this.tokenStream.consumeExpect('quote').value; + + while (true) { + const next = this.tokenStream.consume(); + if (next === null) { + throw new ParsingError('Unterminated string literal'); + } + + if (next.type === 'quote') { + value += next.value; + break; + } + + value += next.value; + } + + return { + type: 'String', + value, + }; + } + + private parseExpression(): Expression { + const attribute = this.parseName(); + const operator = this.tokenStream.consumeExpect('tilde', 'pipe', 'caret', 'dollar', 'asterisk', 'equals').value; + const optionalOperator = this.tokenStream.consumeIf('equals') + const value = this.tryParseMultiple(this.parseString.bind(this), this.parseName.bind(this)); + + if (value.result === null) { + throw new ParsingError('Expected string or name as attribute selector value'); + } + + return { + type: 'Expression', + attribute, + operator: operator + (optionalOperator ? optionalOperator.value : ''), + value: value.result, + }; + } + + private parseAttributeSelector(): AttributeSelectorNode { + console.log(this.tokenStream.peekRemainder()) + + this.tokenStream.consumeExpect('left_bracket').value; + const expression = this.parseExpression(); - return value; + console.log(this.tokenStream.peekRemainder()); + this.tokenStream.consumeExpect('right_bracket').value; + + return { + type: 'AttributeSelector', + expression, + }; } /** * Parses a single CSS selector from the token stream. */ - private parseSelector(): TryParseResult | null { - const { result, errors } = this.tryParseMultiple( + private parseSelector(): SelectorNode | AttributeSelectorNode { + const result = this.tryParseMultiple( this.parseIdentifier.bind(this), this.parseClass.bind(this), this.parseName.bind(this), - ) - - if (result === null) { - return { result, errors }; - } + this.parseAttributeSelector.bind(this) + ); - return { - errors: [], - result : { - type: 'Selector', - value: result!, - }, - }; + return unwrapResultOrThrow(result); } /** * Parses a compound CSS selector (one or more selectors not separated by a combinator) from the token stream. */ - private parseCompoundSelector(): SelectorListNode { - const { result } = this.tryParseUntil(this.parseSelector.bind(this)); - if (result === null || result.length === 0) { - throw new ParsingError('Expected at least one selector'); - } + private parseCompoundSelector(): CompoundSelectorNode { + const result = this.tryParseUntil(this.parseSelector.bind(this)); + const selectors = unwrapResultOrThrow(result, 'Expected at least one selector'); - const node: SelectorListNode = { - type: 'SelectorList', - selectors: result, + const node: CompoundSelectorNode = { + type: 'CompoundSelector', + selectors, } return node; } - // === Combinators === + // === CSS Combinators === - private parseDescendantCombinator(): CombinatorNode | null { + private parseDescendantCombinator(): CombinatorNode { const left = this.parseCompoundSelector(); const operator = this.tokenStream.consumeExpect('whitespace'); - const right = this.parseCombinator(); + const right = this.parseComplexSelector(); return { type: 'Combinator', @@ -215,7 +283,7 @@ export class Parser { } } - private parseOtherCombinator(): CombinatorNode | null { + private parseOtherCombinator(): CombinatorNode { const left = this.parseCompoundSelector(); this.tokenStream.eatWhitespace(); @@ -223,7 +291,7 @@ export class Parser { const operator = this.tokenStream.consumeExpect(...validCombinators); this.tokenStream.eatWhitespace(); - const right = this.parseCombinator(); + const right = this.parseComplexSelector(); return { type: 'Combinator', @@ -233,25 +301,27 @@ export class Parser { } } - private parseCombinator(): CombinatorNode | SelectorListNode { - const { result, errors } = this.tryParseMultiple( + /** + * Parses a complex CSS selector, which may include combinators, from the token stream. + */ + private parseComplexSelector(): CombinatorNode | CompoundSelectorNode | SelectorNode | AttributeSelectorNode { + const tryParseResult = this.tryParseMultiple( this.parseOtherCombinator.bind(this), this.parseDescendantCombinator.bind(this), this.parseCompoundSelector.bind(this), )!; - if (result === null) { - throw new ParsingError('Failed to parse combinator: ' + errors.map(e => e.message).join('; ')); - } + console.log(`tryParseResult: ${JSON.stringify(tryParseResult)}`); - return result; + const selector = unwrapResultOrThrow(tryParseResult, 'Failed to parse complex selector') + return selector; } // == Entry Point == public parse() { - const test = this.parseCombinator(); - this.tokenStream.expectEndOfInput(); + const test = this.parseComplexSelector(); + // this.tokenStream.expectEndOfInput(); console.log(JSON.stringify(test)); From e3ec1b78ba74f9fb51bbbb44d6e49f3b5946102a Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Sat, 27 Dec 2025 01:55:48 +0000 Subject: [PATCH 7/8] - Improve error handling, keep track of deepest error encountered and surface to the user - Report line/column numbers in parsing errors - Refactor types --- src/matchers/css/ParsingError.ts | 10 +++- src/matchers/css/TokenStream.ts | 15 ++++- src/matchers/css/TryParseResult.ts | 11 ++-- src/matchers/css/parser.ts | 84 ++++++++++++++++++--------- src/matchers/css/tokenize.ts | 91 +++++++++++++----------------- src/matchers/css/types.ts | 41 ++++++++++++++ 6 files changed, 163 insertions(+), 89 deletions(-) create mode 100644 src/matchers/css/types.ts diff --git a/src/matchers/css/ParsingError.ts b/src/matchers/css/ParsingError.ts index 19c75cf..b4be17d 100644 --- a/src/matchers/css/ParsingError.ts +++ b/src/matchers/css/ParsingError.ts @@ -1,8 +1,14 @@ +import { ParsingErrorPosition } from "./types"; + /** * Represents a parsing error with a specific message. */ export class ParsingError extends Error { - constructor(message: string, location?: { line: number; column: number }) { - super(`Parsing Error: ${message}`); + public location: ParsingErrorPosition; + + constructor(message: string, location: ParsingErrorPosition) { + super(`Parsing Error [${location.line}:${location.column}]: ${message}`); + + this.location = location; } } diff --git a/src/matchers/css/TokenStream.ts b/src/matchers/css/TokenStream.ts index e536f9d..081e8c1 100644 --- a/src/matchers/css/TokenStream.ts +++ b/src/matchers/css/TokenStream.ts @@ -1,5 +1,5 @@ import { ParsingError } from "./ParsingError"; -import { Token, TokenType } from "./tokenize"; +import { Token, TokenType, ParsingErrorPosition } from "./types"; /** * Represents a stream of tokens for parsing, including methods to consume and peek tokens, @@ -51,7 +51,7 @@ export class TokenStream { const token = this.consume(); if (!token || !types.includes(token.type)) { - throw new ParsingError(`Expected token of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`); + throw new ParsingError(`Expected token of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`, this.getPositionForError()); } return token; @@ -97,11 +97,20 @@ export class TokenStream { this.eatWhitespace(); if (this.peek() !== null) { - throw new ParsingError(`Expected end of input, but got token of type ${this.peek()!.type}`); + throw new ParsingError(`Expected end of input, but got token of type ${this.peek()!.type}`, this.getPositionForError()); } } public peekRemainder(): string { return this.tokens.slice(this.position).map(t => t.value).join(''); } + + public getPositionForError(): ParsingErrorPosition { + const tokenToUse = this.peek() || this.tokens[this.tokens.length - 1]; + + return { + position: this.position, + ...tokenToUse.position, + }; + } } \ No newline at end of file diff --git a/src/matchers/css/TryParseResult.ts b/src/matchers/css/TryParseResult.ts index 6efc958..4ae60fe 100644 --- a/src/matchers/css/TryParseResult.ts +++ b/src/matchers/css/TryParseResult.ts @@ -1,4 +1,5 @@ import { ParsingError } from "./ParsingError"; +import { ParsingErrorPosition } from "./types"; /** * Represents the result of a try-parse operation, including any errors encountered, and the parsed result if successful. @@ -33,18 +34,20 @@ export const unwrapResult = (tryParseResult: T | TryParseResult): TryParse }; /** - * Unwraps a TryParseResult or throws an error if parsing failed. + * Unwraps a TryParseResult or throws an error with the sum of all accumulated errors, or a custom error message, if parsing failed. * @param tryParseResult The TryParseResult to unwrap. + * @param tokenStream The TokenStream to get the error position from. * @param errorMessage Optional custom error message to use if parsing failed. * @returns The parsed result if successful. * @throws {ParsingError} If parsing failed, with accumulated error messages. */ -export const unwrapResultOrThrow = (tryParseResult: T | TryParseResult, errorMessage?: string): T => { +export const unwrapResultOrThrow = (tryParseResult: T | TryParseResult, errorPosition: ParsingErrorPosition, errorMessage?: string): T => { const { result, errors } = unwrapResult(tryParseResult); if (result === null || (Array.isArray(result) && result.length === 0)) { - const errorMessages = errors.map(err => err.message).join('; '); - throw new ParsingError(errorMessage ?? errorMessages); + // Combine all error messages into one. + const errorMessages = errors.map(err => err.message.replace(/Parsing Error: /, '')).join('; '); + throw new ParsingError(errorMessage ?? errorMessages, errorPosition); } return result; diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts index 583e07a..4b39fac 100644 --- a/src/matchers/css/parser.ts +++ b/src/matchers/css/parser.ts @@ -1,23 +1,48 @@ import { AttributeSelectorNode, CombinatorNode, CompoundSelectorNode, Expression, SelectorNode, StringNode } from "./ast"; import { ParsingError } from "./ParsingError"; -import { Token, tokenize, TokenType } from "./tokenize"; +import { tokenize } from "./tokenize"; import { TokenStream } from "./TokenStream"; import { TryParseResult, unwrapResult, unwrapResultOrThrow } from "./TryParseResult"; +import { Token, TokenType } from "./types"; /** - * Parser for CSS selectors, implemented using combinators, utilises a TokenStream to read tokens and build an AST. + * Backtracking parser for CSS selectors, implemented using combinators, utilises a TokenStream to read tokens and build an AST. */ export class Parser { + /** + * The stream of tokens from the input selector string. + */ private tokenStream: TokenStream; + /** + * The deepest parsing error encountered during parsing. + * + * Used to track the most relevant error to report back to the user. + */ + private deepestError: ParsingError | null; + + /** + * Creates a new CSS selector parser instance. + * @param selector The CSS selector string to parse. + */ constructor(selector: string) { this.tokenStream = new TokenStream(tokenize(selector)); + this.deepestError = null; + } + + private onErrorRaised(error: ParsingError) { + if (!!this.deepestError && this.deepestError.location.position > error.location.position) { + return; + } + + this.deepestError = error; } // === Combinator Parsing Helpers === /** * Attempts to parse using the provided parse function. If the parsing fails, the token stream is reset to its original state. + * Foundation for implementing backtracking in the parser. * @return {TryParseResult} The result of the parsing attempt, including any errors encountered. * @typeParam T The type of the parsing result. */ @@ -35,8 +60,12 @@ export class Parser { } catch (err: ParsingError | any) { this.tokenStream.restorePosition(); + // If this is the deepest error so far (parser that made it to the furthest point), we store it in the parser + // state for later reporting. + this.onErrorRaised(err); + return { - errors: [err.message], + errors: [err], result: null, }; } @@ -60,8 +89,6 @@ export class Parser { totalErrors.push(...errors); } - console.log('tryParseMultiple totalErrors:', totalErrors); - return { result: null, errors: totalErrors }; } @@ -120,7 +147,7 @@ export class Parser { // if (token.type !== 'digit') { // break; // } - + // const next = this.tokenStream.consume(); // value += next?.value; // } @@ -159,7 +186,7 @@ export class Parser { * Parses an ID selector from the token stream. */ private parseIdentifier(): SelectorNode { - console.log(this.tokenStream.consumeExpect('hash')); + this.tokenStream.consumeExpect('hash'); let { value: name } = this.parseName(); return { @@ -189,7 +216,7 @@ export class Parser { while (true) { const next = this.tokenStream.consume(); if (next === null) { - throw new ParsingError('Unterminated string literal'); + throw new ParsingError('Unterminated string literal', this.tokenStream.getPositionForError()); } if (next.type === 'quote') { @@ -210,27 +237,23 @@ export class Parser { const attribute = this.parseName(); const operator = this.tokenStream.consumeExpect('tilde', 'pipe', 'caret', 'dollar', 'asterisk', 'equals').value; const optionalOperator = this.tokenStream.consumeIf('equals') - const value = this.tryParseMultiple(this.parseString.bind(this), this.parseName.bind(this)); - - if (value.result === null) { - throw new ParsingError('Expected string or name as attribute selector value'); - } + const value = unwrapResultOrThrow( + this.tryParseMultiple(this.parseString.bind(this), this.parseName.bind(this)), + this.tokenStream.getPositionForError(), + 'Expected string or name as attribute selector value' + ); return { type: 'Expression', attribute, operator: operator + (optionalOperator ? optionalOperator.value : ''), - value: value.result, + value, }; } private parseAttributeSelector(): AttributeSelectorNode { - console.log(this.tokenStream.peekRemainder()) - this.tokenStream.consumeExpect('left_bracket').value; const expression = this.parseExpression(); - - console.log(this.tokenStream.peekRemainder()); this.tokenStream.consumeExpect('right_bracket').value; return { @@ -250,7 +273,7 @@ export class Parser { this.parseAttributeSelector.bind(this) ); - return unwrapResultOrThrow(result); + return unwrapResultOrThrow(result, this.tokenStream.getPositionForError()); } /** @@ -258,7 +281,8 @@ export class Parser { */ private parseCompoundSelector(): CompoundSelectorNode { const result = this.tryParseUntil(this.parseSelector.bind(this)); - const selectors = unwrapResultOrThrow(result, 'Expected at least one selector'); + + const selectors = unwrapResultOrThrow(result, this.tokenStream.getPositionForError(), 'Expected at least one selector'); const node: CompoundSelectorNode = { type: 'CompoundSelector', @@ -287,7 +311,7 @@ export class Parser { const left = this.parseCompoundSelector(); this.tokenStream.eatWhitespace(); - const validCombinators: TokenType[] = ['left_angle_bracket', 'plus', 'tilde']; + const validCombinators: TokenType[] = ['left_angle_bracket', 'plus', 'tilde']; const operator = this.tokenStream.consumeExpect(...validCombinators); this.tokenStream.eatWhitespace(); @@ -311,19 +335,25 @@ export class Parser { this.parseCompoundSelector.bind(this), )!; - console.log(`tryParseResult: ${JSON.stringify(tryParseResult)}`); - - const selector = unwrapResultOrThrow(tryParseResult, 'Failed to parse complex selector') + const selector = unwrapResultOrThrow(tryParseResult, this.tokenStream.getPositionForError()); return selector; } // == Entry Point == public parse() { - const test = this.parseComplexSelector(); - // this.tokenStream.expectEndOfInput(); + try { + const test = this.parseComplexSelector(); + this.tokenStream.expectEndOfInput(); + } catch (err: ParsingError | any) { + if (err instanceof ParsingError) { + if (this.deepestError && this.deepestError.location.position > err.location.position) { + throw this.deepestError; + } + } - console.log(JSON.stringify(test)); + throw err; + } return test; } diff --git a/src/matchers/css/tokenize.ts b/src/matchers/css/tokenize.ts index 0741b3a..c2cdb15 100644 --- a/src/matchers/css/tokenize.ts +++ b/src/matchers/css/tokenize.ts @@ -1,110 +1,95 @@ -export type TokenType = 'letter' - | 'whitespace' - | 'digit' - | 'left_bracket' - | 'right_bracket' - | 'left_paren' - | 'right_parent' - | 'colon' - | 'period' - | 'hash' - | 'asterisk' - | 'equals' - | 'quote' - | 'tilde' - | 'left_angle_bracket' - | 'right_angle_bracket' - | 'dollar' - | 'caret' - | 'pipe' - | 'comma' - | 'plus' - | 'minus' - | 'underscore' - | 'other'; - -export type Token = { - value: string; - type: TokenType; -} +import { Token } from "./types"; const letterRegex = /[a-zA-Z]/; const digitRegex = /[0-9]/; const whitespaceRegex = /\s+/; +const newLineRegex = /\n/; export const tokenize = (selector: string): Token[] => { + const position = { + line: 1, + column: 1, + } + return selector.split('').reduce((tokens, char) => { + position.column += 1; + if (letterRegex.test(char)) { - tokens.push({ type: 'letter', value: char }); + tokens.push({ type: 'letter', value: char, position }); } else if (digitRegex.test(char)) { - tokens.push({ type: 'digit', value: char }); + tokens.push({ type: 'digit', value: char, position }); } else if (whitespaceRegex.test(char)) { - tokens.push({ type: 'whitespace', value: char }); + if (newLineRegex.test(char)) { + position.line += 1; + position.column = 1; + } + + tokens.push({ type: 'whitespace', value: char, position }); } else { switch (char) { case '[': - tokens.push({ type: 'left_bracket', value: char }); + tokens.push({ type: 'left_bracket', value: char, position }); break; case ']': - tokens.push({ type: 'right_bracket', value: char }); + tokens.push({ type: 'right_bracket', value: char, position }); break; case '(': - tokens.push({ type: 'left_paren', value: char }); + tokens.push({ type: 'left_paren', value: char, position }); break; case ')': - tokens.push({ type: 'right_parent', value: char }); + tokens.push({ type: 'right_parent', value: char, position }); break; case ':': - tokens.push({ type: 'colon', value: char }); + tokens.push({ type: 'colon', value: char, position }); break; case '.': - tokens.push({ type: 'period', value: char }); + tokens.push({ type: 'period', value: char, position }); break; case '#': - tokens.push({ type: 'hash', value: char }); + tokens.push({ type: 'hash', value: char, position }); break; case '*': - tokens.push({ type: 'asterisk', value: char }); + tokens.push({ type: 'asterisk', value: char, position }); break; case '=': - tokens.push({ type: 'equals', value: char }); + tokens.push({ type: 'equals', value: char, position }); break; case '"': case "'": - tokens.push({ type: 'quote', value: char }); + tokens.push({ type: 'quote', value: char, position }); break; case '~': - tokens.push({ type: 'tilde', value: char }); + tokens.push({ type: 'tilde', value: char, position }); break; case '>': - tokens.push({ type: 'left_angle_bracket', value: char }); + tokens.push({ type: 'left_angle_bracket', value: char, position }); break; case '<': - tokens.push({ type: 'right_angle_bracket', value: char }); + tokens.push({ type: 'right_angle_bracket', value: char, position }); break; case '$': - tokens.push({ type: 'dollar', value: char }); + tokens.push({ type: 'dollar', value: char, position }); break; case '^': - tokens.push({ type: 'caret', value: char }); + tokens.push({ type: 'caret', value: char, position }); break; case '|': - tokens.push({ type: 'pipe', value: char }); + tokens.push({ type: 'pipe', value: char, position }); break; case ',': - tokens.push({ type: 'comma', value: char }); + tokens.push({ type: 'comma', value: char, position }); break; case '-': - tokens.push({ type: 'minus', value: char }); + tokens.push({ type: 'minus', value: char, position }); break; case '+': - tokens.push({ type: 'plus', value: char }); + tokens.push({ type: 'plus', value: char, position }); break; case '_': - tokens.push({ type: 'underscore', value: char }); + tokens.push({ type: 'underscore', value: char, position }); break; default: - tokens.push({ type: 'other', value: char }); + tokens.push({ type: 'other', value: char, position }); break; } } diff --git a/src/matchers/css/types.ts b/src/matchers/css/types.ts new file mode 100644 index 0000000..36c417b --- /dev/null +++ b/src/matchers/css/types.ts @@ -0,0 +1,41 @@ +export type TokenType = 'letter' + | 'whitespace' + | 'digit' + | 'left_bracket' + | 'right_bracket' + | 'left_paren' + | 'right_parent' + | 'colon' + | 'period' + | 'hash' + | 'asterisk' + | 'equals' + | 'quote' + | 'tilde' + | 'left_angle_bracket' + | 'right_angle_bracket' + | 'dollar' + | 'caret' + | 'pipe' + | 'comma' + | 'plus' + | 'minus' + | 'underscore' + | 'other'; + +export type TokenPosition = { + line: number; + column: number; +} + +export type Token = { + value: string; + type: TokenType; + position: TokenPosition; +} + +export type ParsingErrorPosition = { + line?: number; + column?: number; + position: number; +} \ No newline at end of file From 5426962f4c17497acf38a39303e3b9d9e51cd1f7 Mon Sep 17 00:00:00 2001 From: Matt Carter Date: Sat, 27 Dec 2025 16:52:44 +0000 Subject: [PATCH 8/8] - Get tests passing - Clean up expression parser - Add some more utility methods --- src/matchers/css/TokenStream.ts | 4 +-- src/matchers/css/ast.ts | 4 +-- src/matchers/css/parser.test.ts | 24 +++++++++++++-- src/matchers/css/parser.ts | 36 +++++++++++++++------- src/matchers/css/tokenize.ts | 53 ++++++++++++++++++--------------- 5 files changed, 80 insertions(+), 41 deletions(-) diff --git a/src/matchers/css/TokenStream.ts b/src/matchers/css/TokenStream.ts index 081e8c1..a742f74 100644 --- a/src/matchers/css/TokenStream.ts +++ b/src/matchers/css/TokenStream.ts @@ -34,9 +34,9 @@ export class TokenStream { * Consumes a token if it matches the expected type. * @returns The consumed token, or null if the next token does not match the expected type. */ - public consumeIf(type: TokenType): Token | null { + public consumeIf(...types: TokenType[]): Token | null { const token = this.peek(); - if (token?.type === type) { + if (token && types.includes(token.type)) { return this.consume(); } return null; diff --git a/src/matchers/css/ast.ts b/src/matchers/css/ast.ts index 215fa86..33162b3 100644 --- a/src/matchers/css/ast.ts +++ b/src/matchers/css/ast.ts @@ -17,8 +17,8 @@ export interface StringNode extends Node { export interface Expression extends Node { type: 'Expression', attribute: SelectorNode; - operator: string; - value: SelectorNode | StringNode; + operator?: string; + value?: SelectorNode | StringNode; } export interface AttributeSelectorNode extends Node { diff --git a/src/matchers/css/parser.test.ts b/src/matchers/css/parser.test.ts index abe9f51..1a4bfdc 100644 --- a/src/matchers/css/parser.test.ts +++ b/src/matchers/css/parser.test.ts @@ -10,8 +10,28 @@ describe('CSS Parser', () => { test('test failure', () => { const parser = new Parser('#test[data-testid="value" > p.example[data-role="main"]'); // Missing closing bracket - parser.parse(); - // expect(() => parser.parse()).toThrow(); + expect(() => parser.parse()).toThrow('Parsing Error [1:28]: Expected token of type right_bracket, but got whitespace'); + }); + + test('another failure', () => { + const parser = new Parser('div[data-testid="this is unterminated"] + div.example[d='); + expect(() => parser.parse()).toThrow('Parsing Error [1:57]: Expected token of type letter, but got end of input'); + }) + }); + + describe('attribute selector', () => { + test('simple attribute selector', () => { + const parser = new Parser('div[title]'); + const parsed = parser.parse(); + + expect(parsed).toBeDefined(); + }); + + test('attribute selector with operator and value', () => { + const parser = new Parser('div[class^="header"]'); + const parsed = parser.parse(); + + expect(parsed).toBeDefined(); }); }); }); \ No newline at end of file diff --git a/src/matchers/css/parser.ts b/src/matchers/css/parser.ts index 4b39fac..e6d76fb 100644 --- a/src/matchers/css/parser.ts +++ b/src/matchers/css/parser.ts @@ -235,24 +235,39 @@ export class Parser { private parseExpression(): Expression { const attribute = this.parseName(); - const operator = this.tokenStream.consumeExpect('tilde', 'pipe', 'caret', 'dollar', 'asterisk', 'equals').value; - const optionalOperator = this.tokenStream.consumeIf('equals') - const value = unwrapResultOrThrow( - this.tryParseMultiple(this.parseString.bind(this), this.parseName.bind(this)), - this.tokenStream.getPositionForError(), - 'Expected string or name as attribute selector value' - ); + + const parseComplexExpressionParts = () => { + this.tokenStream.eatWhitespace(); + + let operator = this.tokenStream.consumeIf('tilde', 'pipe', 'caret', 'dollar', 'asterisk')?.value || ''; + operator += this.tokenStream.consumeExpect('equals').value; + + this.tokenStream.eatWhitespace(); + + const value = unwrapResultOrThrow( + this.tryParseMultiple(this.parseString.bind(this), this.parseName.bind(this)), + this.tokenStream.getPositionForError(), + 'Expected string or name as attribute selector value' + ); + + return { + operator, + value, + } + }; + + const complexPartsResult = this.tryParse(parseComplexExpressionParts); return { type: 'Expression', attribute, - operator: operator + (optionalOperator ? optionalOperator.value : ''), - value, + ...complexPartsResult.result, }; } private parseAttributeSelector(): AttributeSelectorNode { this.tokenStream.consumeExpect('left_bracket').value; + const expression = this.parseExpression(); this.tokenStream.consumeExpect('right_bracket').value; @@ -345,6 +360,7 @@ export class Parser { try { const test = this.parseComplexSelector(); this.tokenStream.expectEndOfInput(); + return test; } catch (err: ParsingError | any) { if (err instanceof ParsingError) { if (this.deepestError && this.deepestError.location.position > err.location.position) { @@ -354,7 +370,5 @@ export class Parser { throw err; } - - return test; } } \ No newline at end of file diff --git a/src/matchers/css/tokenize.ts b/src/matchers/css/tokenize.ts index c2cdb15..32e1517 100644 --- a/src/matchers/css/tokenize.ts +++ b/src/matchers/css/tokenize.ts @@ -5,6 +5,11 @@ const digitRegex = /[0-9]/; const whitespaceRegex = /\s+/; const newLineRegex = /\n/; +/** + * Tokenizer for CSS selectors. Converts a selector string into an array of tokens. + * @param selector The CSS selector string to tokenize. + * @returns An array of tokens representing the selector, including their types and positions. + */ export const tokenize = (selector: string): Token[] => { const position = { line: 1, @@ -15,81 +20,81 @@ export const tokenize = (selector: string): Token[] => { position.column += 1; if (letterRegex.test(char)) { - tokens.push({ type: 'letter', value: char, position }); + tokens.push({ type: 'letter', value: char, position: { ...position } }); } else if (digitRegex.test(char)) { - tokens.push({ type: 'digit', value: char, position }); + tokens.push({ type: 'digit', value: char, position: { ...position } }); } else if (whitespaceRegex.test(char)) { if (newLineRegex.test(char)) { position.line += 1; position.column = 1; } - tokens.push({ type: 'whitespace', value: char, position }); + tokens.push({ type: 'whitespace', value: char, position: { ...position } }); } else { switch (char) { case '[': - tokens.push({ type: 'left_bracket', value: char, position }); + tokens.push({ type: 'left_bracket', value: char, position: { ...position } }); break; case ']': - tokens.push({ type: 'right_bracket', value: char, position }); + tokens.push({ type: 'right_bracket', value: char, position: { ...position } }); break; case '(': - tokens.push({ type: 'left_paren', value: char, position }); + tokens.push({ type: 'left_paren', value: char, position: { ...position } }); break; case ')': - tokens.push({ type: 'right_parent', value: char, position }); + tokens.push({ type: 'right_parent', value: char, position: { ...position } }); break; case ':': - tokens.push({ type: 'colon', value: char, position }); + tokens.push({ type: 'colon', value: char, position: { ...position } }); break; case '.': - tokens.push({ type: 'period', value: char, position }); + tokens.push({ type: 'period', value: char, position: { ...position } }); break; case '#': - tokens.push({ type: 'hash', value: char, position }); + tokens.push({ type: 'hash', value: char, position: { ...position } }); break; case '*': - tokens.push({ type: 'asterisk', value: char, position }); + tokens.push({ type: 'asterisk', value: char, position: { ...position } }); break; case '=': - tokens.push({ type: 'equals', value: char, position }); + tokens.push({ type: 'equals', value: char, position: { ...position } }); break; case '"': case "'": - tokens.push({ type: 'quote', value: char, position }); + tokens.push({ type: 'quote', value: char, position: { ...position } }); break; case '~': - tokens.push({ type: 'tilde', value: char, position }); + tokens.push({ type: 'tilde', value: char, position: { ...position } }); break; case '>': - tokens.push({ type: 'left_angle_bracket', value: char, position }); + tokens.push({ type: 'left_angle_bracket', value: char, position: { ...position } }); break; case '<': - tokens.push({ type: 'right_angle_bracket', value: char, position }); + tokens.push({ type: 'right_angle_bracket', value: char, position: { ...position } }); break; case '$': - tokens.push({ type: 'dollar', value: char, position }); + tokens.push({ type: 'dollar', value: char, position: { ...position } }); break; case '^': - tokens.push({ type: 'caret', value: char, position }); + tokens.push({ type: 'caret', value: char, position: { ...position } }); break; case '|': - tokens.push({ type: 'pipe', value: char, position }); + tokens.push({ type: 'pipe', value: char, position: { ...position } }); break; case ',': - tokens.push({ type: 'comma', value: char, position }); + tokens.push({ type: 'comma', value: char, position: { ...position } }); break; case '-': - tokens.push({ type: 'minus', value: char, position }); + tokens.push({ type: 'minus', value: char, position: { ...position } }); break; case '+': - tokens.push({ type: 'plus', value: char, position }); + tokens.push({ type: 'plus', value: char, position: { ...position } }); break; case '_': - tokens.push({ type: 'underscore', value: char, position }); + tokens.push({ type: 'underscore', value: char, position: { ...position } }); break; default: - tokens.push({ type: 'other', value: char, position }); + tokens.push({ type: 'other', value: char, position: { ...position } }); break; } }