From 6628267df68e8484867da46f84ea0698d98bfc43 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 05:00:06 +0800 Subject: [PATCH 01/20] Renaming `namedCaptureGroups` to `hasNamedCapturingGroups` Just for consistency Partially ports https://github.com/microsoft/TypeScript/pull/60249/commits/c44a0572d7c767d4c4cf279224f5b61be6129a0e --- internal/regexpchecker/regexpchecker.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 244b5a1ebd..574fab55ad 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -55,7 +55,7 @@ type regExpValidator struct { unicodeSetsMode bool anyUnicodeMode bool anyUnicodeModeOrNonAnnexB bool - namedCaptureGroups bool + hasNamedCapturingGroups bool numberOfCapturingGroups int groupSpecifiers map[string]bool groupNameReferences []namedReference @@ -119,20 +119,20 @@ func Check( v.anyUnicodeMode = v.regExpFlags&regExpFlagsAnyUnicodeMode != 0 v.annexB = true v.anyUnicodeModeOrNonAnnexB = v.anyUnicodeMode || !v.annexB - v.namedCaptureGroups = v.detectNamedCaptureGroups() + v.hasNamedCapturingGroups = v.detectNamedCapturingGroups() v.scanDisjunction(false) v.validateGroupReferences() v.validateDecimalEscapes() } -// detectNamedCaptureGroups performs a quick scan of the pattern to detect -// if it contains any named capture groups (?<name>...). This is needed because +// detectNamedCapturingGroups performs a quick scan of the pattern to detect +// if it contains any named capturing groups (?<name>...). This is needed because // the presence of named groups changes the interpretation of \k escapes: // - Without named groups: \k is an identity escape (matches literal 'k') // - With named groups: \k must be followed by <name> or it's a syntax error // This matches the behavior in scanner.ts's reScanSlashToken. 
-func (v *regExpValidator) detectNamedCaptureGroups() bool { +func (v *regExpValidator) detectNamedCapturingGroups() bool { inEscape := false inCharacterClass := false text := v.text[v.pos:v.end] @@ -158,7 +158,7 @@ func (v *regExpValidator) detectNamedCaptureGroups() bool { text[i+2] == '<' && text[i+3] != '=' && text[i+3] != '!' { - // Found (?< that's not (?<= or (?') - } else if v.anyUnicodeModeOrNonAnnexB || v.namedCaptureGroups { + } else if v.anyUnicodeModeOrNonAnnexB || v.hasNamedCapturingGroups { v.error(diagnostics.X_k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, v.pos-2, 2) } case 'q': From cf301f960601409ca9fdb924ca50127a0d5fd1f5 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 08:32:56 +0800 Subject: [PATCH 02/20] Fix Incorrect Disjunction Alternative Visibility Fixes: no error on `/((?))(?)/` Ports https://github.com/microsoft/TypeScript/pull/60249/commits/c4355267fbc984d710caf2760d9a1399cb2555c4#diff-65d73a953f046cf34092e70dd4a376ad5c885da833e750f88afeca31b6d63926 --- internal/regexpchecker/regexpchecker.go | 99 +++++++++------- ...sionDuplicateCapturingGroupName.errors.txt | 106 ++++++++++++++++++ ...arExpressionDuplicateCapturingGroupName.js | 61 ++++++++++ ...ressionDuplicateCapturingGroupName.symbols | 35 ++++++ ...xpressionDuplicateCapturingGroupName.types | 70 ++++++++++++ ...arExpressionDuplicateCapturingGroupName.ts | 33 ++++++ 6 files changed, 362 insertions(+), 42 deletions(-) create mode 100644 testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.errors.txt create mode 100644 testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.js create mode 100644 testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.symbols create mode 100644 testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.types create mode 100644 
testdata/tests/cases/compiler/regularExpressionDuplicateCapturingGroupName.ts diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 574fab55ad..d3a3018786 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -44,28 +44,37 @@ var charCodeToRegExpFlag = map[rune]regExpFlags{ // regExpValidator is used to validate regular expressions type regExpValidator struct { - text string - pos int - end int - languageVersion core.ScriptTarget - languageVariant core.LanguageVariant - onError scanner.ErrorCallback - regExpFlags regExpFlags - annexB bool - unicodeSetsMode bool - anyUnicodeMode bool - anyUnicodeModeOrNonAnnexB bool - hasNamedCapturingGroups bool - numberOfCapturingGroups int - groupSpecifiers map[string]bool - groupNameReferences []namedReference - decimalEscapes []decimalEscape - namedCapturingGroupsScopeStack []map[string]bool - topNamedCapturingGroupsScope map[string]bool - mayContainStrings bool - isCharacterComplement bool - tokenValue string - surrogateState *surrogatePairState // For non-Unicode mode: tracks partial surrogate pair + text string + pos int + end int + languageVersion core.ScriptTarget + languageVariant core.LanguageVariant + onError scanner.ErrorCallback + regExpFlags regExpFlags + annexB bool + unicodeSetsMode bool + anyUnicodeMode bool + anyUnicodeModeOrNonAnnexB bool + hasNamedCapturingGroups bool + numberOfCapturingGroups int + groupSpecifiers map[string]bool + groupNameReferences []namedReference + decimalEscapes []decimalEscape + disjunctionsScopesStack []disjunctionsScope + topDisjunctionsScope disjunctionsScope + mayContainStrings bool + isCharacterComplement bool + tokenValue string + surrogateState *surrogatePairState // For non-Unicode mode: tracks partial surrogate pair +} + +type disjunction struct { + groupName string // Not a named capturing group if empty +} + +type disjunctionsScope struct { + disjunctions []disjunction + 
currentAlternativeIndex int } // surrogatePairState tracks when we're in the middle of emitting a surrogate pair @@ -215,17 +224,14 @@ func (v *regExpValidator) checkRegularExpressionFlagAvailability(flag regExpFlag } func (v *regExpValidator) scanDisjunction(isInGroup bool) { + v.topDisjunctionsScope = disjunctionsScope{} for { - v.namedCapturingGroupsScopeStack = append(v.namedCapturingGroupsScopeStack, v.topNamedCapturingGroupsScope) - v.topNamedCapturingGroupsScope = nil v.scanAlternative(isInGroup) - v.topNamedCapturingGroupsScope = v.namedCapturingGroupsScopeStack[len(v.namedCapturingGroupsScopeStack)-1] - v.namedCapturingGroupsScopeStack = v.namedCapturingGroupsScopeStack[:len(v.namedCapturingGroupsScopeStack)-1] - if v.charAtOffset(0) != '|' { return } v.pos++ + v.topDisjunctionsScope.currentAlternativeIndex = len(v.topDisjunctionsScope.disjunctions) } } @@ -252,6 +258,7 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { } case '(': v.pos++ + var groupName string if v.charAtOffset(0) == '?' 
{ v.pos++ switch v.charAtOffset(0) { @@ -266,7 +273,7 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { v.pos++ isPreviousTermQuantifiable = false default: - v.scanGroupName(false) + groupName = v.scanGroupName(false) v.scanExpectedChar('>') if v.languageVersion < core.ScriptTargetES2018 { v.error(diagnostics.Named_capturing_groups_are_only_available_when_targeting_ES2018_or_later, groupNameStart, v.pos-groupNameStart) @@ -291,7 +298,15 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { v.numberOfCapturingGroups++ isPreviousTermQuantifiable = true } + disjunction := disjunction{groupName} + v.topDisjunctionsScope.disjunctions = append(v.topDisjunctionsScope.disjunctions, disjunction) + oldTopDisjunctionsScope := v.topDisjunctionsScope + oldDisjunctionsScopesStack := v.disjunctionsScopesStack + v.disjunctionsScopesStack = append(v.disjunctionsScopesStack, v.topDisjunctionsScope) v.scanDisjunction(true) + oldTopDisjunctionsScope.disjunctions = append(oldTopDisjunctionsScope.disjunctions, v.topDisjunctionsScope.disjunctions...) 
+ v.topDisjunctionsScope = oldTopDisjunctionsScope + v.disjunctionsScopesStack = oldDisjunctionsScopesStack v.scanExpectedChar(')') case '{': v.pos++ @@ -861,34 +876,34 @@ func parseHexValue(text string, start, end int) int { return code } -func (v *regExpValidator) scanGroupName(isReference bool) { +func (v *regExpValidator) scanGroupName(isReference bool) string { tokenStart := v.pos v.scanIdentifier(v.charAtOffset(0)) if v.pos == tokenStart { v.error(diagnostics.Expected_a_capturing_group_name, v.pos, 0) - } else if isReference { + return "" + } + if isReference { v.groupNameReferences = append(v.groupNameReferences, namedReference{pos: tokenStart, end: v.pos, name: v.tokenValue}) } else { - // Check for duplicate names in scope - if v.topNamedCapturingGroupsScope != nil && v.topNamedCapturingGroupsScope[v.tokenValue] { - v.error(diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, v.pos-tokenStart) - } else { - for _, scope := range v.namedCapturingGroupsScopeStack { - if scope != nil && scope[v.tokenValue] { - v.error(diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, v.pos-tokenStart) - break + if v.tokenValue != "" { + // Check for duplicate names in scope + outer: + for _, scope := range append(v.disjunctionsScopesStack, v.topDisjunctionsScope) { + for i := scope.currentAlternativeIndex; i < len(scope.disjunctions); i++ { + if scope.disjunctions[i].groupName == v.tokenValue { + v.error(diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, v.pos-tokenStart) + break outer + } } } } - if v.topNamedCapturingGroupsScope == nil { - v.topNamedCapturingGroupsScope = make(map[string]bool) - } - v.topNamedCapturingGroupsScope[v.tokenValue] = true if v.groupSpecifiers == nil { v.groupSpecifiers = make(map[string]bool) } v.groupSpecifiers[v.tokenValue] = true } + return v.tokenValue } // 
scanSourceCharacter scans and returns a single "character" from the source. diff --git a/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.errors.txt b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.errors.txt new file mode 100644 index 0000000000..d6fd036b97 --- /dev/null +++ b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.errors.txt @@ -0,0 +1,106 @@ +regularExpressionDuplicateCapturingGroupName.ts(2,13): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(3,14): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(4,15): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(5,16): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(6,16): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(7,18): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(8,16): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(9,18): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(11,19): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. 
+regularExpressionDuplicateCapturingGroupName.ts(12,21): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(14,20): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(15,20): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(16,20): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(19,12): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(20,13): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(23,50): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(23,57): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(23,64): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(24,35): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(24,42): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(24,66): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. 
+regularExpressionDuplicateCapturingGroupName.ts(24,74): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(24,81): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. +regularExpressionDuplicateCapturingGroupName.ts(24,89): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + + +==== regularExpressionDuplicateCapturingGroupName.ts (24 errors) ==== + // Adjacent homonymous capturing groups + /(?)(?)/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?)((?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /((?))(?)/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /((?))((?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?)(?=(?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?<=(?))(?)/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?)(?!(?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?))(?)/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + + /((?))((?=(?)))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /((?)))((?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + + /(?=(?))(?=(?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?!(?))(?!(?))/; + ~~~ +!!! 
error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?=(?))(?!(?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + + // Nested homonymous capturing groups + /(?(?))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?((?)))/; + ~~~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + + // Complicated cases + /(?)((?)((?)|(?))|(?)|((?)))(?)((?)|(?))/; + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + /(?)(((?)|(?))((?)|(?)|(?))|(?)|((?)))(?)(((?)|(?))|(?))/; + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. + ~ +!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other. 
+ + // Should not error + /(?)|(?)/; + /(?)|((?))/; + /((?))|(?)/; + /((?))|((?))/; + \ No newline at end of file diff --git a/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.js b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.js new file mode 100644 index 0000000000..d864ac8915 --- /dev/null +++ b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.js @@ -0,0 +1,61 @@ +//// [tests/cases/compiler/regularExpressionDuplicateCapturingGroupName.ts] //// + +//// [regularExpressionDuplicateCapturingGroupName.ts] +// Adjacent homonymous capturing groups +/(?)(?)/; +/(?)((?))/; +/((?))(?)/; +/((?))((?))/; +/(?)(?=(?))/; +/(?<=(?))(?)/; +/(?)(?!(?))/; +/(?))(?)/; + +/((?))((?=(?)))/; +/((?)))((?))/; + +/(?=(?))(?=(?))/; +/(?!(?))(?!(?))/; +/(?=(?))(?!(?))/; + +// Nested homonymous capturing groups +/(?(?))/; +/(?((?)))/; + +// Complicated cases +/(?)((?)((?)|(?))|(?)|((?)))(?)((?)|(?))/; +/(?)(((?)|(?))((?)|(?)|(?))|(?)|((?)))(?)(((?)|(?))|(?))/; + +// Should not error +/(?)|(?)/; +/(?)|((?))/; +/((?))|(?)/; +/((?))|((?))/; + + +//// [regularExpressionDuplicateCapturingGroupName.js] +// Adjacent homonymous capturing groups +/(?)(?)/; +/(?)((?))/; +/((?))(?)/; +/((?))((?))/; +/(?)(?=(?))/; +/(?<=(?))(?)/; +/(?)(?!(?))/; +/(?))(?)/; +/((?))((?=(?)))/; +/((?)))((?))/; +/(?=(?))(?=(?))/; +/(?!(?))(?!(?))/; +/(?=(?))(?!(?))/; +// Nested homonymous capturing groups +/(?(?))/; +/(?((?)))/; +// Complicated cases +/(?)((?)((?)|(?))|(?)|((?)))(?)((?)|(?))/; +/(?)(((?)|(?))((?)|(?)|(?))|(?)|((?)))(?)(((?)|(?))|(?))/; +// Should not error +/(?)|(?)/; +/(?)|((?))/; +/((?))|(?)/; +/((?))|((?))/; diff --git a/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.symbols b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.symbols new file mode 100644 index 0000000000..fdc97a61fc --- /dev/null +++ 
b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.symbols @@ -0,0 +1,35 @@ +//// [tests/cases/compiler/regularExpressionDuplicateCapturingGroupName.ts] //// + +=== regularExpressionDuplicateCapturingGroupName.ts === + +// Adjacent homonymous capturing groups +/(?)(?)/; +/(?)((?))/; +/((?))(?)/; +/((?))((?))/; +/(?)(?=(?))/; +/(?<=(?))(?)/; +/(?)(?!(?))/; +/(?))(?)/; + +/((?))((?=(?)))/; +/((?)))((?))/; + +/(?=(?))(?=(?))/; +/(?!(?))(?!(?))/; +/(?=(?))(?!(?))/; + +// Nested homonymous capturing groups +/(?(?))/; +/(?((?)))/; + +// Complicated cases +/(?)((?)((?)|(?))|(?)|((?)))(?)((?)|(?))/; +/(?)(((?)|(?))((?)|(?)|(?))|(?)|((?)))(?)(((?)|(?))|(?))/; + +// Should not error +/(?)|(?)/; +/(?)|((?))/; +/((?))|(?)/; +/((?))|((?))/; + diff --git a/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.types b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.types new file mode 100644 index 0000000000..32118926b0 --- /dev/null +++ b/testdata/baselines/reference/compiler/regularExpressionDuplicateCapturingGroupName.types @@ -0,0 +1,70 @@ +//// [tests/cases/compiler/regularExpressionDuplicateCapturingGroupName.ts] //// + +=== regularExpressionDuplicateCapturingGroupName.ts === +// Adjacent homonymous capturing groups +/(?)(?)/; +>/(?)(?)/ : RegExp + +/(?)((?))/; +>/(?)((?))/ : RegExp + +/((?))(?)/; +>/((?))(?)/ : RegExp + +/((?))((?))/; +>/((?))((?))/ : RegExp + +/(?)(?=(?))/; +>/(?)(?=(?))/ : RegExp + +/(?<=(?))(?)/; +>/(?<=(?))(?)/ : RegExp + +/(?)(?!(?))/; +>/(?)(?!(?))/ : RegExp + +/(?))(?)/; +>/(?))(?)/ : RegExp + +/((?))((?=(?)))/; +>/((?))((?=(?)))/ : RegExp + +/((?)))((?))/; +>/((?)))((?))/ : RegExp + +/(?=(?))(?=(?))/; +>/(?=(?))(?=(?))/ : RegExp + +/(?!(?))(?!(?))/; +>/(?!(?))(?!(?))/ : RegExp + +/(?=(?))(?!(?))/; +>/(?=(?))(?!(?))/ : RegExp + +// Nested homonymous capturing groups +/(?(?))/; +>/(?(?))/ : RegExp + +/(?((?)))/; +>/(?((?)))/ : RegExp + +// Complicated 
cases +/(?)((?)((?)|(?))|(?)|((?)))(?)((?)|(?))/; +>/(?)((?)((?)|(?))|(?)|((?)))(?)((?)|(?))/ : RegExp + +/(?)(((?)|(?))((?)|(?)|(?))|(?)|((?)))(?)(((?)|(?))|(?))/; +>/(?)(((?)|(?))((?)|(?)|(?))|(?)|((?)))(?)(((?)|(?))|(?))/ : RegExp + +// Should not error +/(?)|(?)/; +>/(?)|(?)/ : RegExp + +/(?)|((?))/; +>/(?)|((?))/ : RegExp + +/((?))|(?)/; +>/((?))|(?)/ : RegExp + +/((?))|((?))/; +>/((?))|((?))/ : RegExp + diff --git a/testdata/tests/cases/compiler/regularExpressionDuplicateCapturingGroupName.ts b/testdata/tests/cases/compiler/regularExpressionDuplicateCapturingGroupName.ts new file mode 100644 index 0000000000..2af5114c0f --- /dev/null +++ b/testdata/tests/cases/compiler/regularExpressionDuplicateCapturingGroupName.ts @@ -0,0 +1,33 @@ +// @strict: true +// @target: esnext + +// Adjacent homonymous capturing groups +/(?)(?)/; +/(?)((?))/; +/((?))(?)/; +/((?))((?))/; +/(?)(?=(?))/; +/(?<=(?))(?)/; +/(?)(?!(?))/; +/(?))(?)/; + +/((?))((?=(?)))/; +/((?)))((?))/; + +/(?=(?))(?=(?))/; +/(?!(?))(?!(?))/; +/(?=(?))(?!(?))/; + +// Nested homonymous capturing groups +/(?(?))/; +/(?((?)))/; + +// Complicated cases +/(?)((?)((?)|(?))|(?)|((?)))(?)((?)|(?))/; +/(?)(((?)|(?))((?)|(?)|(?))|(?)|((?)))(?)(((?)|(?))|(?))/; + +// Should not error +/(?)|(?)/; +/(?)|((?))/; +/((?))|(?)/; +/((?))|((?))/; From af9d18a137cfe3f33637053552718ed3fdd66925 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 09:02:33 +0800 Subject: [PATCH 03/20] Add fast path to `utf16Length` --- internal/regexpchecker/utf16.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/internal/regexpchecker/utf16.go b/internal/regexpchecker/utf16.go index 3475a4ad82..4cd2d3991e 100644 --- a/internal/regexpchecker/utf16.go +++ b/internal/regexpchecker/utf16.go @@ -106,10 +106,18 @@ func utf16Length(s string) int { return 1 } - // Otherwise, count UTF-16 code units from UTF-8 runes + // Otherwise, count UTF-16 code units from UTF-8 string + sLength := len(s) length := 
0 - for _, r := range s { - length += charSize(r) + // ASCII fast path similar to stdlib utf8.RuneCount + for ; length < sLength; length++ { + if ch := s[length]; ch == 0 || ch >= 0x80 { + // non-ASCII slow path, count from runes + for _, r := range s[length:] { + length += charSize(r) + } + return length + } } return length } From fc9cb9794d6d3e046ef80eb41729d47d044b29d9 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 09:04:26 +0800 Subject: [PATCH 04/20] Fix: `inEscape` should always be checked and toggled first --- internal/regexpchecker/regexpchecker.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index d3a3018786..f23c218cad 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -147,14 +147,17 @@ func (v *regExpValidator) detectNamedCapturingGroups() bool { text := v.text[v.pos:v.end] for i, ch := range text { + if inEscape { + inEscape = false + continue + } + // Only check ASCII characters for the pattern (?< if ch >= 0x80 { continue } - if inEscape { - inEscape = false - } else if ch == '\\' { + if ch == '\\' { inEscape = true } else if ch == '[' { inCharacterClass = true From a1fd6fafbd39f56e00ba21e8a2e3b3b0138a7eba Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 09:05:37 +0800 Subject: [PATCH 05/20] Fast path to "Numbers out of order" check --- internal/regexpchecker/regexpchecker.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index f23c218cad..36b7989da1 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -332,7 +332,7 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { isPreviousTermQuantifiable = true break } - } else if maxVal != "" { + } else if maxVal != "" && 
(v.anyUnicodeModeOrNonAnnexB || v.charAtOffset(0) == '}') { minInt := 0 maxInt := 0 for _, c := range minVal { @@ -341,7 +341,7 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { for _, c := range maxVal { maxInt = maxInt*10 + int(c-'0') } - if minInt > maxInt && (v.anyUnicodeModeOrNonAnnexB || v.charAtOffset(0) == '}') { + if minInt > maxInt { v.error(diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, v.pos-digitsStart) } } From d217838fb4c90171340e7a68c8f1de4b98d06001 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 09:24:48 +0800 Subject: [PATCH 06/20] Correct error message for simultaneous `(?uv:)` in pattern modifiers --- internal/regexpchecker/regexpchecker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 36b7989da1..4ef76895d7 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -464,7 +464,7 @@ func (v *regExpValidator) scanFlags(currFlags regExpFlags, checkModifiers bool) v.error(diagnostics.Unknown_regular_expression_flag, v.pos, size) } else if currFlags&flag != 0 { v.error(diagnostics.Duplicate_regular_expression_flag, v.pos, size) - } else if (currFlags|flag)&regExpFlagsAnyUnicodeMode == regExpFlagsAnyUnicodeMode { + } else if !checkModifiers && (currFlags|flag)&regExpFlagsAnyUnicodeMode == regExpFlagsAnyUnicodeMode { v.error(diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, v.pos, size) } else if checkModifiers && flag&regExpFlagsModifiers == 0 { v.error(diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, v.pos, size) From 21a3a2bb22f2d98764e0fc2420cca04a83dede6b Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:31:52 +0800 Subject: [PATCH 07/20] Refactor: Replace `0x80` with `utf8.RuneSelf` --- internal/regexpchecker/regexpchecker.go | 6 +++--- 
internal/regexpchecker/utf16.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 4ef76895d7..d38be51ec6 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -153,7 +153,7 @@ func (v *regExpValidator) detectNamedCapturingGroups() bool { } // Only check ASCII characters for the pattern (?< - if ch >= 0x80 { + if ch >= utf8.RuneSelf { continue } @@ -182,7 +182,7 @@ func (v *regExpValidator) charAndSize() (rune, int) { return 0, 0 } // Simple ASCII fast path - if ch := v.text[v.pos]; ch < 0x80 { + if ch := v.text[v.pos]; ch < utf8.RuneSelf { return rune(ch), 1 } // Decode multi-byte UTF-8 character @@ -195,7 +195,7 @@ func (v *regExpValidator) charAtOffset(offset int) rune { return 0 } // Simple ASCII fast path - if ch := v.text[v.pos+offset]; ch < 0x80 { + if ch := v.text[v.pos+offset]; ch < utf8.RuneSelf { return rune(ch) } // Decode multi-byte UTF-8 character diff --git a/internal/regexpchecker/utf16.go b/internal/regexpchecker/utf16.go index 4cd2d3991e..ffa4862718 100644 --- a/internal/regexpchecker/utf16.go +++ b/internal/regexpchecker/utf16.go @@ -111,7 +111,7 @@ func utf16Length(s string) int { length := 0 // ASCII fast path similar to stdlib utf8.RuneCount for ; length < sLength; length++ { - if ch := s[length]; ch == 0 || ch >= 0x80 { + if ch := s[length]; ch == 0 || ch >= utf8.RuneSelf { // non-ASCII slow path, count from runes for _, r := range s[length:] { length += charSize(r) From 74a02b9546fbcfd344242d699cba645ef32a8c50 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:32:58 +0800 Subject: [PATCH 08/20] Add "Use LanguageFeatureMinimumTarget" TODO comments --- internal/regexpchecker/regexpchecker.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index d38be51ec6..40a8be60fb 100644 --- 
a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -209,6 +209,7 @@ func (v *regExpValidator) error(message *diagnostics.Message, start, length int, func (v *regExpValidator) checkRegularExpressionFlagAvailability(flag regExpFlags, size int) { var availableFrom core.ScriptTarget + // TODO: Use LanguageFeatureMinimumTarget switch flag { case regExpFlagsHasIndices: availableFrom = core.ScriptTargetES2022 @@ -278,6 +279,7 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { default: groupName = v.scanGroupName(false) v.scanExpectedChar('>') + // TODO: Move to LanguageFeatureMinimumTarget.RegularExpressionNamedCapturingGroups if v.languageVersion < core.ScriptTargetES2018 { v.error(diagnostics.Named_capturing_groups_are_only_available_when_targeting_ES2018_or_later, groupNameStart, v.pos-groupNameStart) } From 5f13252bfd70822b4028eabb9e6da3cf9765456f Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:33:53 +0800 Subject: [PATCH 09/20] Add two comments --- internal/regexpchecker/regexpchecker.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 40a8be60fb..accd8286b3 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -268,6 +268,7 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { switch v.charAtOffset(0) { case '=', '!': v.pos++ + // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable isPreviousTermQuantifiable = !v.anyUnicodeModeOrNonAnnexB case '<': groupNameStart := v.pos @@ -367,6 +368,7 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { case '*', '+', '?': v.pos++ if v.charAtOffset(0) == '?' 
{ + // Non-greedy v.pos++ } if !isPreviousTermQuantifiable { From 46e20005b3189c8648d6ae52cd6441d4b560b75b Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:35:27 +0800 Subject: [PATCH 10/20] Refactor(Fix Incorrect Disjunction): Don't return token value in `scanGroupName` --- internal/regexpchecker/regexpchecker.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index accd8286b3..86b702d9e9 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -278,7 +278,8 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { v.pos++ isPreviousTermQuantifiable = false default: - groupName = v.scanGroupName(false) + v.scanGroupName(false) + groupName = v.tokenValue v.scanExpectedChar('>') // TODO: Move to LanguageFeatureMinimumTarget.RegularExpressionNamedCapturingGroups if v.languageVersion < core.ScriptTargetES2018 { @@ -883,12 +884,11 @@ func parseHexValue(text string, start, end int) int { return code } -func (v *regExpValidator) scanGroupName(isReference bool) string { +func (v *regExpValidator) scanGroupName(isReference bool) { tokenStart := v.pos v.scanIdentifier(v.charAtOffset(0)) if v.pos == tokenStart { v.error(diagnostics.Expected_a_capturing_group_name, v.pos, 0) - return "" } if isReference { v.groupNameReferences = append(v.groupNameReferences, namedReference{pos: tokenStart, end: v.pos, name: v.tokenValue}) @@ -910,7 +910,6 @@ func (v *regExpValidator) scanGroupName(isReference bool) string { } v.groupSpecifiers[v.tokenValue] = true } - return v.tokenValue } // scanSourceCharacter scans and returns a single "character" from the source. 
From c4c767161e301eab79cea8acb384f2359abbbac0 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:36:52 +0800 Subject: [PATCH 11/20] Refactor: extract digit parsing to separate methods --- internal/regexpchecker/regexpchecker.go | 38 ++++++++++++++----------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 86b702d9e9..6b3403a1fe 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -337,14 +337,8 @@ func (v *regExpValidator) scanAlternative(isInGroup bool) { break } } else if maxVal != "" && (v.anyUnicodeModeOrNonAnnexB || v.charAtOffset(0) == '}') { - minInt := 0 - maxInt := 0 - for _, c := range minVal { - minInt = minInt*10 + int(c-'0') - } - for _, c := range maxVal { - maxInt = maxInt*10 + int(c-'0') - } + minInt := parseDecimalValue(minVal, 0, len(minVal)) + maxInt := parseDecimalValue(maxVal, 0, len(maxVal)) if minInt > maxInt { v.error(diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, v.pos-digitsStart) } @@ -516,10 +510,7 @@ func (v *regExpValidator) scanDecimalEscape() bool { if ch >= '1' && ch <= '9' { start := v.pos v.scanDigits() - value := 0 - for _, c := range v.tokenValue { - value = value*10 + int(c-'0') - } + value := parseDecimalValue(v.tokenValue, 0, len(v.tokenValue)) v.decimalEscapes = append(v.decimalEscapes, decimalEscape{pos: start, end: v.pos, value: value}) return true } @@ -719,10 +710,7 @@ func (v *regExpValidator) scanEscapeSequence(atomEscape bool) string { v.pos++ } // Always report errors for octal escapes in regexp mode - code := 0 - for i := start + 1; i < v.pos; i++ { - code = code*8 + int(v.text[i]-'0') - } + code := parseOctalValue(v.text, start+1, v.pos) hexCode := fmt.Sprintf("\\x%02x", code) if !atomEscape && ch != '0' { 
v.error(diagnostics.Octal_escape_sequences_and_backreferences_are_not_allowed_in_a_character_class_If_this_was_intended_as_an_escape_sequence_use_the_syntax_0_instead, start, v.pos-start, hexCode) @@ -868,6 +856,24 @@ func (v *regExpValidator) scanEscapeSequence(atomEscape bool) string { } } +// parseOctalValue parses octal digits from text and returns the integer value +func parseOctalValue(text string, start, end int) int { + code := 0 + for i := start; i < end; i++ { + code = code*8 + int(text[i]-'0') + } + return code +} + +// parseDecimalValue parses decimal digits from text and returns the integer value +func parseDecimalValue(text string, start, end int) int { + code := 0 + for i := start; i < end; i++ { + code = code*10 + int(text[i]-'0') + } + return code +} + // parseHexValue parses hexadecimal digits from text and returns the integer value func parseHexValue(text string, start, end int) int { code := 0 From c4c1dbe89e4a733032494a8211109b9ccec355a3 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:37:54 +0800 Subject: [PATCH 12/20] Refactor: Use `AppendSeq` for spelling candidates for consistency --- internal/regexpchecker/regexpchecker.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 6b3403a1fe..31565261ee 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -410,9 +410,7 @@ func (v *regExpValidator) validateGroupReferences() { if len(v.groupSpecifiers) > 0 { // Convert map keys to slice candidates := make([]string, 0, len(v.groupSpecifiers)) - for name := range v.groupSpecifiers { - candidates = append(candidates, name) - } + candidates = slices.AppendSeq(candidates, maps.Keys(v.groupSpecifiers)) suggestion := core.GetSpellingSuggestion(ref.name, candidates, core.Identity[string]) if suggestion != "" { v.error(diagnostics.Did_you_mean_0, ref.pos, ref.end-ref.pos, suggestion) @@ -562,9 +560,7 @@ func (v
*regExpValidator) scanUnicodePropertyValueExpression(isCharacterCompleme v.error(diagnostics.Unknown_Unicode_property_name, propertyNameOrValueStart, v.pos-propertyNameOrValueStart) // Provide spelling suggestion candidates := make([]string, 0, len(nonBinaryUnicodePropertyNames)) - for key := range nonBinaryUnicodePropertyNames { - candidates = append(candidates, key) - } + candidates = slices.AppendSeq(candidates, maps.Keys(nonBinaryUnicodePropertyNames)) suggestion := core.GetSpellingSuggestion(propertyNameOrValue, candidates, core.Identity[string]) if suggestion != "" { v.error(diagnostics.Did_you_mean_0, propertyNameOrValueStart, v.pos-propertyNameOrValueStart, suggestion) From 528d7ba14ca51311c18105fc8aeb1cbcfa2de6de Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:40:32 +0800 Subject: [PATCH 13/20] Fix: Only word characters should be scanned in Unicode property value expressions --- internal/regexpchecker/regexpchecker.go | 24 ++++++++++++++++++++---- internal/scanner/scanner.go | 6 +++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 31565261ee..34dbd034fd 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -547,7 +547,7 @@ func (v *regExpValidator) scanUnicodePropertyValueExpression(isCharacterCompleme start := v.pos - 3 propertyNameOrValueStart := v.pos - v.scanIdentifier(v.charAtOffset(0)) + v.scanWordCharacters(v.charAtOffset(0)) propertyNameOrValue := v.tokenValue if v.charAtOffset(0) == '=' { @@ -569,7 +569,7 @@ func (v *regExpValidator) scanUnicodePropertyValueExpression(isCharacterCompleme } v.pos++ propertyValueStart := v.pos - v.scanIdentifier(v.charAtOffset(0)) + v.scanWordCharacters(v.charAtOffset(0)) propertyValue := v.tokenValue if v.pos == propertyValueStart { v.error(diagnostics.Expected_a_Unicode_property_value, propertyValueStart, 0) @@ -626,13 +626,29 @@ func (v 
*regExpValidator) scanUnicodePropertyValueExpression(isCharacterCompleme } } +func (v *regExpValidator) scanWordCharacters(ch rune) { + start := v.pos + if ch != 0 && scanner.IsWordCharacter(ch) { + v.pos++ + for v.pos < v.end { + ch = v.charAtOffset(0) + if scanner.IsWordCharacter(ch) { + v.pos++ + } else { + break + } + } + } + v.tokenValue = v.text[start:v.pos] +} + func (v *regExpValidator) scanIdentifier(ch rune) { start := v.pos - if ch != 0 && (scanner.IsIdentifierStart(ch) || ch == '_' || ch == '$') { + if ch != 0 && scanner.IsIdentifierStart(ch) { v.pos++ for v.pos < v.end { ch = v.charAtOffset(0) - if scanner.IsIdentifierPart(ch) || ch == '_' || ch == '$' { + if scanner.IsIdentifierPart(ch) { v.pos++ } else { break diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 1ee64b62f1..90a402c293 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -1401,7 +1401,7 @@ func (s *Scanner) scanIdentifier(prefixLength int) bool { for { s.pos++ ch = s.char() - if !(isWordCharacter(ch) || ch == '$') { + if !(IsWordCharacter(ch) || ch == '$') { break } } @@ -2007,7 +2007,7 @@ func IsValidIdentifier(s string) bool { } // Section 6.1.4 -func isWordCharacter(ch rune) bool { +func IsWordCharacter(ch rune) bool { return stringutil.IsASCIILetter(ch) || stringutil.IsDigit(ch) || ch == '_' } @@ -2020,7 +2020,7 @@ func IsIdentifierPart(ch rune) bool { } func IsIdentifierPartEx(ch rune, languageVariant core.LanguageVariant) bool { - return isWordCharacter(ch) || ch == '$' || + return IsWordCharacter(ch) || ch == '$' || ch >= utf8.RuneSelf && isUnicodeIdentifierPart(ch) || languageVariant == core.LanguageVariantJSX && (ch == '-' || ch == ':') // "-" and ":" are valid in JSX Identifiers } From 581edc1148325e98f51b72c43ac45abb380fc405 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 12:41:07 +0800 Subject: [PATCH 14/20] Refactor: Use `switch` for `canonicalName` --- internal/regexpchecker/regexpchecker.go | 5 +++-- 
internal/regexpchecker/tables.go | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 34dbd034fd..78b8ef1708 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -578,9 +578,10 @@ func (v *regExpValidator) scanUnicodePropertyValueExpression(isCharacterCompleme // Provide spelling suggestion based on the property name canonicalName := nonBinaryUnicodePropertyNames[propertyNameOrValue] var candidates []string - if canonicalName == "General_Category" { + switch canonicalName { + case "General_Category": candidates = generalCategoryValues.KeysSlice() - } else if canonicalName == "Script" || canonicalName == "Script_Extensions" { + case "Script", "Script_Extensions": candidates = scriptValues.KeysSlice() } if len(candidates) > 0 { diff --git a/internal/regexpchecker/tables.go b/internal/regexpchecker/tables.go index ddf554b31e..fd6087c442 100644 --- a/internal/regexpchecker/tables.go +++ b/internal/regexpchecker/tables.go @@ -550,10 +550,10 @@ func isValidNonBinaryUnicodePropertyName(name string) bool { func isValidUnicodeProperty(name, value string) bool { canonicalName := nonBinaryUnicodePropertyNames[name] - if canonicalName == "General_Category" { + switch canonicalName { + case "General_Category": return generalCategoryValues.Has(value) - } - if canonicalName == "Script" || canonicalName == "Script_Extensions" { + case "Script", "Script_Extensions": return scriptValues.Has(value) } return false From bceabafe8bbbfae473ec3c44fbaae3044ac5688d Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 13:03:34 +0800 Subject: [PATCH 15/20] Fix: `/[\c]/` should match `\` (Incorrect error text range in `/[_-\c]/`) --- internal/regexpchecker/regexpchecker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go 
index 78b8ef1708..0f8afd6292 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -674,7 +674,7 @@ func (v *regExpValidator) scanCharacterEscape(atomEscape bool) string { } if v.anyUnicodeModeOrNonAnnexB { v.error(diagnostics.X_c_must_be_followed_by_an_ASCII_letter, v.pos-2, 2) - } else if atomEscape { + } else { v.pos-- return "\\" } From 87e01b08bfa7899bc3221a3617b474e94ad3adbd Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 13:07:42 +0800 Subject: [PATCH 16/20] Fix escape sequence error position (removes 1 diff) --- internal/regexpchecker/regexpchecker.go | 22 +--- ...numericSeparators.unicodeEscape.errors.txt | 28 ++--- ...icSeparators.unicodeEscape.errors.txt.diff | 108 ------------------ 3 files changed, 20 insertions(+), 138 deletions(-) delete mode 100644 testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt.diff diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 0f8afd6292..502f9e82de 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -757,18 +757,13 @@ func (v *regExpValidator) scanEscapeSequence(atomEscape bool) string { case 'x': // Hex escape '\xDD' hexStart := v.pos - validHex := true for range 2 { if v.pos >= v.end || !stringutil.IsHexDigit(v.charAtOffset(0)) { - validHex = false - break + v.error(diagnostics.Hexadecimal_digit_expected, v.pos, 0) + return v.text[start:v.pos] } v.pos++ } - if !validHex { - v.error(diagnostics.Hexadecimal_digit_expected, hexStart, v.pos-hexStart) - return v.text[start:v.pos] - } code := parseHexValue(v.text, hexStart, v.pos) return string(rune(code)) @@ -789,8 +784,8 @@ func (v *regExpValidator) scanEscapeSequence(atomEscape bool) string { } if v.charAtOffset(0) == '}' { v.pos++ - } else if hasDigits { - v.error(diagnostics.Unterminated_Unicode_escape_sequence, start, v.pos-start) + } else { + 
v.error(diagnostics.Unterminated_Unicode_escape_sequence, v.pos, 0) return v.text[start:v.pos] } // Parse hex value (-1 to skip closing brace) @@ -806,18 +801,13 @@ func (v *regExpValidator) scanEscapeSequence(atomEscape bool) string { } else { // Standard unicode escape '\uDDDD' hexStart := v.pos - validHex := true for range 4 { if v.pos >= v.end || !stringutil.IsHexDigit(v.charAtOffset(0)) { - validHex = false - break + v.error(diagnostics.Hexadecimal_digit_expected, v.pos, 0) + return v.text[start:v.pos] } v.pos++ } - if !validHex { - v.error(diagnostics.Hexadecimal_digit_expected, hexStart, v.pos-hexStart) - return v.text[start:v.pos] - } code := parseHexValue(v.text, hexStart, v.pos) // For surrogates, we need to preserve the actual value since string(rune(surrogate)) // converts to 0xFFFD. We encode the surrogate as UTF-16BE bytes. diff --git a/testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt b/testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt index 10c3bfa576..6e8015ba66 100644 --- a/testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt +++ b/testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt @@ -1,7 +1,7 @@ 1.ts(1,7): error TS1199: Unterminated Unicode escape sequence. 10.ts(1,5): error TS1125: Hexadecimal digit expected. 11.ts(1,5): error TS1125: Hexadecimal digit expected. -12.ts(1,4): error TS1125: Hexadecimal digit expected. +12.ts(1,5): error TS1125: Hexadecimal digit expected. 13.ts(1,5): error TS1125: Hexadecimal digit expected. 14.ts(1,5): error TS1125: Hexadecimal digit expected. 15.ts(1,5): error TS1125: Hexadecimal digit expected. @@ -19,28 +19,28 @@ 25.ts(1,11): error TS1199: Unterminated Unicode escape sequence. 26.ts(1,11): error TS1199: Unterminated Unicode escape sequence. 27.ts(1,11): error TS1199: Unterminated Unicode escape sequence. 
-28.ts(1,2): error TS1199: Unterminated Unicode escape sequence. +28.ts(1,11): error TS1199: Unterminated Unicode escape sequence. 28.ts(1,12): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? 3.ts(1,7): error TS1199: Unterminated Unicode escape sequence. 37.ts(1,7): error TS1199: Unterminated Unicode escape sequence. 38.ts(1,7): error TS1199: Unterminated Unicode escape sequence. 39.ts(1,7): error TS1199: Unterminated Unicode escape sequence. -4.ts(1,2): error TS1199: Unterminated Unicode escape sequence. +4.ts(1,7): error TS1199: Unterminated Unicode escape sequence. 4.ts(1,12): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? -40.ts(1,2): error TS1199: Unterminated Unicode escape sequence. +40.ts(1,7): error TS1199: Unterminated Unicode escape sequence. 40.ts(1,13): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? 41.ts(1,6): error TS1125: Hexadecimal digit expected. 42.ts(1,6): error TS1125: Hexadecimal digit expected. 43.ts(1,6): error TS1125: Hexadecimal digit expected. -44.ts(1,4): error TS1125: Hexadecimal digit expected. +44.ts(1,6): error TS1125: Hexadecimal digit expected. 45.ts(1,5): error TS1125: Hexadecimal digit expected. 46.ts(1,5): error TS1125: Hexadecimal digit expected. 47.ts(1,5): error TS1125: Hexadecimal digit expected. -48.ts(1,4): error TS1125: Hexadecimal digit expected. +48.ts(1,5): error TS1125: Hexadecimal digit expected. 5.ts(1,6): error TS1125: Hexadecimal digit expected. 6.ts(1,6): error TS1125: Hexadecimal digit expected. 7.ts(1,6): error TS1125: Hexadecimal digit expected. -8.ts(1,4): error TS1125: Hexadecimal digit expected. +8.ts(1,6): error TS1125: Hexadecimal digit expected. 9.ts(1,5): error TS1125: Hexadecimal digit expected. @@ -61,7 +61,7 @@ ==== 4.ts (2 errors) ==== /\u{10_ffff}/u - ~~~~~ + !!! error TS1199: Unterminated Unicode escape sequence. ~ !!! error TS1508: Unexpected '}'. Did you mean to escape it with backslash? 
@@ -83,7 +83,7 @@ ==== 8.ts (1 errors) ==== /\uff_ff/u - ~~ + !!! error TS1125: Hexadecimal digit expected. ==== 9.ts (1 errors) ==== @@ -103,7 +103,7 @@ ==== 12.ts (1 errors) ==== /\xf_f/u - ~ + !!! error TS1125: Hexadecimal digit expected. ==== 13.ts (1 errors) ==== @@ -185,7 +185,7 @@ ==== 28.ts (2 errors) ==== /\u{10ffff_}/u - ~~~~~~~~~ + !!! error TS1199: Unterminated Unicode escape sequence. ~ !!! error TS1508: Unexpected '}'. Did you mean to escape it with backslash? @@ -231,7 +231,7 @@ ==== 40.ts (2 errors) ==== /\u{10__ffff}/u - ~~~~~ + !!! error TS1199: Unterminated Unicode escape sequence. ~ !!! error TS1508: Unexpected '}'. Did you mean to escape it with backslash? @@ -253,7 +253,7 @@ ==== 44.ts (1 errors) ==== /\uff__ff/u - ~~ + !!! error TS1125: Hexadecimal digit expected. ==== 45.ts (1 errors) ==== @@ -273,6 +273,6 @@ ==== 48.ts (1 errors) ==== /\xf__f/u - ~ + !!! error TS1125: Hexadecimal digit expected. \ No newline at end of file diff --git a/testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt.diff b/testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt.diff deleted file mode 100644 index 0cce15a967..0000000000 --- a/testdata/baselines/reference/submodule/conformance/parser.numericSeparators.unicodeEscape.errors.txt.diff +++ /dev/null @@ -1,108 +0,0 @@ ---- old.parser.numericSeparators.unicodeEscape.errors.txt -+++ new.parser.numericSeparators.unicodeEscape.errors.txt -@@= skipped -0, +0 lines =@@ - 1.ts(1,7): error TS1199: Unterminated Unicode escape sequence. - 10.ts(1,5): error TS1125: Hexadecimal digit expected. - 11.ts(1,5): error TS1125: Hexadecimal digit expected. --12.ts(1,5): error TS1125: Hexadecimal digit expected. -+12.ts(1,4): error TS1125: Hexadecimal digit expected. - 13.ts(1,5): error TS1125: Hexadecimal digit expected. - 14.ts(1,5): error TS1125: Hexadecimal digit expected. - 15.ts(1,5): error TS1125: Hexadecimal digit expected. 
-@@= skipped -18, +18 lines =@@ - 25.ts(1,11): error TS1199: Unterminated Unicode escape sequence. - 26.ts(1,11): error TS1199: Unterminated Unicode escape sequence. - 27.ts(1,11): error TS1199: Unterminated Unicode escape sequence. --28.ts(1,11): error TS1199: Unterminated Unicode escape sequence. -+28.ts(1,2): error TS1199: Unterminated Unicode escape sequence. - 28.ts(1,12): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? - 3.ts(1,7): error TS1199: Unterminated Unicode escape sequence. - 37.ts(1,7): error TS1199: Unterminated Unicode escape sequence. - 38.ts(1,7): error TS1199: Unterminated Unicode escape sequence. - 39.ts(1,7): error TS1199: Unterminated Unicode escape sequence. --4.ts(1,7): error TS1199: Unterminated Unicode escape sequence. -+4.ts(1,2): error TS1199: Unterminated Unicode escape sequence. - 4.ts(1,12): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? --40.ts(1,7): error TS1199: Unterminated Unicode escape sequence. -+40.ts(1,2): error TS1199: Unterminated Unicode escape sequence. - 40.ts(1,13): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? - 41.ts(1,6): error TS1125: Hexadecimal digit expected. - 42.ts(1,6): error TS1125: Hexadecimal digit expected. - 43.ts(1,6): error TS1125: Hexadecimal digit expected. --44.ts(1,6): error TS1125: Hexadecimal digit expected. -+44.ts(1,4): error TS1125: Hexadecimal digit expected. - 45.ts(1,5): error TS1125: Hexadecimal digit expected. - 46.ts(1,5): error TS1125: Hexadecimal digit expected. - 47.ts(1,5): error TS1125: Hexadecimal digit expected. --48.ts(1,5): error TS1125: Hexadecimal digit expected. -+48.ts(1,4): error TS1125: Hexadecimal digit expected. - 5.ts(1,6): error TS1125: Hexadecimal digit expected. - 6.ts(1,6): error TS1125: Hexadecimal digit expected. - 7.ts(1,6): error TS1125: Hexadecimal digit expected. --8.ts(1,6): error TS1125: Hexadecimal digit expected. -+8.ts(1,4): error TS1125: Hexadecimal digit expected. 
- 9.ts(1,5): error TS1125: Hexadecimal digit expected. - - -@@= skipped -42, +42 lines =@@ - - ==== 4.ts (2 errors) ==== - /\u{10_ffff}/u -- -+ ~~~~~ - !!! error TS1199: Unterminated Unicode escape sequence. - ~ - !!! error TS1508: Unexpected '}'. Did you mean to escape it with backslash? -@@= skipped -22, +22 lines =@@ - - ==== 8.ts (1 errors) ==== - /\uff_ff/u -- -+ ~~ - !!! error TS1125: Hexadecimal digit expected. - - ==== 9.ts (1 errors) ==== -@@= skipped -20, +20 lines =@@ - - ==== 12.ts (1 errors) ==== - /\xf_f/u -- -+ ~ - !!! error TS1125: Hexadecimal digit expected. - - ==== 13.ts (1 errors) ==== -@@= skipped -82, +82 lines =@@ - - ==== 28.ts (2 errors) ==== - /\u{10ffff_}/u -- -+ ~~~~~~~~~ - !!! error TS1199: Unterminated Unicode escape sequence. - ~ - !!! error TS1508: Unexpected '}'. Did you mean to escape it with backslash? -@@= skipped -46, +46 lines =@@ - - ==== 40.ts (2 errors) ==== - /\u{10__ffff}/u -- -+ ~~~~~ - !!! error TS1199: Unterminated Unicode escape sequence. - ~ - !!! error TS1508: Unexpected '}'. Did you mean to escape it with backslash? -@@= skipped -22, +22 lines =@@ - - ==== 44.ts (1 errors) ==== - /\uff__ff/u -- -+ ~~ - !!! error TS1125: Hexadecimal digit expected. - - ==== 45.ts (1 errors) ==== -@@= skipped -20, +20 lines =@@ - - ==== 48.ts (1 errors) ==== - /\xf__f/u -- -+ ~ - !!! error TS1125: Hexadecimal digit expected. 
- \ No newline at end of file From 63a819afec8236af6a8c4eec42508d86ec1e0277 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 13:09:42 +0800 Subject: [PATCH 17/20] Refactor: Replace slices with `charAtOffset` --- internal/regexpchecker/regexpchecker.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index 502f9e82de..d878a482f9 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -820,7 +820,7 @@ func (v *regExpValidator) scanEscapeSequence(atomEscape bool) string { } // In Unicode mode, check for surrogate pairs if v.anyUnicodeMode && isHighSurrogate(rune(code)) && - v.pos+6 <= v.end && v.text[v.pos:v.pos+2] == "\\u" { + v.pos+6 <= v.end && v.charAtOffset(0) == '\\' && v.charAtOffset(1) == 'u' { // High surrogate followed by potential low surrogate nextStart := v.pos nextPos := v.pos + 2 @@ -1099,8 +1099,7 @@ func (v *regExpValidator) scanClassSetExpression() { var operand string // Check for operators at the start - slice := v.text[v.pos:min(v.pos+2, v.end)] - if slice == "--" || slice == "&&" { + if ch == v.charAtOffset(1) && (ch == '-' || ch == '&') { v.error(diagnostics.Expected_a_class_set_operand, v.pos, 0) v.mayContainStrings = false } else { @@ -1206,8 +1205,8 @@ func (v *regExpValidator) scanClassSetExpression() { } start = v.pos - slice = v.text[v.pos:min(v.pos+2, v.end)] - if slice == "--" || slice == "&&" { + ch = v.charAtOffset(0) + if ch == v.charAtOffset(1) && (ch == '-' || ch == '&') { v.error(diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, v.pos, 2) v.pos += 2 operand = v.text[start:v.pos] From 60a35ea0e76a23bd63662944206d6f37fb2ce7b4 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 13:11:14 +0800 Subject: [PATCH 18/20] Remove redundant check --- internal/regexpchecker/regexpchecker.go | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index d878a482f9..c9cdcfcbf2 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -978,7 +978,7 @@ func (v *regExpValidator) scanClassRanges() { } atomStart := v.pos atom := v.scanClassAtom() - if v.charAtOffset(0) == '-' && v.charAtOffset(1) != ']' { + if v.charAtOffset(0) == '-' { v.pos++ if v.isClassContentExit(v.charAtOffset(0)) { return From a2ffe1ab5e7f426822f06a2acf4cdfb192dc08cc Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 13:13:14 +0800 Subject: [PATCH 19/20] Remove a mistaken error in `scanClassSetExpression` (https://github.com/microsoft/TypeScript/issues/62707) --- internal/regexpchecker/regexpchecker.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index c9cdcfcbf2..f7ef9f376b 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ -1127,8 +1127,6 @@ func (v *regExpValidator) scanClassSetExpression() { expressionMayContainStrings = v.mayContainStrings v.mayContainStrings = !isCharacterComplement && expressionMayContainStrings return - } else { - v.error(diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, v.pos, 1, string(ch)) } default: if isCharacterComplement && v.mayContainStrings { From fed5053895a3732b7d3b9e7cb7b3f84bc6ea0527 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Mon, 17 Nov 2025 13:13:43 +0800 Subject: [PATCH 20/20] Clarify comments in `scanClassSetExpression` --- internal/regexpchecker/regexpchecker.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/regexpchecker/regexpchecker.go b/internal/regexpchecker/regexpchecker.go index f7ef9f376b..51e152dfd6 100644 --- a/internal/regexpchecker/regexpchecker.go +++ b/internal/regexpchecker/regexpchecker.go @@ 
-1106,7 +1106,7 @@ func (v *regExpValidator) scanClassSetExpression() { operand = v.scanClassSetOperand() } - // Check what follows the first operand + // Use the first operator to determine the expression type switch v.charAtOffset(0) { case '-': if v.charAtOffset(1) == '-' { @@ -1135,7 +1135,7 @@ func (v *regExpValidator) scanClassSetExpression() { expressionMayContainStrings = v.mayContainStrings } - // Continue scanning operands + // Neither a classSetExpressionIntersection nor a classSetExpressionSubtraction, scan as class union for { ch = v.charAtOffset(0) if ch == 0 {