diff --git a/.gitignore b/.gitignore index 4fc9b28f88a5..3e7b0fbc6782 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,6 @@ serena/ .beads/ AGENTS.md +# Generated benchmark output +packages/pyright-internal/src/tests/benchmarks/.generated/ + diff --git a/packages/pyright-internal/package.json b/packages/pyright-internal/package.json index 990232d58b82..aa4d7a82006a 100644 --- a/packages/pyright-internal/package.json +++ b/packages/pyright-internal/package.json @@ -13,9 +13,10 @@ "clean": "shx rm -rf ./dist ./out", "webpack:testserver": "webpack --config ./src/tests/lsp/webpack.testserver.config.js --mode=development", "webpack:testserver:watch": "npm run clean && webpack --config ./src/tests/lsp/webpack.testserver.config.js --mode development --watch --progress", - "test": "npm run webpack:testserver && node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit", - "test:norebuild": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit", - "test:coverage": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --reporters=jest-junit --reporters=default --coverage --coverageReporters=cobertura --coverageReporters=html --coverageReporters=json", + "test": "npm run webpack:testserver && node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks", + "test:norebuild": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks", + "test:benchmark": "cross-env PYRIGHT_RUN_BENCHMARKS=1 node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testTimeout=300000 --runInBand --detectOpenHandles src/tests/benchmarks", + "test:coverage": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks --reporters=jest-junit --reporters=default 
--coverage --coverageReporters=cobertura --coverageReporters=html --coverageReporters=json", "test:imports": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest importResolver.test --forceExit --runInBand" }, "dependencies": { diff --git a/packages/pyright-internal/src/analyzer/sourceFile.ts b/packages/pyright-internal/src/analyzer/sourceFile.ts index 776801ecabca..feb92ad716c1 100644 --- a/packages/pyright-internal/src/analyzer/sourceFile.ts +++ b/packages/pyright-internal/src/analyzer/sourceFile.ts @@ -774,7 +774,7 @@ export class SourceFile { this._writableData.taskListDiagnostics = []; this._addTaskListDiagnostics( configOptions.taskListTokens, - parseFileResults.tokenizerOutput, + parseFileResults, this._writableData.taskListDiagnostics ); }); @@ -1327,13 +1327,16 @@ export class SourceFile { // to the specified diagnostic list. private _addTaskListDiagnostics( taskListTokens: TaskListToken[] | undefined, - tokenizerOutput: TokenizerOutput, + parseFileResults: ParseFileResults, diagList: Diagnostic[] ) { if (!taskListTokens || taskListTokens.length === 0 || !diagList) { return; } + const tokenizerOutput = parseFileResults.tokenizerOutput; + const fileContents = parseFileResults.text; + for (let i = 0; i < tokenizerOutput.tokens.count; i++) { const token = tokenizerOutput.tokens.getItemAt(i); @@ -1343,36 +1346,65 @@ export class SourceFile { } for (const comment of token.comments) { - for (const token of taskListTokens) { - // Check if the comment matches the task list token. - // The comment must start with zero or more whitespace characters, - // followed by the taskListToken (case insensitive), - // followed by (0+ whitespace + EOL) OR (1+ NON-alphanumeric characters) - const regexStr = '^[\\s]*' + token.text + '([\\s]*$|[\\W]+)'; - const regex = RegExp(regexStr, 'i'); // case insensitive - - // If the comment doesn't match, skip it. 
- if (!regex.test(comment.value)) { + for (const taskToken of taskListTokens) { + // Match: optional leading whitespace, then taskToken.text (case-insensitive), + // then either (whitespace to end) or (non-alphanumeric char). + const commentStart = comment.start; + const commentEnd = commentStart + comment.length; + const taskText = taskToken.text; + const taskLen = taskText.length; + + // Skip leading whitespace within the source text range. + let pos = commentStart; + while (pos < commentEnd) { + const ch = fileContents.charCodeAt(pos); + if (ch === 0x20 || ch === 0x09 || ch === 0x0a || ch === 0x0d || ch === 0x0c || ch === 0x0b) { + pos++; + } else { + break; + } + } + + // Check if the task token text matches (case-insensitive). + if (pos + taskLen > commentEnd) { continue; } - // Calculate the range for the diagnostic. This allows navigation - // to the comment via double clicking the item in the task list pane. - let rangeStart = comment.start; + let matched = true; + for (let k = 0; k < taskLen; k++) { + const a = fileContents.charCodeAt(pos + k); + const b = taskText.charCodeAt(k); + if (a !== b && (a | 0x20) !== (b | 0x20)) { + matched = false; + break; + } + } + if (!matched) { + continue; + } - // The comment technically starts right after the comment identifier(#), - // but we want the caret right before the task list token (since there - // might be whitespace before it). - const indexOfToken = comment.value.toLowerCase().indexOf(token.text.toLowerCase()); - rangeStart += indexOfToken; + // After the token, require whitespace-to-end or a non-word character. + const afterPos = pos + taskLen; + if (afterPos < commentEnd) { + const ch = fileContents.charCodeAt(afterPos); + // Check if ch is a word character [a-zA-Z0-9_] + const isWord = + (ch >= 0x61 && ch <= 0x7a) || + (ch >= 0x41 && ch <= 0x5a) || + (ch >= 0x30 && ch <= 0x39) || + ch === 0x5f; + if (isWord) { + continue; + } + } + // Match succeeded. 
pos is the offset of the task token in the source text. const rangeEnd = TextRange.getEnd(comment); - const range = convertOffsetsToRange(rangeStart, rangeEnd, tokenizerOutput.lines!); + const range = convertOffsetsToRange(pos, rangeEnd, tokenizerOutput.lines!); - // Add the diagnostic to the list and trim whitespace from the comment so - // it's easier to read in the task list. + const commentValue = comment.value; diagList.push( - new Diagnostic(DiagnosticCategory.TaskItem, comment.value.trim(), range, token.priority) + new Diagnostic(DiagnosticCategory.TaskItem, commentValue.trim(), range, taskToken.priority) ); } } diff --git a/packages/pyright-internal/src/parser/characterStream.ts b/packages/pyright-internal/src/parser/characterStream.ts index a7065bffd1b9..4960c552edce 100644 --- a/packages/pyright-internal/src/parser/characterStream.ts +++ b/packages/pyright-internal/src/parser/characterStream.ts @@ -108,8 +108,30 @@ export class CharacterStream { } skipWhitespace(): void { - while (!this.isEndOfStream() && this.isAtWhiteSpace()) { - this.moveNext(); + // Tight loop: advance _position/_currentChar directly while the + // current char is a space/tab/form-feed. Avoids the method-call + // overhead of moveNext() + isAtWhiteSpace() + isWhiteSpace() per + // iteration, which is one of the hottest paths in tokenization. 
+ const text = this._text; + const len = text.length; + let pos = this._position; + while (pos < len) { + const ch = text.charCodeAt(pos); + if (ch === Char.Space || ch === Char.Tab || ch === Char.FormFeed) { + pos++; + } else { + break; + } + } + if (pos !== this._position) { + this._position = pos; + if (pos >= len) { + this._isEndOfStream = true; + this._position = len; + this._currentChar = 0; + } else { + this._currentChar = text.charCodeAt(pos); + } } } diff --git a/packages/pyright-internal/src/parser/parser.ts b/packages/pyright-internal/src/parser/parser.ts index 4e6e86f63de8..212e7a41928d 100644 --- a/packages/pyright-internal/src/parser/parser.ts +++ b/packages/pyright-internal/src/parser/parser.ts @@ -232,6 +232,8 @@ const maxChildNodeDepth = 256; export class Parser { private _fileContents?: string; private _tokenizerOutput?: TokenizerOutput; + private _tokens?: TextRangeCollection; + private _tokenCount = 0; private _tokenIndex = 0; private _areErrorsSuppressed = false; private _parseOptions: ParseOptions = new ParseOptions(); @@ -406,6 +408,8 @@ export class Parser { initialParenDepth, this._parseOptions.useNotebookMode ); + this._tokens = this._tokenizerOutput.tokens; + this._tokenCount = this._tokens.count; this._tokenIndex = 0; } @@ -5259,7 +5263,7 @@ export class Parser { } private _getNextToken(): Token { - const token = this._tokenizerOutput!.tokens.getItemAt(this._tokenIndex); + const token = this._tokens!.getItemAt(this._tokenIndex); if (!this._atEof()) { this._tokenIndex++; } @@ -5270,19 +5274,20 @@ export class Parser { private _atEof(): boolean { // Are we pointing at the last token in the stream (which is // assumed to be an end-of-stream token)? 
- return this._tokenIndex >= this._tokenizerOutput!.tokens.count - 1; + return this._tokenIndex >= this._tokenCount - 1; } private _peekToken(count = 0): Token { - if (this._tokenIndex + count < 0) { - return this._tokenizerOutput!.tokens.getItemAt(0); + const targetIndex = this._tokenIndex + count; + if (targetIndex < 0) { + return this._tokens!.getItemAt(0); } - if (this._tokenIndex + count >= this._tokenizerOutput!.tokens.count) { - return this._tokenizerOutput!.tokens.getItemAt(this._tokenizerOutput!.tokens.count - 1); + if (targetIndex >= this._tokenCount) { + return this._tokens!.getItemAt(this._tokenCount - 1); } - return this._tokenizerOutput!.tokens.getItemAt(this._tokenIndex + count); + return this._tokens!.getItemAt(targetIndex); } private _peekTokenType(): TokenType { diff --git a/packages/pyright-internal/src/parser/tokenizer.ts b/packages/pyright-internal/src/parser/tokenizer.ts index 416532935708..2e75bb5351d8 100644 --- a/packages/pyright-internal/src/parser/tokenizer.ts +++ b/packages/pyright-internal/src/parser/tokenizer.ts @@ -92,6 +92,66 @@ const _keywords: Map = new Map([ const _softKeywords = new Set(['match', 'case', 'type']); +// Fast-reject table: keywords are 2–9 chars long and only start with these +// character codes. A 128-entry boolean table indexed by charCodeAt(0) rejects +// most identifiers without touching the _keywords Map. +const _keywordFirstCharTable: boolean[] = (() => { + const table = new Array(128).fill(false); + for (const kw of _keywords.keys()) { + const code = kw.charCodeAt(0); + if (code < 128) { + table[code] = true; + } + } + return table; +})(); + +const _keywordMinLen = 2; +const _keywordMaxLen = 9; // __debug__ + +interface KeywordEntry { + text: string; + type: KeywordType; +} + +// For keyword-like identifiers, compare directly against the source text slice +// to avoid creating temporary substring objects on the keyword path. 
+const _keywordEntriesByFirstChar: Array = (() => { + const entriesByFirstChar: Array = new Array(128); + for (const [text, type] of _keywords.entries()) { + const firstCharCode = text.charCodeAt(0); + if (firstCharCode < 128) { + const entries = entriesByFirstChar[firstCharCode] ?? (entriesByFirstChar[firstCharCode] = []); + entries.push({ text, type }); + } + } + return entriesByFirstChar; +})(); + +function getKeywordTypeFromTextSlice(text: string, start: number, length: number): KeywordType | undefined { + if (length < _keywordMinLen || length > _keywordMaxLen) { + return undefined; + } + + const firstCharCode = text.charCodeAt(start); + if (firstCharCode >= 128 || !_keywordFirstCharTable[firstCharCode]) { + return undefined; + } + + const candidates = _keywordEntriesByFirstChar[firstCharCode]; + if (!candidates) { + return undefined; + } + + for (const candidate of candidates) { + if (candidate.text.length === length && text.startsWith(candidate.text, start)) { + return candidate.type; + } + } + + return undefined; +} + const _operatorInfo: { [key: number]: OperatorFlags } = { [OperatorType.Add]: OperatorFlags.Unary | OperatorFlags.Binary, [OperatorType.AddEqual]: OperatorFlags.Assignment, @@ -138,17 +198,377 @@ const _operatorInfo: { [key: number]: OperatorFlags } = { [OperatorType.NotIn]: OperatorFlags.Binary, }; +const _unsetSingleCharOperatorType = -1; +const _singleCharOperatorTypeTable: Int16Array = (() => { + const table = new Int16Array(128); + table.fill(_unsetSingleCharOperatorType); + table[Char.Equal] = OperatorType.Assign; + table[Char.Plus] = OperatorType.Add; + table[Char.Hyphen] = OperatorType.Subtract; + table[Char.Asterisk] = OperatorType.Multiply; + table[Char.Slash] = OperatorType.Divide; + table[Char.Ampersand] = OperatorType.BitwiseAnd; + table[Char.Bar] = OperatorType.BitwiseOr; + table[Char.Caret] = OperatorType.BitwiseXor; + table[Char.Percent] = OperatorType.Mod; + table[Char.Tilde] = OperatorType.BitwiseInvert; + table[Char.At] = 
OperatorType.MatrixMultiply; + table[Char.Less] = OperatorType.LessThan; + table[Char.Greater] = OperatorType.GreaterThan; + return table; +})(); + +const _singleCharEqualOperatorTypeTable: Int16Array = (() => { + const table = new Int16Array(128); + table.fill(_unsetSingleCharOperatorType); + table[Char.Plus] = OperatorType.AddEqual; + table[Char.Hyphen] = OperatorType.SubtractEqual; + table[Char.Asterisk] = OperatorType.MultiplyEqual; + table[Char.Slash] = OperatorType.DivideEqual; + table[Char.Ampersand] = OperatorType.BitwiseAndEqual; + table[Char.Bar] = OperatorType.BitwiseOrEqual; + table[Char.Caret] = OperatorType.BitwiseXorEqual; + table[Char.Percent] = OperatorType.ModEqual; + table[Char.At] = OperatorType.MatrixMultiplyEqual; + return table; +})(); + +function getTwoCharKey(char1: number, char2: number): number { + return (char1 << 8) | char2; +} + +// Two-char operator/token tables: use Map instead of Int16Array(65536). +// With only 5+1 entries, a Map uses ~200 bytes vs 256KB for two Int16Arrays. 
+const _twoCharOperatorTypeMap = new Map([ + [getTwoCharKey(Char.Equal, Char.Equal), OperatorType.Equals], + [getTwoCharKey(Char.ExclamationMark, Char.Equal), OperatorType.NotEquals], + [getTwoCharKey(Char.Less, Char.Equal), OperatorType.LessThanOrEqual], + [getTwoCharKey(Char.Greater, Char.Equal), OperatorType.GreaterThanOrEqual], + [getTwoCharKey(Char.Less, Char.Greater), OperatorType.LessOrGreaterThan], +]); + +const _twoCharSpecialTokenTypeMap = new Map([ + [getTwoCharKey(Char.Hyphen, Char.Greater), TokenType.Arrow], +]); + +const _repeatedCharOperatorTypeTable: Int16Array = (() => { + const table = new Int16Array(128); + table.fill(_unsetSingleCharOperatorType); + table[Char.Asterisk] = OperatorType.Power; + table[Char.Slash] = OperatorType.FloorDivide; + table[Char.Less] = OperatorType.LeftShift; + table[Char.Greater] = OperatorType.RightShift; + return table; +})(); + +const _repeatedCharEqualOperatorTypeTable: Int16Array = (() => { + const table = new Int16Array(128); + table.fill(_unsetSingleCharOperatorType); + table[Char.Asterisk] = OperatorType.PowerEqual; + table[Char.Slash] = OperatorType.FloorDivideEqual; + table[Char.Less] = OperatorType.LeftShiftEqual; + table[Char.Greater] = OperatorType.RightShiftEqual; + return table; +})(); + const _byteOrderMarker = 0xfeff; const defaultTabSize = 8; -const magicsRegEx = /\\\s*$/; -// The character class for type: ignore rule codes includes ':' so that -// tool-namespaced codes such as "ty:unresolved-reference" are accepted. -// pyright: ignore uses the original class since tool-namespaced codes -// are not expected there. -const typeIgnoreCommentRegEx = /((^|#)\s*)type:\s*ignore(\s*\[([\s\w:,-]*)\]|\s|$)/; -const pyrightIgnoreCommentRegEx = /((^|#)\s*)pyright:\s*ignore(\s*\[([\s\w-,]*)\]|\s|$)/; -const underscoreRegEx = /_/g; + +// Fast-reject table: only these ASCII chars can begin a string literal +// (quote chars or valid string prefix chars f/r/b/u/t and their uppercase). 
+// Checking this table first avoids calling _getStringPrefixLength() for the +// vast majority of tokens (identifiers, numbers, operators, etc.). +const _canStartString: boolean[] = (() => { + const table = new Array(128).fill(false); + table[Char.SingleQuote] = true; + table[Char.DoubleQuote] = true; + for (const ch of [Char.f, Char.F, Char.r, Char.R, Char.b, Char.B, Char.u, Char.U, Char.t, Char.T]) { + table[ch] = true; + } + return table; +})(); + +// ASCII identifier-continue table. Indexed by char code < 128; true if the +// char can appear inside an identifier (letter, digit, underscore). +// Building this at module load by querying isIdentifierChar lets the tight +// identifier-swallow loop avoid function-call overhead entirely on the common +// ASCII path. Non-ASCII chars fall back to the generic path. +const _asciiIdentifierContinue: boolean[] = (() => { + const table = new Array(128).fill(false); + for (let i = 0; i < 128; i++) { + if (isIdentifierChar(i)) { + table[i] = true; + } + } + return table; +})(); + +const _asciiIdentifierStart: boolean[] = (() => { + const table = new Array(128).fill(false); + for (let i = 0; i < 128; i++) { + if (isIdentifierStartChar(i)) { + table[i] = true; + } + } + return table; +})(); + +// Create a detached copy of a source text range without going through Buffer. +// Each charAt() for ASCII returns a V8-cached single-char string that does not +// reference the parent. The concatenation chain becomes a ConsString independent +// of the source text, avoiding V8 SlicedString memory pinning. +// ~4-9x faster than Buffer.from(str,'utf8').toString('utf8') for typical +// Python identifier lengths (5-20 chars). +function detachSubstring(text: string, start: number, end: number): string { + let result = ''; + for (let i = start; i < end; i++) { + result += text.charAt(i); + } + return result; +} + +// Strip underscore characters from a source text range without first creating +// an intermediate substring. 
+function removeUnderscoresFromRange(text: string, start: number, end: number): string { + let firstUnderscoreIndex = -1; + for (let i = start; i < end; i++) { + if (text.charCodeAt(i) === Char.Underscore) { + firstUnderscoreIndex = i; + break; + } + } + + if (firstUnderscoreIndex < 0) { + return text.slice(start, end); + } + + let result = text.slice(start, firstUnderscoreIndex); + for (let i = firstUnderscoreIndex + 1; i < end; i++) { + if (text.charCodeAt(i) !== Char.Underscore) { + result += text[i]; + } + } + return result; +} + +// Manual replacement for magicsRegEx = /\\\s*$/ +// Check if a range [start, end) within `text` ends with a backslash followed +// by optional whitespace. +function endsWithBackslashContinuation(text: string, start: number, end: number): boolean { + let i = end - 1; + // Skip trailing whitespace + while (i >= start) { + const ch = text.charCodeAt(i); + if (ch === Char.Space || ch === Char.Tab || ch === Char.FormFeed) { + i--; + } else { + break; + } + } + return i >= start && text.charCodeAt(i) === Char.Backslash; +} + +// Result structure matching the shape previously extracted from regex match groups. +interface IgnoreDirectiveMatch { + fullMatch: string; // group 0: full matched text + prefix: string; // group 1: prefix before directive keyword + bracketContent?: string; // group 4: content inside [...] if present + index: number; // match position within the input string +} + +// Parses a bracketed rule list starting at `pos` (which must point at '['). +// Returns the bracket content (without brackets) and the position just past ']', +// or undefined if the bracket is malformed (e.g. unclosed, or contains invalid chars +// before a closing bracket is found). 
+function parseIgnoreBracketContent( + text: string, + pos: number, + rangeEnd: number, + allowColon: boolean +): { content: string; newPos: number } | undefined { + pos++; // skip '[' + const bracketStart = pos; + while (pos < rangeEnd && text.charCodeAt(pos) !== Char.CloseBracket) { + // Only allow valid bracket content chars: \s, \w, -, , + // (plus ':' for type: ignore to support tool-namespaced codes) + const bc = text.charCodeAt(pos); + if ( + (bc >= Char.a && bc <= Char.z) || + (bc >= Char.A && bc <= Char.Z) || + (bc >= Char._0 && bc <= Char._9) || + bc === Char.Underscore || + bc === Char.Hyphen || + bc === Char.Comma || + bc === Char.Space || + bc === Char.Tab || + (allowColon && bc === Char.Colon) + ) { + pos++; + } else { + break; + } + } + if (pos < rangeEnd && text.charCodeAt(pos) === Char.CloseBracket) { + return { content: text.slice(bracketStart, pos), newPos: pos + 1 }; + } + return undefined; +} + +// Manual replacement for typeIgnoreCommentRegEx / pyrightIgnoreCommentRegEx. +// Scans `text` within [rangeStart, rangeEnd) for `: ignore [rules]` +// where directive is 'type' or 'pyright'. +// Returns a match object or undefined. Returned `index` is absolute within `text`. +function matchIgnoreDirective( + text: string, + rangeStart: number, + rangeEnd: number, + directive: string +): IgnoreDirectiveMatch | undefined { + // The directive can be preceded by optional `#` and whitespace, or + // appear at the start of the range with optional whitespace. + // type: ignore allows tool-namespaced codes (e.g. "ty:rule-name") in brackets; + // pyright: ignore does not. + const allowColonInBracket = directive === 'type'; + let searchFrom = rangeStart; + + while (searchFrom < rangeEnd) { + // Find the next occurrence of the directive keyword, bounded by + // rangeEnd. 
A bounded hand-rolled scan is important here: native + // String.prototype.indexOf has no end bound and, when the keyword is + // absent from the current comment but present elsewhere in the file, + // can scan well past rangeEnd — producing O(n) behavior per comment + // and O(n^2) overall on comment-heavy files. + const firstCharCode = directive.charCodeAt(0); + let directiveIdx = -1; + const scanLimit = rangeEnd - directive.length; + for (let i = searchFrom; i <= scanLimit; i++) { + if (text.charCodeAt(i) === firstCharCode) { + let found = true; + for (let d = 1; d < directive.length; d++) { + if (text.charCodeAt(i + d) !== directive.charCodeAt(d)) { + found = false; + break; + } + } + if (found) { + directiveIdx = i; + break; + } + } + } + if (directiveIdx < 0) { + return undefined; + } + + // Determine the prefix: scan backward from directiveIdx to find + // the `#` or start-of-range, collecting whitespace. + let prefixStart = directiveIdx; + let foundAnchor = false; + + // Walk backward over spaces/tabs + let j = directiveIdx - 1; + while (j >= rangeStart && (text.charCodeAt(j) === Char.Space || text.charCodeAt(j) === Char.Tab)) { + j--; + } + + if (j < rangeStart) { + // At start of range + prefixStart = rangeStart; + foundAnchor = true; + } else if (text.charCodeAt(j) === Char.Hash) { + prefixStart = j; + foundAnchor = true; + } + + if (!foundAnchor) { + searchFrom = directiveIdx + 1; + continue; + } + + // After directive keyword, expect ':' + let pos = directiveIdx + directive.length; + if (pos >= rangeEnd || text.charCodeAt(pos) !== Char.Colon) { + searchFrom = directiveIdx + 1; + continue; + } + pos++; // skip ':' + + // Skip optional whitespace after ':' + while (pos < rangeEnd && (text.charCodeAt(pos) === Char.Space || text.charCodeAt(pos) === Char.Tab)) { + pos++; + } + + // Expect 'ignore' + const ignoreStr = 'ignore'; + if (pos + ignoreStr.length > rangeEnd) { + searchFrom = directiveIdx + 1; + continue; + } + + let matched = true; + for (let k = 
0; k < ignoreStr.length; k++) { + if (text.charCodeAt(pos + k) !== ignoreStr.charCodeAt(k)) { + matched = false; + break; + } + } + if (!matched) { + searchFrom = directiveIdx + 1; + continue; + } + pos += ignoreStr.length; + + // After 'ignore', expect whitespace, '[', or end-of-range + let bracketContent: string | undefined; + + if (pos >= rangeEnd) { + // End of range — valid + } else { + const ch = text.charCodeAt(pos); + if (ch === Char.Space || ch === Char.Tab) { + // Skip whitespace to check for optional bracket + while (pos < rangeEnd && (text.charCodeAt(pos) === Char.Space || text.charCodeAt(pos) === Char.Tab)) { + pos++; + } + if (pos < rangeEnd && text.charCodeAt(pos) === Char.OpenBracket) { + const parsed = parseIgnoreBracketContent(text, pos, rangeEnd, allowColonInBracket); + if (parsed === undefined) { + searchFrom = directiveIdx + 1; + continue; + } + bracketContent = parsed.content; + pos = parsed.newPos; + } + } else if (ch === Char.OpenBracket) { + // Bracket immediately after 'ignore' + const parsed = parseIgnoreBracketContent(text, pos, rangeEnd, allowColonInBracket); + if (parsed === undefined) { + searchFrom = directiveIdx + 1; + continue; + } + bracketContent = parsed.content; + pos = parsed.newPos; + } else { + // No space, no bracket — not a valid match + searchFrom = directiveIdx + 1; + continue; + } + } + + const prefix = text.slice(prefixStart, directiveIdx); + const fullMatch = text.slice(prefixStart, pos); + + return { + fullMatch, + prefix, + bracketContent, + index: prefixStart, + }; + } + + return undefined; +} export interface TokenizerOutput { // List of all tokens. @@ -228,6 +648,10 @@ export class Tokenizer { private _lineRanges: TextRange[] = []; private _indentAmounts: IndentInfo[] = []; private _typeIgnoreAll: IgnoreComment | undefined; + // Cached answer to "are there any non-trivial tokens yet?" 
Once true it + // stays true, so the O(n) scan in _handleComment only runs while the token + // stream consists purely of NewLine / Indent tokens. + private _hasTokenBeforeIgnoreAll = false; private _typeIgnoreLines = new Map(); private _pyrightIgnoreLines = new Map(); private _comments: Comment[] | undefined; @@ -259,10 +683,15 @@ export class Tokenizer { // Assume Jupyter notebook tokenization rules? private _useNotebookMode = false; - // Intern identifier strings within a single tokenization pass. This reduces - // per-identifier allocations while still ensuring we don't retain substrings - // that reference the original source text. - private readonly _identifierInternedStrings = new Map(); + // Direct-mapped identifier intern cache. Indexed by a cheap hash of + // (firstChar, lastChar, length). On a hit (slot defined and string + // equals the current source range), reuse the cached string instead of + // re-allocating via detachSubstring. Collisions simply overwrite the + // slot — no chaining, O(1) lookup, no Map overhead. Sized as a power of + // two so the mask is a single AND. + private static readonly _identifierCacheSize = 2048; + private static readonly _identifierCacheMask = Tokenizer._identifierCacheSize - 1; + private _identifierCache: Array = new Array(Tokenizer._identifierCacheSize); tokenize( text: string, @@ -293,7 +722,8 @@ export class Tokenizer { this._lineRanges = []; this._indentAmounts = []; this._useNotebookMode = useNotebookMode; - this._identifierInternedStrings.clear(); + // Clear per-source identifier intern cache. + this._identifierCache.fill(undefined); const end = start + length; @@ -459,21 +889,24 @@ export class Tokenizer { // tokens onto the token list. Returns true if the caller should advance // to the next character. 
private _handleCharacter(): boolean { - // f-strings, b-strings, etc - const stringPrefixLength = this._getStringPrefixLength(); - - if (stringPrefixLength >= 0) { - let stringPrefix = ''; - if (stringPrefixLength > 0) { - stringPrefix = this._cs.getText().slice(this._cs.position, this._cs.position + stringPrefixLength); - // Indeed a string - this._cs.advance(stringPrefixLength); - } + // f-strings, b-strings, etc — only check if current char can start a string + const currentChar = this._cs.currentChar; + if (currentChar < 128 && _canStartString[currentChar]) { + const stringPrefixLength = this._getStringPrefixLength(); + + if (stringPrefixLength >= 0) { + let stringPrefix = ''; + if (stringPrefixLength > 0) { + stringPrefix = this._cs.getText().slice(this._cs.position, this._cs.position + stringPrefixLength); + // Indeed a string + this._cs.advance(stringPrefixLength); + } - const quoteTypeFlags = this._getQuoteTypeFlags(stringPrefix); - if (quoteTypeFlags !== StringTokenFlags.None) { - this._handleString(quoteTypeFlags, stringPrefixLength); - return true; + const quoteTypeFlags = this._getQuoteTypeFlags(stringPrefix); + if (quoteTypeFlags !== StringTokenFlags.None) { + this._handleString(quoteTypeFlags, stringPrefixLength); + return true; + } } } @@ -890,51 +1323,104 @@ export class Tokenizer { } private _tryIdentifier(): boolean { - const swallowRemainingChars = () => { - while (true) { - if (isIdentifierChar(this._cs.currentChar)) { - this._cs.moveNext(); - } else if (isIdentifierChar(this._cs.currentChar, this._cs.nextChar)) { - this._cs.moveNext(); - this._cs.moveNext(); + const cs = this._cs; + const text = cs.getText(); + const textLen = text.length; + const start = cs.position; + + // Fast path for ASCII identifier start. Avoids the function call and + // surrogate logic for the common case (Python source is overwhelmingly + // ASCII identifiers). 
+ const firstChar = cs.currentChar; + let pos = start; + if (firstChar < 128) { + if (!_asciiIdentifierStart[firstChar]) { + // Not an identifier start and not a surrogate candidate. + return false; + } + pos++; + + // Tight loop: advance while we're still in ASCII identifier chars. + while (pos < textLen) { + const ch = text.charCodeAt(pos); + if (ch < 128 && _asciiIdentifierContinue[ch]) { + pos++; } else { break; } } - }; - const start = this._cs.position; - if (isIdentifierStartChar(this._cs.currentChar)) { - this._cs.moveNext(); - swallowRemainingChars(); - } else if (isIdentifierStartChar(this._cs.currentChar, this._cs.nextChar)) { - this._cs.moveNext(); - this._cs.moveNext(); - swallowRemainingChars(); + // If we hit a non-ASCII char, fall back to the generic loop to + // handle possible unicode identifier continue / surrogate pairs. + if (pos < textLen && text.charCodeAt(pos) >= 128) { + cs.advance(pos - start); + this._swallowNonAsciiIdentifierChars(); + pos = cs.position; + } else { + cs.advance(pos - start); + } + } else { + // Non-ASCII start: use the generic path (supports surrogates). + if (isIdentifierStartChar(firstChar)) { + cs.moveNext(); + } else if (isIdentifierStartChar(firstChar, cs.nextChar)) { + cs.moveNext(); + cs.moveNext(); + } else { + return false; + } + this._swallowNonAsciiIdentifierChars(); + pos = cs.position; } - if (this._cs.position > start) { - const value = this._cs.getText().slice(start, this._cs.position); - const keywordType = _keywords.get(value); + if (pos > start) { + const end = pos; + const length = end - start; + const keywordType = getKeywordTypeFromTextSlice(text, start, length); + if (keywordType !== undefined) { - this._tokens.push( - KeywordToken.create(start, this._cs.position - start, keywordType, this._getComments()) - ); + this._tokens.push(KeywordToken.create(start, length, keywordType, this._getComments())); } else { - const internedValue = this._identifierInternedStrings.get(value) ?? 
this._internIdentifierString(value); - this._tokens.push( - IdentifierToken.create(start, this._cs.position - start, internedValue, this._getComments()) - ); + const value = this._internIdentifier(text, start, end, length); + this._tokens.push(IdentifierToken.create(start, length, value, this._getComments())); } return true; } return false; } - private _internIdentifierString(value: string) { - const clonedValue = cloneStr(value); - this._identifierInternedStrings.set(clonedValue, clonedValue); - return clonedValue; + // Per-tokenize identifier intern cache. Direct-mapped, so collisions + // simply replace the slot. Common identifiers (self, cls, True, None, + // str, int, dict, etc.) get deduplicated to a single string object, + // avoiding repeated detachSubstring allocations for the same name. + private _internIdentifier(text: string, start: number, end: number, length: number): string { + const firstChar = text.charCodeAt(start); + const lastChar = text.charCodeAt(end - 1); + // Hash mixes length, first and last char; multiplier values chosen + // to spread hits for common short identifiers across the table. + const hash = (firstChar * 31 + lastChar * 7 + length) & Tokenizer._identifierCacheMask; + const cached = this._identifierCache[hash]; + if (cached !== undefined && cached.length === length && text.startsWith(cached, start)) { + return cached; + } + const value = detachSubstring(text, start, end); + this._identifierCache[hash] = value; + return value; + } + + // Generic identifier-continue loop that handles unicode + surrogate pairs. + // Falls back to this when the fast ASCII loop encounters a non-ASCII char. 
+ private _swallowNonAsciiIdentifierChars(): void { + while (true) { + if (isIdentifierChar(this._cs.currentChar)) { + this._cs.moveNext(); + } else if (isIdentifierChar(this._cs.currentChar, this._cs.nextChar)) { + this._cs.moveNext(); + this._cs.moveNext(); + } else { + break; + } + } } private _isPossibleNumber(): boolean { @@ -990,8 +1476,9 @@ export class Tokenizer { } if (radix > 0) { - const text = this._cs.getText().slice(start, this._cs.position); - const simpleIntText = text.replace(underscoreRegEx, ''); + const end = this._cs.position; + const text = this._cs.getText(); + const simpleIntText = removeUnderscoresFromRange(text, start, end); let intValue: number | bigint = parseInt(simpleIntText.slice(leadingChars), radix); if (!isNaN(intValue)) { @@ -1005,7 +1492,7 @@ export class Tokenizer { } this._tokens.push( - NumberToken.create(start, text.length, intValue, true, false, this._getComments()) + NumberToken.create(start, end - start, intValue, true, false, this._getComments()) ); return true; } @@ -1043,12 +1530,14 @@ export class Tokenizer { } if (isDecimalInteger) { - let text = this._cs.getText().slice(start, this._cs.position); - const simpleIntText = text.replace(underscoreRegEx, ''); + const textEnd = this._cs.position; + const sourceText = this._cs.getText(); + const simpleIntText = removeUnderscoresFromRange(sourceText, start, textEnd); let intValue: number | bigint = parseInt(simpleIntText, 10); if (!isNaN(intValue)) { let isImaginary = false; + let tokenLength = textEnd - start; const bigIntValue = BigInt(simpleIntText); if ( @@ -1061,12 +1550,12 @@ export class Tokenizer { if (this._cs.currentChar === Char.j || this._cs.currentChar === Char.J) { isImaginary = true; - text += String.fromCharCode(this._cs.currentChar); this._cs.moveNext(); + tokenLength += 1; } this._tokens.push( - NumberToken.create(start, text.length, intValue, true, isImaginary, this._getComments()) + NumberToken.create(start, tokenLength, intValue, true, isImaginary, 
this._getComments()) ); return true; } @@ -1079,24 +1568,19 @@ export class Tokenizer { (this._cs.currentChar === Char.Period && this._cs.nextChar >= Char._0 && this._cs.nextChar <= Char._9) ) { if (this._skipFloatingPointCandidate()) { - let text = this._cs.getText().slice(start, this._cs.position); - const value = parseFloat(text); + const floatEnd = this._cs.position; + const floatText = removeUnderscoresFromRange(this._cs.getText(), start, floatEnd); + const value = parseFloat(floatText); if (!isNaN(value)) { let isImaginary = false; + let tokenLength = floatEnd - start; if (this._cs.currentChar === Char.j || this._cs.currentChar === Char.J) { isImaginary = true; - text += String.fromCharCode(this._cs.currentChar); this._cs.moveNext(); + tokenLength += 1; } this._tokens.push( - NumberToken.create( - start, - this._cs.position - start, - value, - false, - isImaginary, - this._getComments() - ) + NumberToken.create(start, tokenLength, value, false, isImaginary, this._getComments()) ); return true; } @@ -1108,139 +1592,76 @@ export class Tokenizer { } private _tryOperator(): boolean { + const currentChar = this._cs.currentChar; let length = 0; const nextChar = this._cs.nextChar; let operatorType: OperatorType; - switch (this._cs.currentChar) { - case Char.Plus: - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.AddEqual : OperatorType.Add; - break; - - case Char.Ampersand: - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.BitwiseAndEqual : OperatorType.BitwiseAnd; - break; - - case Char.Bar: - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.BitwiseOrEqual : OperatorType.BitwiseOr; - break; - - case Char.Caret: - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? 
OperatorType.BitwiseXorEqual : OperatorType.BitwiseXor; - break; - - case Char.Equal: - if ( - this._activeFString?.activeReplacementField && - this._activeFString?.activeReplacementField.parenDepth === this._parenDepth && - !this._activeFString.activeReplacementField.inFormatSpecifier && - nextChar !== Char.Equal - ) { - length = 1; - operatorType = OperatorType.Assign; - break; - } - - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.Equals : OperatorType.Assign; - break; - - case Char.ExclamationMark: - if (nextChar !== Char.Equal) { - if (this._activeFString) { - // Handle the conversion separator (!) within an f-string. - this._tokens.push( - Token.create(TokenType.ExclamationMark, this._cs.position, 1, this._getComments()) - ); - this._cs.advance(1); - return true; - } - - return false; - } - length = 2; - operatorType = OperatorType.NotEquals; - break; - - case Char.Percent: - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? 
OperatorType.ModEqual : OperatorType.Mod; - break; + if (currentChar < 128 && nextChar < 128) { + const twoCharKey = (currentChar << 8) | nextChar; + const specialTokenType = _twoCharSpecialTokenTypeMap.get(twoCharKey); + if (specialTokenType !== undefined) { + this._tokens.push(Token.create(specialTokenType, this._cs.position, 2, this._getComments())); + this._cs.advance(2); + return true; + } - case Char.Tilde: - length = 1; - operatorType = OperatorType.BitwiseInvert; - break; + const twoCharOperatorType = _twoCharOperatorTypeMap.get(twoCharKey); + if (twoCharOperatorType !== undefined) { + this._tokens.push(OperatorToken.create(this._cs.position, 2, twoCharOperatorType, this._getComments())); + this._cs.advance(2); + return true; + } - case Char.Hyphen: - if (nextChar === Char.Greater) { - this._tokens.push(Token.create(TokenType.Arrow, this._cs.position, 2, this._getComments())); - this._cs.advance(2); + if (currentChar === nextChar) { + const repeatedOperatorType = _repeatedCharOperatorTypeTable[currentChar]; + if (repeatedOperatorType !== _unsetSingleCharOperatorType) { + const hasTrailingEqual = this._cs.lookAhead(2) === Char.Equal; + const repeatedLength = hasTrailingEqual ? 3 : 2; + const operatorType = hasTrailingEqual + ? _repeatedCharEqualOperatorTypeTable[currentChar] + : repeatedOperatorType; + this._tokens.push( + OperatorToken.create( + this._cs.position, + repeatedLength, + operatorType as OperatorType, + this._getComments() + ) + ); + this._cs.advance(repeatedLength); return true; } + } + } - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.SubtractEqual : OperatorType.Subtract; - break; - - case Char.Asterisk: - if (nextChar === Char.Asterisk) { - length = this._cs.lookAhead(2) === Char.Equal ? 3 : 2; - operatorType = length === 3 ? OperatorType.PowerEqual : OperatorType.Power; - } else { - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? 
OperatorType.MultiplyEqual : OperatorType.Multiply; - } - break; - - case Char.Slash: - if (nextChar === Char.Slash) { - length = this._cs.lookAhead(2) === Char.Equal ? 3 : 2; - operatorType = length === 3 ? OperatorType.FloorDivideEqual : OperatorType.FloorDivide; - } else { - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.DivideEqual : OperatorType.Divide; - } - break; - - case Char.Less: - if (nextChar === Char.Less) { - length = this._cs.lookAhead(2) === Char.Equal ? 3 : 2; - operatorType = length === 3 ? OperatorType.LeftShiftEqual : OperatorType.LeftShift; - } else if (nextChar === Char.Greater) { + if (currentChar < 128) { + const singleCharOperatorType = _singleCharOperatorTypeTable[currentChar]; + if (singleCharOperatorType !== _unsetSingleCharOperatorType) { + const equalOperatorType = _singleCharEqualOperatorTypeTable[currentChar]; + if (nextChar === Char.Equal && equalOperatorType !== _unsetSingleCharOperatorType) { length = 2; - operatorType = OperatorType.LessOrGreaterThan; + operatorType = equalOperatorType as OperatorType; } else { - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.LessThanOrEqual : OperatorType.LessThan; - } - break; - - case Char.Greater: - if (nextChar === Char.Greater) { - length = this._cs.lookAhead(2) === Char.Equal ? 3 : 2; - operatorType = length === 3 ? OperatorType.RightShiftEqual : OperatorType.RightShift; - } else { - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? OperatorType.GreaterThanOrEqual : OperatorType.GreaterThan; + length = 1; + operatorType = singleCharOperatorType as OperatorType; } - break; - case Char.At: - length = nextChar === Char.Equal ? 2 : 1; - operatorType = length === 2 ? 
OperatorType.MatrixMultiplyEqual : OperatorType.MatrixMultiply; - break; + this._tokens.push(OperatorToken.create(this._cs.position, length, operatorType, this._getComments())); + this._cs.advance(length); + return true; + } + } - default: - return false; + // `!=` is handled by the 2-char fast path above. + if (currentChar === Char.ExclamationMark && this._activeFString) { + // Handle the conversion separator (!) within an f-string. + this._tokens.push(Token.create(TokenType.ExclamationMark, this._cs.position, 1, this._getComments())); + this._cs.advance(1); + return true; } - this._tokens.push(OperatorToken.create(this._cs.position, length, operatorType, this._getComments())); - this._cs.advance(length); - return length > 0; + + return false; } private _handleInvalid(): boolean { @@ -1298,19 +1719,17 @@ export class Tokenizer { private _handleIPythonMagics(type: CommentType): void { const start = this._cs.position + 1; + const sourceText = this._cs.getText(); let begin = start; while (true) { this._cs.skipToEol(); if (type === CommentType.IPythonMagic || type === CommentType.IPythonShellEscape) { - const length = this._cs.position - begin; - const value = this._cs.getText().slice(begin, begin + length); - // is it multiline magics? 
// %magic command \ // next arguments - if (!value.match(magicsRegEx)) { + if (!endsWithBackslashContinuation(sourceText, begin, this._cs.position)) { break; } } @@ -1324,7 +1743,7 @@ export class Tokenizer { } const length = this._cs.position - start; - const comment = Comment.create(start, length, this._cs.getText().slice(start, start + length), type); + const comment = Comment.create(start, length, sourceText.slice(start, start + length), type); this._addComments(comment); } @@ -1333,53 +1752,74 @@ export class Tokenizer { this._cs.skipToEol(); const length = this._cs.position - start; - const comment = Comment.create(start, length, this._cs.getText().slice(start, start + length)); - - const typeIgnoreRegexMatch = comment.value.match(typeIgnoreCommentRegEx); - if (typeIgnoreRegexMatch) { - const commentStart = start + (typeIgnoreRegexMatch.index ?? 0); - const textRange: TextRange = { - start: commentStart + typeIgnoreRegexMatch[1].length, - length: typeIgnoreRegexMatch[0].length - typeIgnoreRegexMatch[1].length, - }; - const ignoreComment: IgnoreComment = { - range: textRange, - rulesList: this._getIgnoreCommentRulesList(commentStart, typeIgnoreRegexMatch), - }; + const sourceText = this._cs.getText(); + const end = start + length; - if (this._tokens.findIndex((t) => t.type !== TokenType.NewLine && t && t.type !== TokenType.Indent) < 0) { - this._typeIgnoreAll = ignoreComment; - } else { - this._typeIgnoreLines.set(this._lineRanges.length, ignoreComment); + // Fast pre-filter: any ignore directive must contain the substring 'ignore'. + // indexOf is a highly-optimized native call and lets us skip the full + // directive scan for the vast majority of comments (which are free-form text). 
+ const ignoreIdx = sourceText.indexOf('ignore', start); + if (ignoreIdx >= 0 && ignoreIdx < end) { + const typeIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'type'); + if (typeIgnoreMatch) { + const commentStart = typeIgnoreMatch.index; + const textRange: TextRange = { + start: commentStart + typeIgnoreMatch.prefix.length, + length: typeIgnoreMatch.fullMatch.length - typeIgnoreMatch.prefix.length, + }; + const ignoreComment: IgnoreComment = { + range: textRange, + rulesList: this._getIgnoreCommentRulesList(commentStart, typeIgnoreMatch), + }; + + let isIgnoreAll = false; + if (!this._hasTokenBeforeIgnoreAll) { + // Are there any tokens other than NewLine / Indent yet? + const hasOther = this._tokens.some( + (t) => t && t.type !== TokenType.NewLine && t.type !== TokenType.Indent + ); + if (hasOther) { + this._hasTokenBeforeIgnoreAll = true; + } else { + isIgnoreAll = true; + } + } + + if (isIgnoreAll) { + this._typeIgnoreAll = ignoreComment; + } else { + this._typeIgnoreLines.set(this._lineRanges.length, ignoreComment); + } } - } - const pyrightIgnoreRegexMatch = comment.value.match(pyrightIgnoreCommentRegEx); - if (pyrightIgnoreRegexMatch) { - const commentStart = start + (pyrightIgnoreRegexMatch.index ?? 
0); - const textRange: TextRange = { - start: commentStart + pyrightIgnoreRegexMatch[1].length, - length: pyrightIgnoreRegexMatch[0].length - pyrightIgnoreRegexMatch[1].length, - }; - const ignoreComment: IgnoreComment = { - range: textRange, - rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreRegexMatch), - }; - this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment); + const pyrightIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'pyright'); + if (pyrightIgnoreMatch) { + const commentStart = pyrightIgnoreMatch.index; + const textRange: TextRange = { + start: commentStart + pyrightIgnoreMatch.prefix.length, + length: pyrightIgnoreMatch.fullMatch.length - pyrightIgnoreMatch.prefix.length, + }; + const ignoreComment: IgnoreComment = { + range: textRange, + rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreMatch), + }; + this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment); + } } + const comment = Comment.create(start, length, sourceText.slice(start, end)); this._addComments(comment); } // Extracts the individual rules within a "type: ignore [x, y, z]" comment. 
- private _getIgnoreCommentRulesList(start: number, match: RegExpMatchArray): IgnoreCommentRule[] | undefined { - if (match.length < 5 || match[4] === undefined) { + private _getIgnoreCommentRulesList(start: number, match: IgnoreDirectiveMatch): IgnoreCommentRule[] | undefined { + if (match.bracketContent === undefined) { return undefined; } - const splitElements = match[4].split(','); + const splitElements = match.bracketContent.split(','); const commentRules: IgnoreCommentRule[] = []; - let currentOffset = start + match[0].indexOf('[') + 1; + let currentOffset = start + match.fullMatch.indexOf('[') + 1; for (const element of splitElements) { const frontTrimmed = element.trimStart(); diff --git a/packages/pyright-internal/src/parser/tokenizerTypes.ts b/packages/pyright-internal/src/parser/tokenizerTypes.ts index 19dcae595d4e..8fa2ec988515 100644 --- a/packages/pyright-internal/src/parser/tokenizerTypes.ts +++ b/packages/pyright-internal/src/parser/tokenizerTypes.ts @@ -193,15 +193,8 @@ export interface Comment extends TextRange { } export namespace Comment { - export function create(start: number, length: number, value: string, type = CommentType.Regular) { - const comment: Comment = { - type, - start, - length, - value, - }; - - return comment; + export function create(start: number, length: number, value: string, type = CommentType.Regular): Comment { + return { type, start, length, value }; } } @@ -209,21 +202,23 @@ export interface TokenBase extends TextRange { readonly type: TokenType; // Comments prior to the token. + // Intentionally optional: most tokens have no comments, so omitting this + // property keeps V8 object size smaller for the common case. Each `create` + // factory returns a two-shape object (with vs. without `comments`) so that + // comment-free tokens skip the extra property slot entirely. 
readonly comments?: Comment[] | undefined; } export interface Token extends TokenBase {} export namespace Token { - export function create(type: TokenType, start: number, length: number, comments: Comment[] | undefined) { - const token: Token = { + export function create(type: TokenType, start: number, length: number, comments: Comment[] | undefined): Token { + return { start, length, type, comments, }; - - return token; } } @@ -240,17 +235,27 @@ export namespace IndentToken { indentAmount: number, isIndentAmbiguous: boolean, comments: Comment[] | undefined - ) { - const token: IndentToken = { + ): IndentToken { + // Two-shape pattern: omit `comments` slot when unused to reduce + // per-token allocation size. ~95% of tokens carry no comments. + if (comments !== undefined) { + return { + start, + length, + type: TokenType.Indent, + isIndentAmbiguous, + comments, + indentAmount, + }; + } + + return { start, length, type: TokenType.Indent, isIndentAmbiguous, - comments, indentAmount, }; - - return token; } } @@ -269,18 +274,27 @@ export namespace DedentToken { matchesIndent: boolean, isDedentAmbiguous: boolean, comments: Comment[] | undefined - ) { - const token: DedentToken = { + ): DedentToken { + if (comments !== undefined) { + return { + start, + length, + type: TokenType.Dedent, + comments, + indentAmount, + matchesIndent, + isDedentAmbiguous, + }; + } + + return { start, length, type: TokenType.Dedent, - comments, indentAmount, matchesIndent, isDedentAmbiguous, }; - - return token; } } @@ -290,16 +304,28 @@ export interface NewLineToken extends Token { } export namespace NewLineToken { - export function create(start: number, length: number, newLineType: NewLineType, comments: Comment[] | undefined) { - const token: NewLineToken = { + export function create( + start: number, + length: number, + newLineType: NewLineType, + comments: Comment[] | undefined + ): NewLineToken { + if (comments !== undefined) { + return { + start, + length, + type: TokenType.NewLine, + 
comments, + newLineType, + }; + } + + return { start, length, type: TokenType.NewLine, - comments, newLineType, }; - - return token; } } @@ -309,16 +335,28 @@ export interface KeywordToken extends Token { } export namespace KeywordToken { - export function create(start: number, length: number, keywordType: KeywordType, comments: Comment[] | undefined) { - const token: KeywordToken = { + export function create( + start: number, + length: number, + keywordType: KeywordType, + comments: Comment[] | undefined + ): KeywordToken { + if (comments !== undefined) { + return { + start, + length, + type: TokenType.Keyword, + comments, + keywordType, + }; + } + + return { start, length, type: TokenType.Keyword, - comments, keywordType, }; - - return token; } export function isSoftKeyword(token: KeywordToken) { @@ -350,19 +388,30 @@ export namespace StringToken { escapedValue: string, prefixLength: number, comments: Comment[] | undefined - ) { - const token: StringToken = { + ): StringToken { + const quoteMarkLength = flags & StringTokenFlags.Triplicate ? 3 : 1; + if (comments !== undefined) { + return { + start, + length, + type: TokenType.String, + flags, + escapedValue, + prefixLength, + quoteMarkLength, + comments, + }; + } + + return { start, length, type: TokenType.String, flags, escapedValue, prefixLength, - quoteMarkLength: flags & StringTokenFlags.Triplicate ? 3 : 1, - comments, + quoteMarkLength, }; - - return token; } } @@ -386,18 +435,28 @@ export namespace FStringStartToken { flags: StringTokenFlags, prefixLength: number, comments: Comment[] | undefined - ) { - const token: FStringStartToken = { + ): FStringStartToken { + const quoteMarkLength = flags & StringTokenFlags.Triplicate ? 
3 : 1; + if (comments !== undefined) { + return { + start, + length, + type: TokenType.FStringStart, + flags, + prefixLength, + quoteMarkLength, + comments, + }; + } + + return { start, length, type: TokenType.FStringStart, flags, prefixLength, - quoteMarkLength: flags & StringTokenFlags.Triplicate ? 3 : 1, - comments, + quoteMarkLength, }; - - return token; } } @@ -456,18 +515,27 @@ export namespace NumberToken { isInteger: boolean, isImaginary: boolean, comments: Comment[] | undefined - ) { - const token: NumberToken = { + ): NumberToken { + if (comments !== undefined) { + return { + start, + length, + type: TokenType.Number, + isInteger, + isImaginary, + value, + comments, + }; + } + + return { start, length, type: TokenType.Number, isInteger, isImaginary, value, - comments, }; - - return token; } } @@ -477,16 +545,28 @@ export interface OperatorToken extends Token { } export namespace OperatorToken { - export function create(start: number, length: number, operatorType: OperatorType, comments: Comment[] | undefined) { - const token: OperatorToken = { + export function create( + start: number, + length: number, + operatorType: OperatorType, + comments: Comment[] | undefined + ): OperatorToken { + if (comments !== undefined) { + return { + start, + length, + type: TokenType.Operator, + operatorType, + comments, + }; + } + + return { start, length, type: TokenType.Operator, operatorType, - comments, }; - - return token; } } @@ -496,18 +576,36 @@ export interface IdentifierToken extends Token { } export namespace IdentifierToken { - export function create(start: number, length: number, value: string, comments: Comment[] | undefined) { + export function create( + start: number, + length: number, + value: string, + comments: Comment[] | undefined + ): IdentifierToken { // Perform "NFKC normalization", as per the Python lexical spec. 
- const normalizedValue = value.normalize('NFKC'); - - const token: IdentifierToken = { + let normalizedValue = value; + for (let i = 0; i < value.length; i++) { + if (value.charCodeAt(i) > 0x7f) { + normalizedValue = value.normalize('NFKC'); + break; + } + } + + if (comments !== undefined) { + return { + start, + length, + type: TokenType.Identifier, + value: normalizedValue, + comments, + }; + } + + return { start, length, type: TokenType.Identifier, value: normalizedValue, - comments, }; - - return token; } } diff --git a/packages/pyright-internal/src/tests/benchmarkData/comment_heavy.py b/packages/pyright-internal/src/tests/benchmarkData/comment_heavy.py new file mode 100644 index 000000000000..a855a3f33dcb --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarkData/comment_heavy.py @@ -0,0 +1,284 @@ +# comment_heavy.py — many type: ignore / pyright: ignore / noqa comments +# Stresses the tokenizer's comment directive scanning paths. + +from typing import Any, Dict, List, Optional, Tuple, Union + +# --- type: ignore variants --- + +x1: int = "not_int" # type: ignore +x2: str = 42 # type: ignore +x3: float = "nope" # type: ignore +x4: bool = 123 # type: ignore +x5: bytes = 456 # type: ignore + +x6: int = "a" # type: ignore[assignment] +x7: str = 1 # type: ignore[assignment] +x8: float = True # type: ignore[assignment] +x9: bool = None # type: ignore[assignment] +x10: bytes = [] # type: ignore[assignment] + +x11 = undefined_name # type: ignore[name-defined] +x12 = another_undefined # type: ignore[name-defined] +x13 = yet_another # type: ignore[name-defined] + +# --- pyright: ignore variants --- + +y1: int = "not_int" # pyright: ignore +y2: str = 42 # pyright: ignore +y3: float = "nope" # pyright: ignore +y4: bool = 123 # pyright: ignore +y5: bytes = 456 # pyright: ignore + +y6: int = "a" # pyright: ignore[reportAssignmentType] +y7: str = 1 # pyright: ignore[reportAssignmentType] +y8: float = True # pyright: ignore[reportAssignmentType] +y9: bool = None 
# pyright: ignore[reportAssignmentType] +y10: bytes = [] # pyright: ignore[reportAssignmentType] + +y11: int = "str" # pyright: ignore[reportAssignmentType, reportGeneralClassIssues] +y12: str = 42 # pyright: ignore[reportAssignmentType, reportGeneralClassIssues] + +# --- noqa comments --- + +import os # noqa: F401 +import sys # noqa: F401 +import re # noqa +import json # noqa: E302 +import csv # noqa: F401, E302 +import io # noqa + +# --- Mixed comments --- + +z1: int = "str" # type: ignore # noqa: F841 +z2: str = 42 # type: ignore[assignment] # noqa +z3 = undefined # type: ignore[name-defined] # noqa: F821 +z4: int = "nope" # pyright: ignore # noqa: F841 +z5: int = "nope" # pyright: ignore[reportAssignmentType] # noqa + +# --- Regular comments (should be fast-rejected by directive scanner) --- + +# This is a regular comment +# Another regular comment +# Yet another regular comment that is quite long and spans many characters to stress the scanner +# Regular comment with some keywords: def class import return if else +# Regular comment mentioning ignore but not as a directive +# A comment that says "type" but is not a type: ignore +# type: This looks similar but is not a valid directive +# pyright: This also looks similar but is not valid + +# --- Doc comments (hash-prefixed) --- + +# Module: comment_heavy +# Purpose: Stress test comment directive scanning +# Author: Benchmark generator +# Date: 2024-01-01 +# Version: 1.0.0 + +# --- Function with many ignored lines --- + + +def poorly_typed_function( + a, # type: ignore + b, # type: ignore + c, # type: ignore + d, # type: ignore + e, # type: ignore +) -> None: # type: ignore + result = a + b # type: ignore + result2 = c * d # type: ignore + result3 = e ** 2 # type: ignore + final = result + result2 + result3 # type: ignore + return final # type: ignore + + +def another_poorly_typed(x, y, z): # type: ignore + # type: ignore on every line + a: int = x # type: ignore + b: str = y # type: ignore + c: float = z # type: 
ignore + d: bool = a + b # type: ignore + e: bytes = c + d # type: ignore + f: list = e * 2 # type: ignore + g: dict = f + 1 # type: ignore + h: tuple = g - 1 # type: ignore + i: set = h / 2 # type: ignore + j: int = i + j # type: ignore # noqa: F821 + return (a, b, c, d, e, f, g, h, i, j) # type: ignore + + +# --- Class with pyright: ignore --- + + +class IgnoredClass: + x: int = "not_int" # pyright: ignore[reportAssignmentType] + y: str = 42 # pyright: ignore[reportAssignmentType] + + def __init__(self) -> None: + self.a: int = "str" # pyright: ignore[reportAssignmentType] + self.b: str = 42 # pyright: ignore[reportAssignmentType] + self.c: float = "3.14" # pyright: ignore[reportAssignmentType] + self.d: bool = "True" # pyright: ignore[reportAssignmentType] + + def method1(self) -> int: # type: ignore + return "not_int" # type: ignore + + def method2(self) -> str: # type: ignore + return 42 # type: ignore + + def method3(self) -> float: # type: ignore + return True # type: ignore + + def method4(self) -> bool: # type: ignore + return 3.14 # type: ignore + + def method5(self) -> bytes: # type: ignore + return "string" # type: ignore + + def method6(self) -> list: # type: ignore + return 123 # type: ignore + + def method7(self) -> dict: # type: ignore + return [1, 2, 3] # type: ignore + + def method8(self) -> tuple: # type: ignore + return {1: 2} # type: ignore + + def method9(self) -> set: # type: ignore + return (1, 2, 3) # type: ignore + + def method10(self) -> None: # type: ignore + pass # type: ignore + + +# --- Bulk ignore blocks (100 lines) --- + + +def bulk_ignores_1(): + v1 = undefined_1 # type: ignore[name-defined] + v2 = undefined_2 # type: ignore[name-defined] + v3 = undefined_3 # type: ignore[name-defined] + v4 = undefined_4 # type: ignore[name-defined] + v5 = undefined_5 # type: ignore[name-defined] + v6 = undefined_6 # type: ignore[name-defined] + v7 = undefined_7 # type: ignore[name-defined] + v8 = undefined_8 # type: ignore[name-defined] + v9 = 
undefined_9 # type: ignore[name-defined] + v10 = undefined_10 # type: ignore[name-defined] + v11 = undefined_11 # pyright: ignore[reportUndefinedVariable] + v12 = undefined_12 # pyright: ignore[reportUndefinedVariable] + v13 = undefined_13 # pyright: ignore[reportUndefinedVariable] + v14 = undefined_14 # pyright: ignore[reportUndefinedVariable] + v15 = undefined_15 # pyright: ignore[reportUndefinedVariable] + v16 = undefined_16 # pyright: ignore[reportUndefinedVariable] + v17 = undefined_17 # pyright: ignore[reportUndefinedVariable] + v18 = undefined_18 # pyright: ignore[reportUndefinedVariable] + v19 = undefined_19 # pyright: ignore[reportUndefinedVariable] + v20 = undefined_20 # pyright: ignore[reportUndefinedVariable] + return None + + +def bulk_ignores_2(): + # 20 more lines with mixed directives + a1: int = "wrong" # type: ignore[assignment] + a2: str = 42 # type: ignore[assignment] + a3: float = True # type: ignore[assignment] + a4: bool = 3.14 # type: ignore[assignment] + a5: bytes = None # type: ignore[assignment] + a6: list = 42 # type: ignore[assignment] + a7: dict = "str" # type: ignore[assignment] + a8: tuple = False # type: ignore[assignment] + a9: set = 3.14 # type: ignore[assignment] + a10: int = None # type: ignore[assignment] + b1: int = "wrong" # pyright: ignore[reportAssignmentType] + b2: str = 42 # pyright: ignore[reportAssignmentType] + b3: float = True # pyright: ignore[reportAssignmentType] + b4: bool = 3.14 # pyright: ignore[reportAssignmentType] + b5: bytes = None # pyright: ignore[reportAssignmentType] + b6: list = 42 # pyright: ignore[reportAssignmentType] + b7: dict = "str" # pyright: ignore[reportAssignmentType] + b8: tuple = False # pyright: ignore[reportAssignmentType] + b9: set = 3.14 # pyright: ignore[reportAssignmentType] + b10: int = None # pyright: ignore[reportAssignmentType] + return None + + +# --- Lines with NO comments at all (to test non-comment fast path) --- + + +def clean_function_1(a: int, b: str, c: float) -> 
Tuple[int, str, float]: + x = a + 1 + y = b + " world" + z = c * 2.0 + return (x, y, z) + + +def clean_function_2(items: List[int]) -> Dict[str, int]: + result: Dict[str, int] = {} + total = 0 + for i, item in enumerate(items): + key = f"item_{i}" + result[key] = item + total += item + result["total"] = total + result["count"] = len(items) + result["average"] = total // max(len(items), 1) + return result + + +def clean_function_3( + data: Dict[str, Any], + keys: List[str], + default: Any = None, +) -> List[Any]: + return [data.get(k, default) for k in keys] + + +def clean_function_4(matrix: List[List[int]]) -> List[List[int]]: + if not matrix: + return [] + rows = len(matrix) + cols = len(matrix[0]) + transposed: List[List[int]] = [] + for j in range(cols): + row: List[int] = [] + for i in range(rows): + row.append(matrix[i][j]) + transposed.append(row) + return transposed + + +def clean_function_5(text: str, width: int = 80) -> List[str]: + words = text.split() + lines: List[str] = [] + current_line: List[str] = [] + current_length = 0 + for word in words: + if current_length + len(word) + len(current_line) > width: + lines.append(" ".join(current_line)) + current_line = [word] + current_length = len(word) + else: + current_line.append(word) + current_length += len(word) + if current_line: + lines.append(" ".join(current_line)) + return lines + + +# --- Inline type comments (old-style annotations) --- + + +def old_style_annotations(): + a = 42 # type: int + b = "hello" # type: str + c = 3.14 # type: float + d = True # type: bool + e = None # type: Optional[int] + f = [1, 2, 3] # type: List[int] + g = {"a": 1} # type: Dict[str, int] + h = (1, "a") # type: Tuple[int, str] + i = {1, 2, 3} # type: Set[int] + return (a, b, c, d, e, f, g, h, i) + + +# End of comment_heavy.py diff --git a/packages/pyright-internal/src/tests/benchmarkData/fstring_heavy.py b/packages/pyright-internal/src/tests/benchmarkData/fstring_heavy.py new file mode 100644 index 
000000000000..9eb9ce8f2bf0 --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarkData/fstring_heavy.py @@ -0,0 +1,273 @@ +# fstring_heavy.py — deeply nested f-strings for tokenizer stress-testing +# Tests the f-string context stack handling and expression scanning. + +from typing import Any, Dict, List, Optional, Tuple + +# Simple f-strings +name = "world" +greeting = f"Hello, {name}!" +multi = f"{'hello'.upper()} {'world'.lower()}" + +# Nested f-strings (depth 2) +value = 42 +nested_1 = f"result: {f'inner {value}'}" +nested_2 = f"outer {f'middle {f'{value}'}'}" + +# F-strings with format specs +pi = 3.14159265358979 +formatted_float = f"{pi:.4f}" +formatted_int = f"{value:05d}" +formatted_hex = f"{value:#010x}" +formatted_bin = f"{value:08b}" +formatted_exp = f"{pi:.2e}" +formatted_percent = f"{0.756:.1%}" + +# F-strings with expressions +data = [1, 2, 3, 4, 5] +expr_1 = f"sum={sum(data)}, len={len(data)}, avg={sum(data)/len(data):.2f}" +expr_2 = f"max={max(data)}, min={min(data)}, range={max(data)-min(data)}" + +# F-strings with conditionals +status = "ok" +cond_1 = f"Status: {'PASS' if status == 'ok' else 'FAIL'}" +cond_2 = f"Value: {value if value > 0 else -value} ({'positive' if value > 0 else 'negative'})" + +# F-strings with dictionary access +config: Dict[str, Any] = {"host": "localhost", "port": 8080, "debug": True} +dict_1 = f"Server: {config['host']}:{config['port']}" +dict_2 = f"Debug mode: {'ON' if config['debug'] else 'OFF'}" + +# F-strings with list comprehensions +comp_1 = f"squares: {[x**2 for x in range(10)]}" +comp_2 = f"evens: {[x for x in range(20) if x % 2 == 0]}" + +# F-strings with method calls +text = "hello world" +method_1 = f"{text.title()!r}" +method_2 = f"{text.replace('world', 'python').upper()}" +method_3 = f"{', '.join(str(x) for x in range(5))}" + +# Multiline f-strings +multiline_1 = f""" +Name: {name} +Value: {value} +Status: {status} +Config: {config} +""" + +multiline_2 = f""" +{'='*50} +Report Summary +{'='*50} 
+Total items: {len(data)} +Sum: {sum(data)} +Average: {sum(data)/len(data):.2f} +{'='*50} +""" + +# F-strings with walrus operator +walrus_1 = f"{(n := 10)} doubled is {n * 2}" + +# Deeply nested f-strings (depth 3) +deep_1 = f"L1:{f'L2:{f'L3:{value}'}'}" +deep_2 = f"a{f'b{f'c{f'd'}'}'}" + +# F-strings with escape characters +escape_1 = f"path: {'C:\\\\Users\\\\test'}" +escape_2 = f"newline: {'line1\\nline2'}" +escape_3 = f"tab: {'col1\\tcol2'}" + +# F-string with complex expressions +import_fstr = f"{'import ' + 'os'}" +lambda_fstr = f"{(lambda x: x * 2)(21)}" + +# Batch of similar f-strings (simulating template usage) +items: List[Dict[str, Any]] = [ + {"name": f"item_{i}", "price": i * 10.5, "qty": i + 1} + for i in range(50) +] + + +def format_item(item: Dict[str, Any]) -> str: + return f" {item['name']:<20s} ${item['price']:>8.2f} x{item['qty']:>4d} = ${item['price'] * item['qty']:>10.2f}" + + +def format_table(items: List[Dict[str, Any]], title: str = "Inventory") -> str: + header = f"{'Name':<20s} {'Price':>8s} {'Qty':>4s} {'Total':>10s}" + separator = f"{'-'*20} {'-'*8} {'-'*4} {'-'*10}" + rows = "\n".join(format_item(item) for item in items) + total = sum(item["price"] * item["qty"] for item in items) + return f""" +{title} +{f'=' * len(title)} +{header} +{separator} +{rows} +{separator} +{'TOTAL':>34s} ${total:>10.2f} +""" + + +# F-strings in class definitions +class FormattedRecord: + def __init__(self, id: int, name: str, value: float) -> None: + self.id = id + self.name = name + self.value = value + + def __str__(self) -> str: + return f"Record(id={self.id}, name={self.name!r}, value={self.value:.4f})" + + def __repr__(self) -> str: + return f"FormattedRecord({self.id!r}, {self.name!r}, {self.value!r})" + + def to_csv(self) -> str: + return f"{self.id},{self.name},{self.value:.2f}" + + def to_json(self) -> str: + return f'{{"id": {self.id}, "name": "{self.name}", "value": {self.value}}}' + + def to_xml(self) -> str: + return 
f"{self.name}{self.value:.2f}" + + def summary(self, verbose: bool = False) -> str: + base = f"#{self.id}: {self.name} = {self.value:.2f}" + if verbose: + return f"{base} (type={type(self.value).__name__}, len_name={len(self.name)})" + return base + + +# F-strings with nested data structures +matrix: List[List[int]] = [[i * 10 + j for j in range(10)] for i in range(10)] + + +def format_matrix(m: List[List[int]]) -> str: + rows = "\n".join( + f" [{', '.join(f'{cell:3d}' for cell in row)}]" + for row in m + ) + return f"Matrix {len(m)}x{len(m[0]) if m else 0}:\n[\n{rows}\n]" + + +def format_tree( + node: Dict[str, Any], indent: int = 0, prefix: str = "" +) -> str: + name = node.get("name", "?") + children = node.get("children", []) + result = f"{' ' * indent}{prefix}{name}" + for i, child in enumerate(children): + is_last = i == len(children) - 1 + child_prefix = f"{'└── ' if is_last else '├── '}" + result += f"\n{format_tree(child, indent + 4, child_prefix)}" + return result + + +# Many small f-strings to stress token emission +def generate_report_lines(count: int) -> List[str]: + lines: List[str] = [] + for i in range(count): + lines.append(f"Line {i:04d}: value={i * 3.14:.2f}, hex={i:#06x}, bin={i:08b}") + return lines + + +def format_log_entry( + timestamp: str, + level: str, + module: str, + message: str, + extra: Optional[Dict[str, Any]] = None, +) -> str: + base = f"[{timestamp}] {level:>8s} {module:<30s} {message}" + if extra: + pairs = " ".join(f"{k}={v!r}" for k, v in extra.items()) + return f"{base} | {pairs}" + return base + + +# F-strings with type annotations in strings (for older Python compat) +future_annotations_example = { + "field1": f"{'Optional[List[Dict[str, Any]]]'}", + "field2": f"{'Union[int, str, Tuple[int, ...]]'}", + "field3": f"{'Callable[[str, int], Optional[bool]]'}", +} + +# More deeply nested formatting +def deep_format(data: Dict[str, Any], depth: int = 0) -> str: + indent = " " * depth + parts: List[str] = [] + for key, val in 
data.items(): + if isinstance(val, dict): + inner = deep_format(val, depth + 1) + parts.append(f"{indent}{key}:\n{inner}") + elif isinstance(val, list): + items_str = f", ".join(f"{v!r}" for v in val) + parts.append(f"{indent}{key}: [{items_str}]") + else: + parts.append(f"{indent}{key}: {val!r}") + return "\n".join(parts) + + +# Batch f-string generation to reach ~500 lines of f-string-heavy code +class LogFormatter: + _format: str + _fields: List[str] + + def __init__(self, fmt: str, fields: Optional[List[str]] = None) -> None: + self._format = fmt + self._fields = fields or [] + + def format(self, **kwargs: Any) -> str: + return f"[{self._format}] " + " ".join( + f"{f}={kwargs.get(f, 'N/A')!r}" for f in self._fields + ) + + +class TemplateEngine: + _templates: Dict[str, str] + + def __init__(self) -> None: + self._templates = {} + + def register(self, name: str, template: str) -> None: + self._templates[name] = template + + def render(self, name: str, **ctx: Any) -> str: + tmpl = self._templates.get(name, "") + return f"[{name}] {tmpl}" + "".join( + f" {k}={v}" for k, v in ctx.items() + ) + + +class HtmlBuilder: + _parts: List[str] + + def __init__(self) -> None: + self._parts = [] + + def tag(self, name: str, content: str, **attrs: str) -> "HtmlBuilder": + attr_str = " ".join(f'{k}="{v}"' for k, v in attrs.items()) + if attr_str: + self._parts.append(f"<{name} {attr_str}>{content}") + else: + self._parts.append(f"<{name}>{content}") + return self + + def div(self, content: str, class_name: str = "") -> "HtmlBuilder": + if class_name: + self._parts.append(f'
{content}
') + else: + self._parts.append(f"
{content}
") + return self + + def span(self, content: str, style: str = "") -> "HtmlBuilder": + if style: + self._parts.append(f'{content}') + else: + self._parts.append(f"{content}") + return self + + def build(self) -> str: + return f"\n\n\n{''.join(self._parts)}\n\n" + + +# End of fstring_heavy.py diff --git a/packages/pyright-internal/src/tests/benchmarkData/import_heavy.py b/packages/pyright-internal/src/tests/benchmarkData/import_heavy.py new file mode 100644 index 000000000000..f48c4f89f942 --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarkData/import_heavy.py @@ -0,0 +1,375 @@ +# import_heavy.py — many import statements for resolution benchmarking + +from __future__ import annotations + +# Standard library imports (varied styles) +import os +import sys +import io +import re +import json +import csv +import math +import time +import copy +import enum +import abc +import ast +import dis +import ssl +import xml +import html +import http +import uuid +import zlib +import gzip +import lzma +import bz2 +import base64 +import hashlib +import hmac +import secrets +import random +import struct +import array +import queue +import heapq +import bisect +import decimal +import fractions +import statistics +import string +import textwrap +import unicodedata +import difflib +import pprint +import reprlib +import warnings +import traceback +import linecache +import inspect +import dis +import code +import codeop +import compile +import compileall + +# From imports +from os import path, getcwd, listdir, makedirs, remove, rename +from os.path import ( + join, + exists, + isfile, + isdir, + basename, + dirname, + abspath, + relpath, + normpath, + splitext, + getsize, + getmtime, +) +from sys import argv, exit, stdin, stdout, stderr, platform, version +from io import BytesIO, StringIO, BufferedReader, TextIOWrapper +from re import compile, match, search, findall, sub, split, Pattern, Match +from json import dumps, loads, dump, load, JSONEncoder, JSONDecoder +from csv 
import reader, writer, DictReader, DictWriter +from math import ( + ceil, + floor, + sqrt, + pow, + log, + log2, + log10, + exp, + sin, + cos, + tan, + pi, + e, + inf, + nan, + isnan, + isinf, + isfinite, + gcd, + factorial, +) +from time import time as time_func, sleep, monotonic, perf_counter +from copy import copy as shallow_copy, deepcopy +from enum import Enum, IntEnum, Flag, IntFlag, auto, unique +from abc import ABC, ABCMeta, abstractmethod +from collections import ( + OrderedDict, + defaultdict, + deque, + Counter, + namedtuple, + ChainMap, +) +from collections.abc import ( + Iterable, + Iterator, + Generator, + Sequence, + MutableSequence, + Set, + MutableSet, + Mapping, + MutableMapping, + Callable, + Hashable, + Sized, + Container, + Reversible, + Collection, + Awaitable, + Coroutine, + AsyncIterator, + AsyncIterable, + AsyncGenerator, +) +from typing import ( + Any, + ClassVar, + Dict, + Final, + Generic, + List, + Literal, + Optional, + Protocol, + Set as TSet, + Tuple, + Type, + TypeVar, + Union, + cast, + overload, + runtime_checkable, + get_type_hints, + TYPE_CHECKING, + NamedTuple, + TypedDict, + Annotated, + TypeAlias, + TypeGuard, + Never, + Self, + Unpack, + ParamSpec, + Concatenate, + assert_type, + reveal_type, + dataclass_transform, + no_type_check, +) +from functools import ( + reduce, + partial, + lru_cache, + wraps, + total_ordering, + singledispatch, + cached_property, +) +from itertools import ( + chain, + combinations, + permutations, + product, + repeat, + count, + cycle, + islice, + groupby, + starmap, + accumulate, + zip_longest, + compress, + filterfalse, + takewhile, + dropwhile, + tee, +) +from contextlib import ( + contextmanager, + asynccontextmanager, + closing, + suppress, + redirect_stdout, + redirect_stderr, + nullcontext, + ExitStack, + AbstractContextManager, +) +from dataclasses import dataclass, field, fields, asdict, astuple, make_dataclass +from pathlib import Path, PurePath, PosixPath, WindowsPath, PurePosixPath +from 
datetime import datetime, date, time as dt_time, timedelta, timezone +from urllib.parse import ( + urlparse, + urlencode, + urljoin, + quote, + unquote, + parse_qs, + parse_qsl, + urlsplit, + urlunsplit, +) +from http import HTTPStatus +from http.client import HTTPConnection, HTTPSConnection, HTTPResponse +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from xml.etree import ElementTree +from xml.dom import minidom +from html.parser import HTMLParser +from concurrent.futures import ( + ThreadPoolExecutor, + ProcessPoolExecutor, + Future, + as_completed, + wait, + FIRST_COMPLETED, + ALL_COMPLETED, +) +from threading import Thread, Lock, RLock, Event, Condition, Semaphore, Timer +from multiprocessing import Process, Pool, Queue as MPQueue, Value, Array, Manager +from subprocess import run, Popen, PIPE, DEVNULL, CalledProcessError +from shutil import copy2, copytree, rmtree, move, which, disk_usage +from tempfile import ( + TemporaryFile, + NamedTemporaryFile, + mkdtemp, + mkstemp, + gettempdir, + SpooledTemporaryFile, +) +from unittest import TestCase, TestSuite, TestLoader, TextTestRunner, mock +from unittest.mock import Mock, MagicMock, patch, call, ANY, PropertyMock +from logging import ( + Logger, + getLogger, + StreamHandler, + FileHandler, + Formatter, + DEBUG, + INFO, + WARNING, + ERROR, + CRITICAL, + basicConfig, +) +from argparse import ArgumentParser, Namespace, FileType, Action, HelpFormatter +from configparser import ConfigParser, RawConfigParser +from socket import socket, AF_INET, AF_INET6, SOCK_STREAM, SOCK_DGRAM +from signal import signal, SIGINT, SIGTERM, SIG_DFL, SIG_IGN +from weakref import ref, WeakValueDictionary, WeakKeyDictionary, finalize +from operator import ( + add, + sub, + mul, + truediv, + floordiv, + mod, + pow as op_pow, + neg, + pos, + abs as op_abs, + eq, + ne, + lt, + le, + gt, + ge, + and_, + or_, + xor, + not_, + itemgetter, + attrgetter, + methodcaller, +) + +# Conditional imports +if 
TYPE_CHECKING: + from _typeshed import SupportsRead, SupportsWrite, StrPath + from typing_extensions import Buffer, ReadOnly + +# Aliased imports +import os.path as osp +import collections.abc as cabc +import xml.etree.ElementTree as ET + +# Try/except imports (common pattern) +try: + import numpy as np # type: ignore +except ImportError: + np = None # type: ignore + +try: + import pandas as pd # type: ignore +except ImportError: + pd = None # type: ignore + +try: + import requests # type: ignore +except ImportError: + requests = None # type: ignore + +try: + import yaml # type: ignore +except ImportError: + yaml = None # type: ignore + +try: + import toml # type: ignore +except ImportError: + toml = None # type: ignore + + +# Code that uses imported names to exercise resolution +def use_imports() -> None: + """Function that references many imported names.""" + p = Path(".") + files = list(p.iterdir()) + cwd = getcwd() + + data: Dict[str, Any] = {"key": "value"} + json_str = dumps(data) + parsed = loads(json_str) + + now = datetime.now() + delta = timedelta(days=1) + tomorrow = now + delta + + url = urlparse("https://example.com/path?key=value") + + with ThreadPoolExecutor(max_workers=4) as executor: + futures = [executor.submit(lambda x: x * 2, i) for i in range(10)] + + parser = ArgumentParser(description="test") + parser.add_argument("--verbose", action="store_true") + + logger = getLogger(__name__) + logger.setLevel(DEBUG) + + tmp_dir = mkdtemp() + result = sqrt(144) + items = list(chain([1, 2], [3, 4], [5, 6])) + grouped = groupby(sorted(items), key=lambda x: x % 2) + + counter = Counter(items) + + # Type aliases using imported types + Config: TypeAlias = Dict[str, Union[str, int, float, bool, List[Any]]] + Handler: TypeAlias = Callable[[str, int], Optional[bool]] + DataRow: TypeAlias = Tuple[int, str, float, Optional[str]] + + _ = (p, files, cwd, data, json_str, parsed, now, delta, tomorrow, url, + parser, logger, tmp_dir, result, items, grouped, counter) + + 
+# End of import_heavy.py diff --git a/packages/pyright-internal/src/tests/benchmarkData/large_class.py b/packages/pyright-internal/src/tests/benchmarkData/large_class.py new file mode 100644 index 000000000000..8d4fb76e484f --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarkData/large_class.py @@ -0,0 +1,853 @@ +# large_class.py — class with 200+ methods for member completion benchmarking + +from __future__ import annotations + +from typing import ( + Any, + ClassVar, + Dict, + Iterator, + List, + Optional, + Sequence, + Set, + Tuple, + TypeVar, + Union, +) + +_T = TypeVar("_T") + + +class LargeClass: + """A class with many methods to stress member completion.""" + + # Class variables + VERSION: ClassVar[str] = "1.0.0" + MAX_SIZE: ClassVar[int] = 1000 + DEFAULT_NAME: ClassVar[str] = "unnamed" + + # Instance variables + _name: str + _data: List[Any] + _metadata: Dict[str, Any] + _flags: Set[str] + _parent: Optional[LargeClass] + _children: List[LargeClass] + _cache: Dict[str, Any] + _counter: int + + def __init__( + self, + name: str, + data: Optional[List[Any]] = None, + parent: Optional[LargeClass] = None, + ) -> None: + self._name = name + self._data = data or [] + self._metadata = {} + self._flags = set() + self._parent = parent + self._children = [] + self._cache = {} + self._counter = 0 + + # --- Properties (20) --- + + @property + def name(self) -> str: + return self._name + + @name.setter + def name(self, value: str) -> None: + self._name = value + + @property + def data(self) -> List[Any]: + return self._data + + @property + def metadata(self) -> Dict[str, Any]: + return self._metadata + + @property + def flags(self) -> Set[str]: + return self._flags + + @property + def parent(self) -> Optional[LargeClass]: + return self._parent + + @property + def children(self) -> List[LargeClass]: + return self._children + + @property + def size(self) -> int: + return len(self._data) + + @property + def is_empty(self) -> bool: + return len(self._data) == 0 
+ + @property + def is_root(self) -> bool: + return self._parent is None + + @property + def is_leaf(self) -> bool: + return len(self._children) == 0 + + @property + def depth(self) -> int: + d = 0 + node = self._parent + while node is not None: + d += 1 + node = node._parent + return d + + @property + def path(self) -> str: + parts: List[str] = [] + node: Optional[LargeClass] = self + while node is not None: + parts.append(node._name) + node = node._parent + parts.reverse() + return "/".join(parts) + + @property + def root(self) -> LargeClass: + node = self + while node._parent is not None: + node = node._parent + return node + + @property + def siblings(self) -> List[LargeClass]: + if self._parent is None: + return [] + return [c for c in self._parent._children if c is not self] + + @property + def descendant_count(self) -> int: + count = len(self._children) + for child in self._children: + count += child.descendant_count + return count + + @property + def total_data_size(self) -> int: + total = len(self._data) + for child in self._children: + total += child.total_data_size + return total + + @property + def counter(self) -> int: + return self._counter + + @property + def cache_size(self) -> int: + return len(self._cache) + + @property + def has_metadata(self) -> bool: + return len(self._metadata) > 0 + + # --- Data manipulation methods (40) --- + + def add_item(self, item: Any) -> None: + self._data.append(item) + + def add_items(self, items: Sequence[Any]) -> None: + self._data.extend(items) + + def insert_item(self, index: int, item: Any) -> None: + self._data.insert(index, item) + + def remove_item(self, item: Any) -> bool: + try: + self._data.remove(item) + return True + except ValueError: + return False + + def pop_item(self, index: int = -1) -> Any: + return self._data.pop(index) + + def clear_data(self) -> None: + self._data.clear() + + def sort_data(self, reverse: bool = False) -> None: + self._data.sort(reverse=reverse) + + def reverse_data(self) -> 
None: + self._data.reverse() + + def get_item(self, index: int) -> Any: + return self._data[index] + + def get_items(self, start: int, end: int) -> List[Any]: + return self._data[start:end] + + def set_item(self, index: int, value: Any) -> None: + self._data[index] = value + + def find_item(self, item: Any) -> int: + try: + return self._data.index(item) + except ValueError: + return -1 + + def contains_item(self, item: Any) -> bool: + return item in self._data + + def count_item(self, item: Any) -> int: + return self._data.count(item) + + def first_item(self) -> Optional[Any]: + return self._data[0] if self._data else None + + def last_item(self) -> Optional[Any]: + return self._data[-1] if self._data else None + + def unique_items(self) -> List[Any]: + seen: Set[Any] = set() + result: List[Any] = [] + for item in self._data: + if item not in seen: + seen.add(item) + result.append(item) + return result + + def filter_items(self, predicate: Any) -> List[Any]: + return [item for item in self._data if predicate(item)] + + def map_items(self, func: Any) -> List[Any]: + return [func(item) for item in self._data] + + def reduce_items(self, func: Any, initial: Any = None) -> Any: + result = initial + for item in self._data: + if result is None: + result = item + else: + result = func(result, item) + return result + + def zip_with(self, other: Sequence[Any]) -> List[Tuple[Any, Any]]: + return list(zip(self._data, other)) + + def enumerate_items(self) -> List[Tuple[int, Any]]: + return list(enumerate(self._data)) + + def chunk_data(self, size: int) -> List[List[Any]]: + return [self._data[i : i + size] for i in range(0, len(self._data), size)] + + def flatten_data(self) -> List[Any]: + result: List[Any] = [] + for item in self._data: + if isinstance(item, list): + result.extend(item) + else: + result.append(item) + return result + + def take_items(self, n: int) -> List[Any]: + return self._data[:n] + + def drop_items(self, n: int) -> List[Any]: + return self._data[n:] + + 
def sample_items(self, n: int) -> List[Any]: + import random + return random.sample(self._data, min(n, len(self._data))) + + def shuffle_data(self) -> None: + import random + random.shuffle(self._data) + + def min_item(self) -> Optional[Any]: + return min(self._data) if self._data else None + + def max_item(self) -> Optional[Any]: + return max(self._data) if self._data else None + + def sum_items(self) -> Any: + return sum(self._data) if self._data else 0 + + def average_items(self) -> Optional[float]: + if not self._data: + return None + return sum(self._data) / len(self._data) + + def group_by(self, key_func: Any) -> Dict[Any, List[Any]]: + groups: Dict[Any, List[Any]] = {} + for item in self._data: + k = key_func(item) + if k not in groups: + groups[k] = [] + groups[k].append(item) + return groups + + def partition(self, predicate: Any) -> Tuple[List[Any], List[Any]]: + true_items: List[Any] = [] + false_items: List[Any] = [] + for item in self._data: + if predicate(item): + true_items.append(item) + else: + false_items.append(item) + return (true_items, false_items) + + def all_match(self, predicate: Any) -> bool: + return all(predicate(item) for item in self._data) + + def any_match(self, predicate: Any) -> bool: + return any(predicate(item) for item in self._data) + + def none_match(self, predicate: Any) -> bool: + return not any(predicate(item) for item in self._data) + + def find_first(self, predicate: Any) -> Optional[Any]: + for item in self._data: + if predicate(item): + return item + return None + + def find_last(self, predicate: Any) -> Optional[Any]: + for item in reversed(self._data): + if predicate(item): + return item + return None + + def distinct_count(self) -> int: + return len(set(self._data)) + + # --- Metadata methods (20) --- + + def set_metadata(self, key: str, value: Any) -> None: + self._metadata[key] = value + + def get_metadata(self, key: str, default: Any = None) -> Any: + return self._metadata.get(key, default) + + def 
has_metadata_key(self, key: str) -> bool: + return key in self._metadata + + def remove_metadata(self, key: str) -> Optional[Any]: + return self._metadata.pop(key, None) + + def clear_metadata(self) -> None: + self._metadata.clear() + + def metadata_keys(self) -> List[str]: + return list(self._metadata.keys()) + + def metadata_values(self) -> List[Any]: + return list(self._metadata.values()) + + def metadata_items(self) -> List[Tuple[str, Any]]: + return list(self._metadata.items()) + + def merge_metadata(self, other: Dict[str, Any]) -> None: + self._metadata.update(other) + + def copy_metadata_from(self, source: LargeClass) -> None: + self._metadata.update(source._metadata) + + def filter_metadata(self, predicate: Any) -> Dict[str, Any]: + return {k: v for k, v in self._metadata.items() if predicate(k, v)} + + def transform_metadata_values(self, func: Any) -> Dict[str, Any]: + return {k: func(v) for k, v in self._metadata.items()} + + def metadata_to_json(self) -> str: + import json + return json.dumps(self._metadata) + + def metadata_from_json(self, json_str: str) -> None: + import json + self._metadata = json.loads(json_str) + + def validate_metadata(self, schema: Dict[str, type]) -> List[str]: + errors: List[str] = [] + for key, expected_type in schema.items(): + if key not in self._metadata: + errors.append(f"Missing key: {key}") + elif not isinstance(self._metadata[key], expected_type): + errors.append(f"Wrong type for {key}: expected {expected_type.__name__}") + return errors + + def metadata_diff(self, other: LargeClass) -> Dict[str, Tuple[Any, Any]]: + all_keys = set(self._metadata.keys()) | set(other._metadata.keys()) + diff: Dict[str, Tuple[Any, Any]] = {} + for key in all_keys: + v1 = self._metadata.get(key) + v2 = other._metadata.get(key) + if v1 != v2: + diff[key] = (v1, v2) + return diff + + def snapshot_metadata(self) -> Dict[str, Any]: + return dict(self._metadata) + + def restore_metadata(self, snapshot: Dict[str, Any]) -> None: + self._metadata = 
dict(snapshot) + + def metadata_size_bytes(self) -> int: + import sys + return sys.getsizeof(self._metadata) + + def metadata_summary(self) -> str: + return f"Metadata: {len(self._metadata)} keys" + + # --- Flag methods (15) --- + + def add_flag(self, flag: str) -> None: + self._flags.add(flag) + + def remove_flag(self, flag: str) -> None: + self._flags.discard(flag) + + def has_flag(self, flag: str) -> bool: + return flag in self._flags + + def toggle_flag(self, flag: str) -> bool: + if flag in self._flags: + self._flags.discard(flag) + return False + self._flags.add(flag) + return True + + def clear_flags(self) -> None: + self._flags.clear() + + def set_flags(self, flags: Set[str]) -> None: + self._flags = set(flags) + + def get_flags(self) -> Set[str]: + return set(self._flags) + + def flag_count(self) -> int: + return len(self._flags) + + def has_any_flag(self, flags: Set[str]) -> bool: + return bool(self._flags & flags) + + def has_all_flags(self, flags: Set[str]) -> bool: + return flags.issubset(self._flags) + + def common_flags(self, other: LargeClass) -> Set[str]: + return self._flags & other._flags + + def diff_flags(self, other: LargeClass) -> Set[str]: + return self._flags - other._flags + + def union_flags(self, other: LargeClass) -> Set[str]: + return self._flags | other._flags + + def flags_to_list(self) -> List[str]: + return sorted(self._flags) + + def flags_summary(self) -> str: + return f"Flags: {', '.join(sorted(self._flags))}" + + # --- Tree methods (25) --- + + def add_child(self, child: LargeClass) -> None: + child._parent = self + self._children.append(child) + + def remove_child(self, child: LargeClass) -> bool: + try: + self._children.remove(child) + child._parent = None + return True + except ValueError: + return False + + def detach(self) -> None: + if self._parent: + self._parent.remove_child(self) + + def move_to(self, new_parent: LargeClass) -> None: + self.detach() + new_parent.add_child(self) + + def get_child(self, index: int) -> 
LargeClass: + return self._children[index] + + def find_child(self, name: str) -> Optional[LargeClass]: + for child in self._children: + if child._name == name: + return child + return None + + def find_descendant(self, name: str) -> Optional[LargeClass]: + for child in self._children: + if child._name == name: + return child + found = child.find_descendant(name) + if found is not None: + return found + return None + + def walk_tree(self) -> Iterator[LargeClass]: + yield self + for child in self._children: + yield from child.walk_tree() + + def walk_leaves(self) -> Iterator[LargeClass]: + if self.is_leaf: + yield self + else: + for child in self._children: + yield from child.walk_leaves() + + def ancestors(self) -> List[LargeClass]: + result: List[LargeClass] = [] + node = self._parent + while node is not None: + result.append(node) + node = node._parent + return result + + def common_ancestor(self, other: LargeClass) -> Optional[LargeClass]: + my_ancestors = set(id(a) for a in self.ancestors()) + node: Optional[LargeClass] = other + while node is not None: + if id(node) in my_ancestors: + return node + node = node._parent + return None + + def subtree_size(self) -> int: + return 1 + sum(child.subtree_size() for child in self._children) + + def height(self) -> int: + if not self._children: + return 0 + return 1 + max(child.height() for child in self._children) + + def is_ancestor_of(self, other: LargeClass) -> bool: + node = other._parent + while node is not None: + if node is self: + return True + node = node._parent + return False + + def is_descendant_of(self, other: LargeClass) -> bool: + return other.is_ancestor_of(self) + + def child_count(self) -> int: + return len(self._children) + + def sort_children(self, key: Optional[Any] = None) -> None: + if key: + self._children.sort(key=key) + else: + self._children.sort(key=lambda c: c._name) + + def reverse_children(self) -> None: + self._children.reverse() + + def flatten_tree(self) -> List[LargeClass]: + return 
list(self.walk_tree()) + + def tree_depth_map(self) -> Dict[int, List[LargeClass]]: + result: Dict[int, List[LargeClass]] = {} + for node in self.walk_tree(): + d = node.depth + if d not in result: + result[d] = [] + result[d].append(node) + return result + + def prune(self, predicate: Any) -> int: + removed = 0 + keep: List[LargeClass] = [] + for child in self._children: + if predicate(child): + child._parent = None + removed += 1 + else: + keep.append(child) + removed += child.prune(predicate) + self._children = keep + return removed + + def clone(self) -> LargeClass: + new_node = LargeClass(self._name, list(self._data)) + new_node._metadata = dict(self._metadata) + new_node._flags = set(self._flags) + for child in self._children: + cloned_child = child.clone() + new_node.add_child(cloned_child) + return new_node + + def merge_with(self, other: LargeClass) -> None: + self._data.extend(other._data) + self._metadata.update(other._metadata) + self._flags.update(other._flags) + for child in other._children: + self.add_child(child) + + def tree_summary(self) -> str: + return f"Tree({self._name}, children={self.child_count()}, descendants={self.descendant_count})" + + # --- Cache methods (10) --- + + def cache_get(self, key: str) -> Optional[Any]: + return self._cache.get(key) + + def cache_set(self, key: str, value: Any) -> None: + self._cache[key] = value + + def cache_has(self, key: str) -> bool: + return key in self._cache + + def cache_remove(self, key: str) -> Optional[Any]: + return self._cache.pop(key, None) + + def cache_clear(self) -> None: + self._cache.clear() + + def cache_keys(self) -> List[str]: + return list(self._cache.keys()) + + def cache_values(self) -> List[Any]: + return list(self._cache.values()) + + def cache_items(self) -> List[Tuple[str, Any]]: + return list(self._cache.items()) + + def cache_update(self, data: Dict[str, Any]) -> None: + self._cache.update(data) + + def cache_get_or_set(self, key: str, factory: Any) -> Any: + if key not in 
self._cache: + self._cache[key] = factory() + return self._cache[key] + + # --- Counter methods (10) --- + + def increment(self, by: int = 1) -> int: + self._counter += by + return self._counter + + def decrement(self, by: int = 1) -> int: + self._counter -= by + return self._counter + + def reset_counter(self) -> None: + self._counter = 0 + + def set_counter(self, value: int) -> None: + self._counter = value + + def counter_is_zero(self) -> bool: + return self._counter == 0 + + def counter_is_positive(self) -> bool: + return self._counter > 0 + + def counter_is_negative(self) -> bool: + return self._counter < 0 + + def counter_abs(self) -> int: + return abs(self._counter) + + def counter_clamp(self, low: int, high: int) -> int: + self._counter = max(low, min(high, self._counter)) + return self._counter + + def counter_summary(self) -> str: + return f"Counter: {self._counter}" + + # --- Serialization methods (10) --- + + def to_dict(self) -> Dict[str, Any]: + return { + "name": self._name, + "data": self._data, + "metadata": self._metadata, + "flags": list(self._flags), + "counter": self._counter, + "children": [c.to_dict() for c in self._children], + } + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> LargeClass: + obj = cls(d["name"], d.get("data", [])) + obj._metadata = d.get("metadata", {}) + obj._flags = set(d.get("flags", [])) + obj._counter = d.get("counter", 0) + for child_dict in d.get("children", []): + child = cls.from_dict(child_dict) + obj.add_child(child) + return obj + + def to_json(self) -> str: + import json + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> LargeClass: + import json + return cls.from_dict(json.loads(json_str)) + + def to_yaml_str(self) -> str: + lines: List[str] = [f"name: {self._name}"] + lines.append(f"counter: {self._counter}") + lines.append(f"flags: [{', '.join(sorted(self._flags))}]") + return "\n".join(lines) + + def copy(self) -> LargeClass: + return 
LargeClass.from_dict(self.to_dict()) + + def equals(self, other: LargeClass) -> bool: + return self.to_dict() == other.to_dict() + + def hash_value(self) -> int: + return hash((self._name, tuple(self._data), self._counter)) + + def size_bytes(self) -> int: + import sys + return sys.getsizeof(self) + + def describe(self) -> str: + return ( + f"LargeClass(name={self._name!r}, " + f"data_size={len(self._data)}, " + f"metadata_keys={len(self._metadata)}, " + f"flags={len(self._flags)}, " + f"children={len(self._children)}, " + f"counter={self._counter})" + ) + + # --- Dunder methods (20) --- + + def __repr__(self) -> str: + return f"LargeClass({self._name!r})" + + def __str__(self) -> str: + return self._name + + def __len__(self) -> int: + return len(self._data) + + def __bool__(self) -> bool: + return len(self._data) > 0 + + def __contains__(self, item: Any) -> bool: + return item in self._data + + def __iter__(self) -> Iterator[Any]: + return iter(self._data) + + def __getitem__(self, index: int) -> Any: + return self._data[index] + + def __setitem__(self, index: int, value: Any) -> None: + self._data[index] = value + + def __delitem__(self, index: int) -> None: + del self._data[index] + + def __eq__(self, other: object) -> bool: + if not isinstance(other, LargeClass): + return NotImplemented + return self._name == other._name and self._data == other._data + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + def __hash__(self) -> int: + return hash(self._name) + + def __lt__(self, other: LargeClass) -> bool: + return self._name < other._name + + def __le__(self, other: LargeClass) -> bool: + return self._name <= other._name + + def __gt__(self, other: LargeClass) -> bool: + return self._name > other._name + + def __ge__(self, other: LargeClass) -> bool: + return self._name >= other._name + + def __add__(self, other: LargeClass) -> LargeClass: + result = self.clone() + result._data.extend(other._data) + return result + + def __iadd__(self, 
other: LargeClass) -> LargeClass: + self._data.extend(other._data) + return self + + def __enter__(self) -> LargeClass: + return self + + def __exit__(self, *args: Any) -> None: + self.clear_data() + self.clear_metadata() + self.clear_flags() + self.cache_clear() + + +# --- Subclass to add more methods for completion ancestor chain --- + + +class ExtendedClass(LargeClass): + """Extension with additional domain methods.""" + + _tags: List[str] + _version: int + + def __init__(self, name: str, version: int = 1) -> None: + super().__init__(name) + self._tags = [] + self._version = version + + def add_tag(self, tag: str) -> None: + self._tags.append(tag) + + def remove_tag(self, tag: str) -> None: + if tag in self._tags: + self._tags.remove(tag) + + def has_tag(self, tag: str) -> bool: + return tag in self._tags + + def get_tags(self) -> List[str]: + return list(self._tags) + + def clear_tags(self) -> None: + self._tags.clear() + + def bump_version(self) -> int: + self._version += 1 + return self._version + + def get_version(self) -> int: + return self._version + + def set_version(self, version: int) -> None: + self._version = version + + def version_string(self) -> str: + return f"v{self._version}" + + def full_describe(self) -> str: + base = self.describe() + return f"{base}, tags={len(self._tags)}, version={self._version}" + + +# Marker for completion benchmark — trigger point +obj = ExtendedClass("test") +obj. # completion trigger point diff --git a/packages/pyright-internal/src/tests/benchmarkData/large_stdlib.py b/packages/pyright-internal/src/tests/benchmarkData/large_stdlib.py new file mode 100644 index 000000000000..9cc973aca832 --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarkData/large_stdlib.py @@ -0,0 +1,1721 @@ +# large_stdlib.py — simulates a large stdlib-like module (~3000+ lines) +# Used for tokenizer/parser/type-evaluator benchmarking. 
+ +from __future__ import annotations + +import os +import sys +import typing +from typing import ( + Any, + ClassVar, + Dict, + Final, + Generic, + Iterator, + List, + Literal, + Optional, + Protocol, + Sequence, + Set, + Tuple, + TypeVar, + Union, + overload, + runtime_checkable, +) + +_T = TypeVar("_T") +_T_co = TypeVar("_T_co", covariant=True) +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") +_S = TypeVar("_S", bound="Sortable") + + +class Sortable(Protocol): + def __lt__(self, other: Any) -> bool: ... + def __le__(self, other: Any) -> bool: ... + + +# --- Large class hierarchy --- + + +class BaseNode: + """Base class for all AST nodes.""" + + kind: ClassVar[str] = "base" + _parent: Optional[BaseNode] = None + _children: List[BaseNode] + _line: int + _col: int + _end_line: int + _end_col: int + + def __init__( + self, + line: int = 0, + col: int = 0, + end_line: int = 0, + end_col: int = 0, + ) -> None: + self._children = [] + self._line = line + self._col = col + self._end_line = end_line + self._end_col = end_col + + @property + def parent(self) -> Optional[BaseNode]: + return self._parent + + @parent.setter + def parent(self, value: Optional[BaseNode]) -> None: + self._parent = value + + def add_child(self, child: BaseNode) -> None: + child._parent = self + self._children.append(child) + + def remove_child(self, child: BaseNode) -> None: + self._children.remove(child) + child._parent = None + + def walk(self) -> Iterator[BaseNode]: + yield self + for child in self._children: + yield from child.walk() + + def find_parent(self, kind: str) -> Optional[BaseNode]: + node = self._parent + while node is not None: + if node.kind == kind: + return node + node = node._parent + return None + + def depth(self) -> int: + d = 0 + node = self._parent + while node is not None: + d += 1 + node = node._parent + return d + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(line={self._line}, col={self._col})" + + def __eq__(self, other: object) -> bool: + if not 
isinstance(other, BaseNode): + return NotImplemented + return ( + self.kind == other.kind + and self._line == other._line + and self._col == other._col + ) + + def __hash__(self) -> int: + return hash((self.kind, self._line, self._col)) + + +class Expression(BaseNode): + kind: ClassVar[str] = "expression" + + def evaluate(self) -> Any: + raise NotImplementedError + + +class Statement(BaseNode): + kind: ClassVar[str] = "statement" + + def execute(self) -> None: + raise NotImplementedError + + +class Module(BaseNode): + kind: ClassVar[str] = "module" + name: str + docstring: Optional[str] + imports: List[ImportStatement] + body: List[Statement] + + def __init__(self, name: str, docstring: Optional[str] = None) -> None: + super().__init__() + self.name = name + self.docstring = docstring + self.imports = [] + self.body = [] + + +class ImportStatement(Statement): + kind: ClassVar[str] = "import" + module_name: str + alias: Optional[str] + names: List[Tuple[str, Optional[str]]] + + def __init__( + self, + module_name: str, + alias: Optional[str] = None, + names: Optional[List[Tuple[str, Optional[str]]]] = None, + ) -> None: + super().__init__() + self.module_name = module_name + self.alias = alias + self.names = names or [] + + def execute(self) -> None: + pass + + +class FunctionDef(Statement): + kind: ClassVar[str] = "funcdef" + name: str + args: List[Argument] + return_type: Optional[Expression] + body: List[Statement] + decorators: List[Expression] + is_async: bool + + def __init__( + self, + name: str, + args: Optional[List[Argument]] = None, + return_type: Optional[Expression] = None, + is_async: bool = False, + ) -> None: + super().__init__() + self.name = name + self.args = args or [] + self.return_type = return_type + self.body = [] + self.decorators = [] + self.is_async = is_async + + def execute(self) -> None: + pass + + +class ClassDef(Statement): + kind: ClassVar[str] = "classdef" + name: str + bases: List[Expression] + body: List[Statement] + decorators: 
List[Expression] + metaclass: Optional[Expression] + + def __init__( + self, + name: str, + bases: Optional[List[Expression]] = None, + metaclass: Optional[Expression] = None, + ) -> None: + super().__init__() + self.name = name + self.bases = bases or [] + self.body = [] + self.decorators = [] + self.metaclass = metaclass + + def execute(self) -> None: + pass + + +class Argument: + name: str + annotation: Optional[Expression] + default: Optional[Expression] + kind: str # "positional", "keyword", "*args", "**kwargs" + + def __init__( + self, + name: str, + annotation: Optional[Expression] = None, + default: Optional[Expression] = None, + kind: str = "positional", + ) -> None: + self.name = name + self.annotation = annotation + self.default = default + self.kind = kind + + +class AssignStatement(Statement): + kind: ClassVar[str] = "assign" + targets: List[Expression] + value: Expression + type_comment: Optional[str] + + def __init__( + self, + targets: List[Expression], + value: Expression, + type_comment: Optional[str] = None, + ) -> None: + super().__init__() + self.targets = targets + self.value = value + self.type_comment = type_comment + + def execute(self) -> None: + pass + + +class ReturnStatement(Statement): + kind: ClassVar[str] = "return" + value: Optional[Expression] + + def __init__(self, value: Optional[Expression] = None) -> None: + super().__init__() + self.value = value + + def execute(self) -> None: + pass + + +class IfStatement(Statement): + kind: ClassVar[str] = "if" + condition: Expression + body: List[Statement] + elif_clauses: List[Tuple[Expression, List[Statement]]] + else_body: Optional[List[Statement]] + + def __init__(self, condition: Expression) -> None: + super().__init__() + self.condition = condition + self.body = [] + self.elif_clauses = [] + self.else_body = None + + def execute(self) -> None: + pass + + +class ForStatement(Statement): + kind: ClassVar[str] = "for" + target: Expression + iterable: Expression + body: List[Statement] + 
else_body: Optional[List[Statement]] + is_async: bool + + def __init__( + self, + target: Expression, + iterable: Expression, + is_async: bool = False, + ) -> None: + super().__init__() + self.target = target + self.iterable = iterable + self.body = [] + self.else_body = None + self.is_async = is_async + + def execute(self) -> None: + pass + + +class WhileStatement(Statement): + kind: ClassVar[str] = "while" + condition: Expression + body: List[Statement] + else_body: Optional[List[Statement]] + + def __init__(self, condition: Expression) -> None: + super().__init__() + self.condition = condition + self.body = [] + self.else_body = None + + def execute(self) -> None: + pass + + +class TryStatement(Statement): + kind: ClassVar[str] = "try" + body: List[Statement] + handlers: List[ExceptHandler] + else_body: Optional[List[Statement]] + finally_body: Optional[List[Statement]] + + def __init__(self) -> None: + super().__init__() + self.body = [] + self.handlers = [] + self.else_body = None + self.finally_body = None + + def execute(self) -> None: + pass + + +class ExceptHandler(BaseNode): + kind: ClassVar[str] = "except_handler" + exception_type: Optional[Expression] + name: Optional[str] + body: List[Statement] + + def __init__( + self, + exception_type: Optional[Expression] = None, + name: Optional[str] = None, + ) -> None: + super().__init__() + self.exception_type = exception_type + self.name = name + self.body = [] + + +class WithStatement(Statement): + kind: ClassVar[str] = "with" + items: List[Tuple[Expression, Optional[Expression]]] + body: List[Statement] + is_async: bool + + def __init__(self, is_async: bool = False) -> None: + super().__init__() + self.items = [] + self.body = [] + self.is_async = is_async + + def execute(self) -> None: + pass + + +class RaiseStatement(Statement): + kind: ClassVar[str] = "raise" + exception: Optional[Expression] + cause: Optional[Expression] + + def __init__( + self, + exception: Optional[Expression] = None, + cause: 
Optional[Expression] = None, + ) -> None: + super().__init__() + self.exception = exception + self.cause = cause + + def execute(self) -> None: + pass + + +class AssertStatement(Statement): + kind: ClassVar[str] = "assert" + test: Expression + msg: Optional[Expression] + + def __init__( + self, + test: Expression, + msg: Optional[Expression] = None, + ) -> None: + super().__init__() + self.test = test + self.msg = msg + + def execute(self) -> None: + pass + + +# --- Expressions --- + + +class NameExpr(Expression): + kind: ClassVar[str] = "name" + id: str + + def __init__(self, id: str) -> None: + super().__init__() + self.id = id + + def evaluate(self) -> str: + return self.id + + +class NumberLiteral(Expression): + kind: ClassVar[str] = "number" + value: Union[int, float, complex] + + def __init__(self, value: Union[int, float, complex]) -> None: + super().__init__() + self.value = value + + def evaluate(self) -> Union[int, float, complex]: + return self.value + + +class StringLiteral(Expression): + kind: ClassVar[str] = "string" + value: str + is_fstring: bool + is_bytes: bool + is_raw: bool + + def __init__( + self, + value: str, + is_fstring: bool = False, + is_bytes: bool = False, + is_raw: bool = False, + ) -> None: + super().__init__() + self.value = value + self.is_fstring = is_fstring + self.is_bytes = is_bytes + self.is_raw = is_raw + + def evaluate(self) -> str: + return self.value + + +class BoolLiteral(Expression): + kind: ClassVar[str] = "bool" + value: bool + + def __init__(self, value: bool) -> None: + super().__init__() + self.value = value + + def evaluate(self) -> bool: + return self.value + + +class NoneLiteral(Expression): + kind: ClassVar[str] = "none" + + def evaluate(self) -> None: + return None + + +class EllipsisLiteral(Expression): + kind: ClassVar[str] = "ellipsis" + + def evaluate(self) -> Any: + return ... 
+ + +class BinaryOp(Expression): + kind: ClassVar[str] = "binop" + left: Expression + op: str + right: Expression + + def __init__(self, left: Expression, op: str, right: Expression) -> None: + super().__init__() + self.left = left + self.op = op + self.right = right + + def evaluate(self) -> Any: + raise NotImplementedError + + +class UnaryOp(Expression): + kind: ClassVar[str] = "unaryop" + op: str + operand: Expression + + def __init__(self, op: str, operand: Expression) -> None: + super().__init__() + self.op = op + self.operand = operand + + def evaluate(self) -> Any: + raise NotImplementedError + + +class CompareExpr(Expression): + kind: ClassVar[str] = "compare" + left: Expression + comparators: List[Tuple[str, Expression]] + + def __init__(self, left: Expression) -> None: + super().__init__() + self.left = left + self.comparators = [] + + def evaluate(self) -> bool: + raise NotImplementedError + + +class CallExpr(Expression): + kind: ClassVar[str] = "call" + func: Expression + args: List[Expression] + kwargs: Dict[str, Expression] + starargs: List[Expression] + starkwargs: List[Expression] + + def __init__(self, func: Expression) -> None: + super().__init__() + self.func = func + self.args = [] + self.kwargs = {} + self.starargs = [] + self.starkwargs = [] + + def evaluate(self) -> Any: + raise NotImplementedError + + +class AttributeExpr(Expression): + kind: ClassVar[str] = "attribute" + value: Expression + attr: str + + def __init__(self, value: Expression, attr: str) -> None: + super().__init__() + self.value = value + self.attr = attr + + def evaluate(self) -> Any: + raise NotImplementedError + + +class SubscriptExpr(Expression): + kind: ClassVar[str] = "subscript" + value: Expression + index: Expression + + def __init__(self, value: Expression, index: Expression) -> None: + super().__init__() + self.value = value + self.index = index + + def evaluate(self) -> Any: + raise NotImplementedError + + +class ListExpr(Expression): + kind: ClassVar[str] = 
"list" + elements: List[Expression] + + def __init__(self, elements: Optional[List[Expression]] = None) -> None: + super().__init__() + self.elements = elements or [] + + def evaluate(self) -> list: + raise NotImplementedError + + +class DictExpr(Expression): + kind: ClassVar[str] = "dict" + keys: List[Optional[Expression]] + values: List[Expression] + + def __init__(self) -> None: + super().__init__() + self.keys = [] + self.values = [] + + def evaluate(self) -> dict: + raise NotImplementedError + + +class SetExpr(Expression): + kind: ClassVar[str] = "set" + elements: List[Expression] + + def __init__(self, elements: Optional[List[Expression]] = None) -> None: + super().__init__() + self.elements = elements or [] + + def evaluate(self) -> set: + raise NotImplementedError + + +class TupleExpr(Expression): + kind: ClassVar[str] = "tuple" + elements: List[Expression] + + def __init__(self, elements: Optional[List[Expression]] = None) -> None: + super().__init__() + self.elements = elements or [] + + def evaluate(self) -> tuple: + raise NotImplementedError + + +class LambdaExpr(Expression): + kind: ClassVar[str] = "lambda" + args: List[Argument] + body: Expression + + def __init__(self, body: Expression) -> None: + super().__init__() + self.args = [] + self.body = body + + def evaluate(self) -> Any: + raise NotImplementedError + + +class ListCompExpr(Expression): + kind: ClassVar[str] = "listcomp" + element: Expression + generators: List[Tuple[Expression, Expression, List[Expression]]] + + def __init__(self, element: Expression) -> None: + super().__init__() + self.element = element + self.generators = [] + + def evaluate(self) -> list: + raise NotImplementedError + + +class DictCompExpr(Expression): + kind: ClassVar[str] = "dictcomp" + key: Expression + value: Expression + generators: List[Tuple[Expression, Expression, List[Expression]]] + + def __init__(self, key: Expression, value: Expression) -> None: + super().__init__() + self.key = key + self.value = value + 
self.generators = [] + + def evaluate(self) -> dict: + raise NotImplementedError + + +class SetCompExpr(Expression): + kind: ClassVar[str] = "setcomp" + element: Expression + generators: List[Tuple[Expression, Expression, List[Expression]]] + + def __init__(self, element: Expression) -> None: + super().__init__() + self.element = element + self.generators = [] + + def evaluate(self) -> set: + raise NotImplementedError + + +class GeneratorExpr(Expression): + kind: ClassVar[str] = "genexpr" + element: Expression + generators: List[Tuple[Expression, Expression, List[Expression]]] + + def __init__(self, element: Expression) -> None: + super().__init__() + self.element = element + self.generators = [] + + def evaluate(self) -> Any: + raise NotImplementedError + + +class ConditionalExpr(Expression): + kind: ClassVar[str] = "conditional" + body: Expression + test: Expression + orelse: Expression + + def __init__( + self, + body: Expression, + test: Expression, + orelse: Expression, + ) -> None: + super().__init__() + self.body = body + self.test = test + self.orelse = orelse + + def evaluate(self) -> Any: + raise NotImplementedError + + +class SliceExpr(Expression): + kind: ClassVar[str] = "slice" + lower: Optional[Expression] + upper: Optional[Expression] + step: Optional[Expression] + + def __init__( + self, + lower: Optional[Expression] = None, + upper: Optional[Expression] = None, + step: Optional[Expression] = None, + ) -> None: + super().__init__() + self.lower = lower + self.upper = upper + self.step = step + + def evaluate(self) -> slice: + raise NotImplementedError + + +class StarredExpr(Expression): + kind: ClassVar[str] = "starred" + value: Expression + + def __init__(self, value: Expression) -> None: + super().__init__() + self.value = value + + def evaluate(self) -> Any: + raise NotImplementedError + + +class WalrusExpr(Expression): + kind: ClassVar[str] = "walrus" + target: NameExpr + value: Expression + + def __init__(self, target: NameExpr, value: 
Expression) -> None: + super().__init__() + self.target = target + self.value = value + + def evaluate(self) -> Any: + raise NotImplementedError + + +class MatchStatement(Statement): + kind: ClassVar[str] = "match" + subject: Expression + cases: List[MatchCase] + + def __init__(self, subject: Expression) -> None: + super().__init__() + self.subject = subject + self.cases = [] + + def execute(self) -> None: + pass + + +class MatchCase(BaseNode): + kind: ClassVar[str] = "match_case" + pattern: Expression + guard: Optional[Expression] + body: List[Statement] + + def __init__( + self, + pattern: Expression, + guard: Optional[Expression] = None, + ) -> None: + super().__init__() + self.pattern = pattern + self.guard = guard + self.body = [] + + +# --- Generic containers --- + + +class Container(Generic[_T]): + """A generic container with multiple operations.""" + + _items: List[_T] + _capacity: int + _name: str + + def __init__(self, name: str, capacity: int = 100) -> None: + self._items = [] + self._capacity = capacity + self._name = name + + def add(self, item: _T) -> bool: + if len(self._items) >= self._capacity: + return False + self._items.append(item) + return True + + def remove(self, item: _T) -> bool: + try: + self._items.remove(item) + return True + except ValueError: + return False + + def get(self, index: int) -> _T: + return self._items[index] + + def __len__(self) -> int: + return len(self._items) + + def __iter__(self) -> Iterator[_T]: + return iter(self._items) + + def __contains__(self, item: _T) -> bool: + return item in self._items + + def clear(self) -> None: + self._items.clear() + + def sort(self: Container[_S]) -> None: + self._items.sort() + + @property + def capacity(self) -> int: + return self._capacity + + @property + def is_full(self) -> bool: + return len(self._items) >= self._capacity + + @property + def is_empty(self) -> bool: + return len(self._items) == 0 + + +class OrderedContainer(Container[_T]): + """Container that maintains insertion 
order with index access.""" + + _index_map: Dict[int, _T] + + def __init__(self, name: str, capacity: int = 100) -> None: + super().__init__(name, capacity) + self._index_map = {} + + def add(self, item: _T) -> bool: + result = super().add(item) + if result: + self._index_map[len(self._items) - 1] = item + return result + + def get_by_index(self, index: int) -> Optional[_T]: + return self._index_map.get(index) + + +class MappedContainer(Generic[_KT, _VT]): + """A dictionary-like container.""" + + _store: Dict[_KT, _VT] + _max_size: int + + def __init__(self, max_size: int = 1000) -> None: + self._store = {} + self._max_size = max_size + + def put(self, key: _KT, value: _VT) -> bool: + if len(self._store) >= self._max_size and key not in self._store: + return False + self._store[key] = value + return True + + def get(self, key: _KT, default: Optional[_VT] = None) -> Optional[_VT]: + return self._store.get(key, default) + + def remove(self, key: _KT) -> Optional[_VT]: + return self._store.pop(key, None) + + def keys(self) -> Set[_KT]: + return set(self._store.keys()) + + def values(self) -> List[_VT]: + return list(self._store.values()) + + def items(self) -> List[Tuple[_KT, _VT]]: + return list(self._store.items()) + + def __len__(self) -> int: + return len(self._store) + + def __contains__(self, key: _KT) -> bool: + return key in self._store + + +# --- Overloaded functions --- + + +@overload +def process(value: int) -> str: ... +@overload +def process(value: str) -> int: ... +@overload +def process(value: bytes) -> List[int]: ... +@overload +def process(value: List[int]) -> bytes: ... +@overload +def process(value: Dict[str, Any]) -> List[Tuple[str, Any]]: ... 
+ + +def process( + value: Union[int, str, bytes, List[int], Dict[str, Any]], +) -> Union[str, int, List[int], bytes, List[Tuple[str, Any]]]: + if isinstance(value, int): + return str(value) + elif isinstance(value, str): + return len(value) + elif isinstance(value, bytes): + return list(value) + elif isinstance(value, list): + return bytes(value) + else: + return list(value.items()) + + +@overload +def convert(src: str, target: type[int]) -> int: ... +@overload +def convert(src: str, target: type[float]) -> float: ... +@overload +def convert(src: str, target: type[bool]) -> bool: ... +@overload +def convert(src: str, target: type[bytes]) -> bytes: ... + + +def convert( + src: str, + target: Union[type[int], type[float], type[bool], type[bytes]], +) -> Union[int, float, bool, bytes]: + return target(src) # type: ignore + + +# --- Protocol examples --- + + +@runtime_checkable +class Serializable(Protocol): + def serialize(self) -> bytes: ... + def deserialize(self, data: bytes) -> None: ... + + +@runtime_checkable +class Comparable(Protocol[_T_co]): + def compare_to(self, other: _T_co) -> int: ... + + +class Hashable(Protocol): + def __hash__(self) -> int: ... + def __eq__(self, other: object) -> bool: ... + + +class Sizeable(Protocol): + def __len__(self) -> int: ... + def __sizeof__(self) -> int: ... + + +class Printable(Protocol): + def __str__(self) -> str: ... + def __repr__(self) -> str: ... 
+ + +# --- Complex type annotations --- + + +ConfigValue = Union[str, int, float, bool, None, List["ConfigValue"], Dict[str, "ConfigValue"]] + +NestedDict = Dict[str, Union[str, int, Dict[str, Union[str, int, Dict[str, Any]]]]] + +CallbackType = typing.Callable[[str, int, Optional[Dict[str, Any]]], bool] + +EventHandler = typing.Callable[..., Optional[bool]] + +TreeNode = Union[ + "LeafNode", + "BranchNode", + Tuple["TreeNode", "TreeNode"], +] + + +class LeafNode: + value: Any + + def __init__(self, value: Any) -> None: + self.value = value + + +class BranchNode: + children: List[TreeNode] + label: str + + def __init__(self, label: str) -> None: + self.children = [] + self.label = label + + +# --- Large function set (simulating stdlib coverage) --- + + +def compute_checksum(data: bytes, algorithm: str = "crc32") -> int: + """Compute a checksum of the given data.""" + if algorithm == "crc32": + result = 0 + for byte in data: + result = (result >> 8) ^ byte + return result & 0xFFFFFFFF + elif algorithm == "simple": + return sum(data) & 0xFFFFFFFF + else: + raise ValueError(f"Unknown algorithm: {algorithm}") + + +def format_bytes(size: int) -> str: + """Format a byte count as a human-readable string.""" + for unit in ["B", "KB", "MB", "GB", "TB"]: + if abs(size) < 1024.0: + return f"{size:.1f} {unit}" + size = int(size / 1024) + return f"{size:.1f} PB" + + +def parse_version(version_str: str) -> Tuple[int, int, int]: + """Parse a version string like '1.2.3' into a tuple.""" + parts = version_str.split(".") + if len(parts) != 3: + raise ValueError(f"Invalid version: {version_str}") + return (int(parts[0]), int(parts[1]), int(parts[2])) + + +def merge_dicts( + *dicts: Dict[str, Any], + deep: bool = False, +) -> Dict[str, Any]: + """Merge multiple dictionaries.""" + result: Dict[str, Any] = {} + for d in dicts: + if deep: + for key, value in d.items(): + if ( + key in result + and isinstance(result[key], dict) + and isinstance(value, dict) + ): + result[key] = 
merge_dicts(result[key], value, deep=True) + else: + result[key] = value + else: + result.update(d) + return result + + +def flatten_list(nested: List[Any], max_depth: int = -1) -> List[Any]: + """Flatten a nested list up to max_depth levels.""" + result: List[Any] = [] + for item in nested: + if isinstance(item, list) and max_depth != 0: + result.extend(flatten_list(item, max_depth - 1)) + else: + result.append(item) + return result + + +def chunk_list(lst: List[_T], size: int) -> List[List[_T]]: + """Split a list into chunks of the given size.""" + return [lst[i : i + size] for i in range(0, len(lst), size)] + + +def deduplicate(items: Sequence[_T]) -> List[_T]: + """Remove duplicates while preserving order.""" + seen: Set[Any] = set() + result: List[_T] = [] + for item in items: + key = id(item) if not isinstance(item, (str, int, float, bool, bytes)) else item + if key not in seen: + seen.add(key) + result.append(item) + return result + + +def retry( + func: typing.Callable[[], _T], + max_attempts: int = 3, + delay: float = 1.0, + backoff: float = 2.0, + exceptions: Tuple[type, ...] 
= (Exception,), +) -> _T: + """Retry a function with exponential backoff.""" + last_exception: Optional[Exception] = None + current_delay = delay + for attempt in range(max_attempts): + try: + return func() + except exceptions as e: + last_exception = e + if attempt < max_attempts - 1: + current_delay *= backoff + raise last_exception # type: ignore + + +def memoize(func: typing.Callable[..., _T]) -> typing.Callable[..., _T]: + """Simple memoization decorator.""" + cache: Dict[str, _T] = {} + + def wrapper(*args: Any, **kwargs: Any) -> _T: + key = str((args, sorted(kwargs.items()))) + if key not in cache: + cache[key] = func(*args, **kwargs) + return cache[key] + + return wrapper + + +# --- More node types to add bulk --- + + +class YieldExpr(Expression): + kind: ClassVar[str] = "yield" + value: Optional[Expression] + + def __init__(self, value: Optional[Expression] = None) -> None: + super().__init__() + self.value = value + + def evaluate(self) -> Any: + raise NotImplementedError + + +class YieldFromExpr(Expression): + kind: ClassVar[str] = "yield_from" + value: Expression + + def __init__(self, value: Expression) -> None: + super().__init__() + self.value = value + + def evaluate(self) -> Any: + raise NotImplementedError + + +class AwaitExpr(Expression): + kind: ClassVar[str] = "await" + value: Expression + + def __init__(self, value: Expression) -> None: + super().__init__() + self.value = value + + def evaluate(self) -> Any: + raise NotImplementedError + + +class FormattedValue(Expression): + kind: ClassVar[str] = "formatted_value" + value: Expression + conversion: Optional[str] + format_spec: Optional[Expression] + + def __init__( + self, + value: Expression, + conversion: Optional[str] = None, + format_spec: Optional[Expression] = None, + ) -> None: + super().__init__() + self.value = value + self.conversion = conversion + self.format_spec = format_spec + + def evaluate(self) -> str: + raise NotImplementedError + + +class JoinedStr(Expression): + 
"""Represents an f-string.""" + + kind: ClassVar[str] = "fstring" + values: List[Expression] + + def __init__(self, values: Optional[List[Expression]] = None) -> None: + super().__init__() + self.values = values or [] + + def evaluate(self) -> str: + raise NotImplementedError + + +class TypeAlias(Statement): + kind: ClassVar[str] = "type_alias" + name: str + type_params: List[Expression] + value: Expression + + def __init__( + self, + name: str, + value: Expression, + type_params: Optional[List[Expression]] = None, + ) -> None: + super().__init__() + self.name = name + self.value = value + self.type_params = type_params or [] + + def execute(self) -> None: + pass + + +class GlobalStatement(Statement): + kind: ClassVar[str] = "global" + names: List[str] + + def __init__(self, names: List[str]) -> None: + super().__init__() + self.names = names + + def execute(self) -> None: + pass + + +class NonlocalStatement(Statement): + kind: ClassVar[str] = "nonlocal" + names: List[str] + + def __init__(self, names: List[str]) -> None: + super().__init__() + self.names = names + + def execute(self) -> None: + pass + + +class DeleteStatement(Statement): + kind: ClassVar[str] = "del" + targets: List[Expression] + + def __init__(self, targets: List[Expression]) -> None: + super().__init__() + self.targets = targets + + def execute(self) -> None: + pass + + +class PassStatement(Statement): + kind: ClassVar[str] = "pass" + + def execute(self) -> None: + pass + + +class BreakStatement(Statement): + kind: ClassVar[str] = "break" + + def execute(self) -> None: + pass + + +class ContinueStatement(Statement): + kind: ClassVar[str] = "continue" + + def execute(self) -> None: + pass + + +# --- Visitor pattern --- + + +class NodeVisitor(Generic[_T]): + """AST node visitor with generic return type.""" + + def visit(self, node: BaseNode) -> _T: + method_name = f"visit_{node.kind}" + visitor = getattr(self, method_name, self.generic_visit) + return visitor(node) + + def generic_visit(self, 
node: BaseNode) -> _T: + raise NotImplementedError(f"No visitor for {node.kind}") + + def visit_module(self, node: Module) -> _T: + return self.generic_visit(node) + + def visit_funcdef(self, node: FunctionDef) -> _T: + return self.generic_visit(node) + + def visit_classdef(self, node: ClassDef) -> _T: + return self.generic_visit(node) + + def visit_import(self, node: ImportStatement) -> _T: + return self.generic_visit(node) + + def visit_assign(self, node: AssignStatement) -> _T: + return self.generic_visit(node) + + def visit_return(self, node: ReturnStatement) -> _T: + return self.generic_visit(node) + + def visit_if(self, node: IfStatement) -> _T: + return self.generic_visit(node) + + def visit_for(self, node: ForStatement) -> _T: + return self.generic_visit(node) + + def visit_while(self, node: WhileStatement) -> _T: + return self.generic_visit(node) + + def visit_try(self, node: TryStatement) -> _T: + return self.generic_visit(node) + + def visit_with(self, node: WithStatement) -> _T: + return self.generic_visit(node) + + def visit_raise(self, node: RaiseStatement) -> _T: + return self.generic_visit(node) + + def visit_assert(self, node: AssertStatement) -> _T: + return self.generic_visit(node) + + def visit_expression(self, node: Expression) -> _T: + return self.generic_visit(node) + + def visit_name(self, node: NameExpr) -> _T: + return self.visit_expression(node) + + def visit_number(self, node: NumberLiteral) -> _T: + return self.visit_expression(node) + + def visit_string(self, node: StringLiteral) -> _T: + return self.visit_expression(node) + + def visit_bool(self, node: BoolLiteral) -> _T: + return self.visit_expression(node) + + def visit_none(self, node: NoneLiteral) -> _T: + return self.visit_expression(node) + + def visit_ellipsis(self, node: EllipsisLiteral) -> _T: + return self.visit_expression(node) + + def visit_binop(self, node: BinaryOp) -> _T: + return self.visit_expression(node) + + def visit_unaryop(self, node: UnaryOp) -> _T: + return 
self.visit_expression(node) + + def visit_compare(self, node: CompareExpr) -> _T: + return self.visit_expression(node) + + def visit_call(self, node: CallExpr) -> _T: + return self.visit_expression(node) + + def visit_attribute(self, node: AttributeExpr) -> _T: + return self.visit_expression(node) + + def visit_subscript(self, node: SubscriptExpr) -> _T: + return self.visit_expression(node) + + def visit_list(self, node: ListExpr) -> _T: + return self.visit_expression(node) + + def visit_dict(self, node: DictExpr) -> _T: + return self.visit_expression(node) + + def visit_set(self, node: SetExpr) -> _T: + return self.visit_expression(node) + + def visit_tuple(self, node: TupleExpr) -> _T: + return self.visit_expression(node) + + def visit_lambda(self, node: LambdaExpr) -> _T: + return self.visit_expression(node) + + def visit_listcomp(self, node: ListCompExpr) -> _T: + return self.visit_expression(node) + + def visit_dictcomp(self, node: DictCompExpr) -> _T: + return self.visit_expression(node) + + def visit_setcomp(self, node: SetCompExpr) -> _T: + return self.visit_expression(node) + + def visit_genexpr(self, node: GeneratorExpr) -> _T: + return self.visit_expression(node) + + def visit_conditional(self, node: ConditionalExpr) -> _T: + return self.visit_expression(node) + + def visit_slice(self, node: SliceExpr) -> _T: + return self.visit_expression(node) + + def visit_starred(self, node: StarredExpr) -> _T: + return self.visit_expression(node) + + def visit_walrus(self, node: WalrusExpr) -> _T: + return self.visit_expression(node) + + def visit_match(self, node: MatchStatement) -> _T: + return self.generic_visit(node) + + def visit_yield(self, node: YieldExpr) -> _T: + return self.visit_expression(node) + + def visit_yield_from(self, node: YieldFromExpr) -> _T: + return self.visit_expression(node) + + def visit_await(self, node: AwaitExpr) -> _T: + return self.visit_expression(node) + + def visit_fstring(self, node: JoinedStr) -> _T: + return 
self.visit_expression(node) + + +# --- Transformer subclass --- + + +class NodeTransformer(NodeVisitor[BaseNode]): + """Visitor that returns transformed nodes.""" + + def generic_visit(self, node: BaseNode) -> BaseNode: + return node + + +# --- Registry pattern --- + + +class NodeRegistry: + """Registry of node factories.""" + + _factories: Dict[str, typing.Callable[..., BaseNode]] + + def __init__(self) -> None: + self._factories = {} + + def register( + self, kind: str + ) -> typing.Callable[ + [typing.Callable[..., BaseNode]], typing.Callable[..., BaseNode] + ]: + def decorator( + factory: typing.Callable[..., BaseNode], + ) -> typing.Callable[..., BaseNode]: + self._factories[kind] = factory + return factory + + return decorator + + def create(self, kind: str, **kwargs: Any) -> BaseNode: + factory = self._factories.get(kind) + if factory is None: + raise KeyError(f"No factory registered for kind: {kind}") + return factory(**kwargs) + + def kinds(self) -> List[str]: + return list(self._factories.keys()) + + +# --- Utility constants --- + +MAX_RECURSION_DEPTH: Final[int] = 256 +DEFAULT_INDENT: Final[str] = " " +BUILTIN_TYPES: Final[Tuple[str, ...]] = ( + "int", + "float", + "complex", + "bool", + "str", + "bytes", + "bytearray", + "memoryview", + "list", + "tuple", + "dict", + "set", + "frozenset", + "range", + "slice", + "type", + "object", + "None", +) + +COMPARISON_OPS: Final[Tuple[str, ...]] = ( + "==", + "!=", + "<", + "<=", + ">", + ">=", + "is", + "is not", + "in", + "not in", +) + +BOOLEAN_OPS: Final[Tuple[str, ...]] = ("and", "or") + +UNARY_OPS: Final[Tuple[str, ...]] = ("+", "-", "~", "not") + +BINARY_OPS: Final[Tuple[str, ...]] = ( + "+", + "-", + "*", + "/", + "//", + "%", + "**", + "<<", + ">>", + "|", + "^", + "&", + "@", +) + +AUGMENTED_ASSIGN_OPS: Final[Tuple[str, ...]] = ( + "+=", + "-=", + "*=", + "/=", + "//=", + "%=", + "**=", + "<<=", + ">>=", + "|=", + "^=", + "&=", + "@=", +) + + +# --- Large function set to add line count --- + + +def 
validate_identifier(name: str) -> bool: + """Check if a string is a valid Python identifier.""" + if not name: + return False + if name[0].isdigit(): + return False + return all(c.isalnum() or c == "_" for c in name) + + +def escape_string(s: str, quote: str = '"') -> str: + """Escape a string for Python source output.""" + result = s.replace("\\", "\\\\") + result = result.replace(quote, "\\" + quote) + result = result.replace("\n", "\\n") + result = result.replace("\r", "\\r") + result = result.replace("\t", "\\t") + return f"{quote}{result}{quote}" + + +def indent_code(code: str, level: int = 1, indent: str = DEFAULT_INDENT) -> str: + """Indent each line of code by the given level.""" + prefix = indent * level + lines = code.split("\n") + return "\n".join(prefix + line if line.strip() else line for line in lines) + + +def strip_comments(source: str) -> str: + """Remove line comments from Python source code (naive).""" + lines = source.split("\n") + result: List[str] = [] + for line in lines: + in_string = False + quote_char = "" + comment_start = -1 + i = 0 + while i < len(line): + ch = line[i] + if in_string: + if ch == "\\" and i + 1 < len(line): + i += 2 + continue + if ch == quote_char: + in_string = False + else: + if ch in ('"', "'"): + in_string = True + quote_char = ch + elif ch == "#": + comment_start = i + break + i += 1 + if comment_start >= 0: + result.append(line[:comment_start].rstrip()) + else: + result.append(line) + return "\n".join(result) + + +def count_lines(source: str) -> Dict[str, int]: + """Count types of lines in a source file.""" + lines = source.split("\n") + total = len(lines) + blank = sum(1 for l in lines if not l.strip()) + comment = sum(1 for l in lines if l.strip().startswith("#")) + code = total - blank - comment + return { + "total": total, + "blank": blank, + "comment": comment, + "code": code, + } + + +def find_all_names(source: str) -> List[str]: + """Find all potential identifiers in source (naive regex-free scan).""" + 
names: List[str] = [] + current = "" + for ch in source: + if ch.isalnum() or ch == "_": + current += ch + else: + if current and not current[0].isdigit(): + names.append(current) + current = "" + if current and not current[0].isdigit(): + names.append(current) + return deduplicate(names) + + +def build_scope_chain(node: BaseNode) -> List[str]: + """Build a list of enclosing scope names for a given node.""" + chain: List[str] = [] + current: Optional[BaseNode] = node + while current is not None: + if isinstance(current, (FunctionDef, ClassDef)): + chain.append(current.name) + elif isinstance(current, Module): + chain.append(current.name) + current = current._parent + chain.reverse() + return chain + + +def compute_complexity(node: BaseNode) -> int: + """Compute a naive cyclomatic complexity for a node.""" + complexity = 1 + for child in node.walk(): + if isinstance(child, (IfStatement, ForStatement, WhileStatement)): + complexity += 1 + elif isinstance(child, TryStatement): + complexity += len(child.handlers) + elif isinstance(child, (BinaryOp,)) and child.op in BOOLEAN_OPS: + complexity += 1 + return complexity + + +# --- Type alias collection --- + +JsonPrimitive = Union[str, int, float, bool, None] +JsonArray = List["JsonValue"] +JsonObject = Dict[str, "JsonValue"] +JsonValue = Union[JsonPrimitive, JsonArray, JsonObject] + +HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"] +StatusCode = Literal[200, 201, 204, 301, 302, 400, 401, 403, 404, 500, 502, 503] + +Color = Tuple[int, int, int] +ColorWithAlpha = Tuple[int, int, int, float] +AnyColor = Union[Color, ColorWithAlpha, str] + +Point2D = Tuple[float, float] +Point3D = Tuple[float, float, float] +BoundingBox = Tuple[Point2D, Point2D] +BoundingBox3D = Tuple[Point3D, Point3D] + +Matrix = List[List[float]] +SparseMatrix = Dict[Tuple[int, int], float] + +PathLike = Union[str, os.PathLike[str]] + +Callback = typing.Callable[[], None] +ErrorHandler = typing.Callable[[Exception], bool] 
+Predicate = typing.Callable[[Any], bool] +Comparator = typing.Callable[[Any, Any], int] +Transformer = typing.Callable[[_T], _T] + +# End of large_stdlib.py diff --git a/packages/pyright-internal/src/tests/benchmarkData/repetitive_identifiers.py b/packages/pyright-internal/src/tests/benchmarkData/repetitive_identifiers.py new file mode 100644 index 000000000000..36a22fbe4c16 --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarkData/repetitive_identifiers.py @@ -0,0 +1,233 @@ +# repetitive_identifiers.py — stresses the tokenizer's identifier intern +# cache by using a small set of identifiers (self, cls, T, K, V, str, int, +# list, dict, None, True, False, etc.) thousands of times. Tokenizing this +# file should hit the identifier intern cache on the vast majority of +# identifier tokens. + +from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar, Union + +T = TypeVar("T") +K = TypeVar("K") +V = TypeVar("V") + + +class C1(Generic[T, K, V]): + def __init__(self, x: T, y: K, z: V) -> None: + self.x = x + self.y = y + self.z = z + + def get_x(self) -> T: + return self.x + + def get_y(self) -> K: + return self.y + + def get_z(self) -> V: + return self.z + + def set_x(self, x: T) -> None: + self.x = x + + def set_y(self, y: K) -> None: + self.y = y + + def set_z(self, z: V) -> None: + self.z = z + + def swap(self, other: "C1[T, K, V]") -> None: + self.x, other.x = other.x, self.x + self.y, other.y = other.y, self.y + self.z, other.z = other.z, self.z + + @classmethod + def make(cls, x: T, y: K, z: V) -> "C1[T, K, V]": + return cls(x, y, z) + + @classmethod + def pair(cls, x: T, y: K, z: V) -> Tuple["C1[T, K, V]", "C1[T, K, V]"]: + return cls(x, y, z), cls(x, y, z) + + +class C2(Generic[T, K, V]): + def __init__(self, x: T, y: K, z: V) -> None: + self.x = x + self.y = y + self.z = z + + def get_x(self) -> T: + return self.x + + def get_y(self) -> K: + return self.y + + def get_z(self) -> V: + return self.z + + def set_x(self, x: T) -> None: + 
self.x = x + + def set_y(self, y: K) -> None: + self.y = y + + def set_z(self, z: V) -> None: + self.z = z + + @classmethod + def make(cls, x: T, y: K, z: V) -> "C2[T, K, V]": + return cls(x, y, z) + + +def f1(x: int, y: int, z: int) -> int: + return x + y + z + + +def f2(x: int, y: int, z: int) -> int: + return x + y + z + + +def f3(x: int, y: int, z: int) -> int: + return x + y + z + + +def f4(x: int, y: int, z: int) -> int: + return x + y + z + + +def f5(x: int, y: int, z: int) -> int: + return x + y + z + + +def build_list(x: int, y: int, z: int) -> List[int]: + return [x, y, z, x, y, z, x, y, z, x, y, z, x, y, z, x, y, z, x, y, z, x, y, z] + + +def build_dict(x: str, y: str, z: str) -> Dict[str, str]: + return {"x": x, "y": y, "z": z, "a": x, "b": y, "c": z, "d": x, "e": y, "f": z} + + +def build_tuple(x: int, y: int, z: int) -> Tuple[int, int, int, int, int, int]: + return (x, y, z, x, y, z) + + +def deep(x: int, y: int, z: int) -> Optional[int]: + if x is None: + return None + if y is None: + return None + if z is None: + return None + if x == 0: + return x + if y == 0: + return y + if z == 0: + return z + return x + y + z + + +def union_of(x: Union[int, str], y: Union[int, str], z: Union[int, str]) -> Union[int, str]: + if isinstance(x, int) and isinstance(y, int) and isinstance(z, int): + return x + y + z + return str(x) + str(y) + str(z) + + +def any_of(x: Any, y: Any, z: Any) -> Any: + return x or y or z or x or y or z or x or y or z + + +# Lots of calls, each one touches the same identifiers repeatedly. 
+_ = f1(1, 2, 3) +_ = f2(1, 2, 3) +_ = f3(1, 2, 3) +_ = f4(1, 2, 3) +_ = f5(1, 2, 3) +_ = f1(1, 2, 3) +_ = f2(1, 2, 3) +_ = f3(1, 2, 3) +_ = f4(1, 2, 3) +_ = f5(1, 2, 3) +_ = f1(1, 2, 3) +_ = f2(1, 2, 3) +_ = f3(1, 2, 3) +_ = f4(1, 2, 3) +_ = f5(1, 2, 3) + +_ = build_list(1, 2, 3) +_ = build_list(1, 2, 3) +_ = build_list(1, 2, 3) +_ = build_list(1, 2, 3) +_ = build_list(1, 2, 3) + +_ = build_dict("a", "b", "c") +_ = build_dict("a", "b", "c") +_ = build_dict("a", "b", "c") +_ = build_dict("a", "b", "c") +_ = build_dict("a", "b", "c") + +_ = build_tuple(1, 2, 3) +_ = build_tuple(1, 2, 3) +_ = build_tuple(1, 2, 3) +_ = build_tuple(1, 2, 3) +_ = build_tuple(1, 2, 3) + +_ = deep(1, 2, 3) +_ = deep(1, 2, 3) +_ = deep(1, 2, 3) +_ = deep(1, 2, 3) +_ = deep(1, 2, 3) + +_ = union_of(1, 2, 3) +_ = union_of(1, 2, 3) +_ = union_of(1, 2, 3) +_ = union_of(1, 2, 3) +_ = union_of(1, 2, 3) + +_ = any_of(1, 2, 3) +_ = any_of(1, 2, 3) +_ = any_of(1, 2, 3) +_ = any_of(1, 2, 3) +_ = any_of(1, 2, 3) + +c1 = C1(1, "a", [1, 2, 3]) +c2 = C1(1, "a", [1, 2, 3]) +c3 = C1(1, "a", [1, 2, 3]) +c4 = C1(1, "a", [1, 2, 3]) +c5 = C1(1, "a", [1, 2, 3]) +c6 = C2(1, "a", [1, 2, 3]) +c7 = C2(1, "a", [1, 2, 3]) +c8 = C2(1, "a", [1, 2, 3]) +c9 = C2(1, "a", [1, 2, 3]) +c10 = C2(1, "a", [1, 2, 3]) + +# Flat attribute-access cascade — each line references self-like receivers +# multiple times, producing many repeated identifier tokens per line. +r1 = c1.get_x() + c2.get_x() + c3.get_x() + c4.get_x() + c5.get_x() +r2 = c1.get_y() + c2.get_y() + c3.get_y() + c4.get_y() + c5.get_y() +r3 = c1.get_z() + c2.get_z() + c3.get_z() + c4.get_z() + c5.get_z() +r4 = c6.get_x() + c7.get_x() + c8.get_x() + c9.get_x() + c10.get_x() +r5 = c6.get_y() + c7.get_y() + c8.get_y() + c9.get_y() + c10.get_y() +r6 = c6.get_z() + c7.get_z() + c8.get_z() + c9.get_z() + c10.get_z() + +# A batch of nearly-identical lines to really hammer the intern cache. 
+v1 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v2 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v3 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v4 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v5 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v6 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v7 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v8 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v9 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] +v10 = [x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9] if x > 0 and x < 10 and x != 5] + +w1 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w2 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w3 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w4 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w5 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w6 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w7 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w8 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w9 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} +w10 = {k: v for k, v in [("a", 1), ("b", 2), ("c", 3)] if v > 0 and k != "x"} diff --git a/packages/pyright-internal/src/tests/benchmarkData/union_heavy.py b/packages/pyright-internal/src/tests/benchmarkData/union_heavy.py new file mode 100644 index 000000000000..0f5f0602e54e --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarkData/union_heavy.py @@ -0,0 +1,525 @@ +# union_heavy.py — complex union/intersection type scenarios +# Stresses the type evaluator's union handling, narrowing, and type 
guard paths. + +from __future__ import annotations + +from typing import ( + Any, + Dict, + Generic, + List, + Literal, + Never, + Optional, + Protocol, + Sequence, + Tuple, + TypeAlias, + TypeGuard, + TypeVar, + Union, + overload, + runtime_checkable, +) +from dataclasses import dataclass + +_T = TypeVar("_T") + +# --- Large literal unions --- + +HttpStatus: TypeAlias = Literal[ + 100, 101, 102, 103, + 200, 201, 202, 203, 204, 205, 206, 207, 208, 226, + 300, 301, 302, 303, 304, 305, 307, 308, + 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 421, + 422, 423, 424, 425, 426, 428, 429, 431, 451, + 500, 501, 502, 503, 504, 505, 506, 507, 508, 510, 511, +] + +Color: TypeAlias = Literal[ + "red", "green", "blue", "yellow", "cyan", "magenta", + "white", "black", "gray", "grey", "orange", "purple", + "pink", "brown", "gold", "silver", "navy", "teal", + "maroon", "olive", "lime", "aqua", "coral", "salmon", + "crimson", "indigo", "violet", "turquoise", "khaki", + "orchid", "plum", "sienna", "tomato", "wheat", +] + +Country: TypeAlias = Literal[ + "US", "UK", "CA", "AU", "NZ", "IE", "DE", "FR", "ES", "IT", + "PT", "NL", "BE", "CH", "AT", "SE", "NO", "DK", "FI", "PL", + "CZ", "SK", "HU", "RO", "BG", "HR", "SI", "EE", "LV", "LT", + "JP", "KR", "CN", "TW", "HK", "SG", "MY", "TH", "VN", "PH", + "IN", "PK", "BD", "LK", "NP", "ID", "BR", "AR", "CL", "CO", + "MX", "PE", "VE", "EC", "UY", "PY", "BO", "ZA", "NG", "KE", + "EG", "MA", "TN", "GH", "ET", "TZ", "UG", "RW", "SN", "CI", +] + +# --- Discriminated unions --- + +@dataclass +class Circle: + kind: Literal["circle"] = "circle" + radius: float = 1.0 + + +@dataclass +class Rectangle: + kind: Literal["rectangle"] = "rectangle" + width: float = 1.0 + height: float = 1.0 + + +@dataclass +class Triangle: + kind: Literal["triangle"] = "triangle" + base: float = 1.0 + height: float = 1.0 + + +@dataclass +class Polygon: + kind: Literal["polygon"] = "polygon" + sides: int = 3 + side_length: 
float = 1.0 + + +@dataclass +class Ellipse: + kind: Literal["ellipse"] = "ellipse" + semi_major: float = 2.0 + semi_minor: float = 1.0 + + +Shape = Union[Circle, Rectangle, Triangle, Polygon, Ellipse] + + +def area(shape: Shape) -> float: + if shape.kind == "circle": + return 3.14159 * shape.radius ** 2 + elif shape.kind == "rectangle": + return shape.width * shape.height + elif shape.kind == "triangle": + return 0.5 * shape.base * shape.height + elif shape.kind == "polygon": + import math + return (shape.sides * shape.side_length ** 2) / (4 * math.tan(math.pi / shape.sides)) + elif shape.kind == "ellipse": + return 3.14159 * shape.semi_major * shape.semi_minor + else: + _: Never = shape + raise ValueError(f"Unknown shape: {shape}") + + +def perimeter(shape: Shape) -> float: + if shape.kind == "circle": + return 2 * 3.14159 * shape.radius + elif shape.kind == "rectangle": + return 2 * (shape.width + shape.height) + elif shape.kind == "triangle": + return shape.base * 3 + elif shape.kind == "polygon": + return shape.sides * shape.side_length + elif shape.kind == "ellipse": + import math + a = shape.semi_major + b = shape.semi_minor + return 3.14159 * (3 * (a + b) - math.sqrt((3 * a + b) * (a + 3 * b))) + else: + _: Never = shape + raise ValueError + + +# --- Nested unions --- + +JsonPrimitive = Union[str, int, float, bool, None] +JsonArray = List["JsonValue"] +JsonObject = Dict[str, "JsonValue"] +JsonValue = Union[JsonPrimitive, JsonArray, JsonObject] + + +def json_depth(value: JsonValue) -> int: + if isinstance(value, dict): + if not value: + return 1 + return 1 + max(json_depth(v) for v in value.values()) + elif isinstance(value, list): + if not value: + return 1 + return 1 + max(json_depth(v) for v in value) + else: + return 0 + + +def json_size(value: JsonValue) -> int: + if isinstance(value, dict): + return sum(json_size(v) for v in value.values()) + len(value) + elif isinstance(value, list): + return sum(json_size(v) for v in value) + len(value) + elif 
isinstance(value, str): + return len(value) + elif value is None: + return 0 + else: + return 1 + + +# --- Union narrowing stress --- + +def narrow_union_1(x: Union[int, str, float, bool, bytes, None]) -> str: + if isinstance(x, int): + return f"int: {x}" + elif isinstance(x, str): + return f"str: {x}" + elif isinstance(x, float): + return f"float: {x}" + elif isinstance(x, bool): + return f"bool: {x}" + elif isinstance(x, bytes): + return f"bytes: {x!r}" + elif x is None: + return "none" + else: + _: Never = x + return "unreachable" + + +def narrow_union_2( + x: Union[int, str, List[int], Dict[str, int], Tuple[int, ...], set, frozenset], +) -> int: + if isinstance(x, int): + return x + elif isinstance(x, str): + return len(x) + elif isinstance(x, list): + return sum(x) + elif isinstance(x, dict): + return sum(x.values()) + elif isinstance(x, tuple): + return sum(x) + elif isinstance(x, set): + return len(x) + elif isinstance(x, frozenset): + return len(x) + else: + _: Never = x + raise ValueError + + +def narrow_union_chained( + x: Union[int, str, float, bytes, list, dict, tuple, set, frozenset, None], +) -> str: + if x is None: + return "None" + if isinstance(x, (int, float)): + return f"number: {x}" + if isinstance(x, (str, bytes)): + return f"text: {x!r}" + if isinstance(x, (list, tuple)): + return f"sequence: len={len(x)}" + if isinstance(x, (set, frozenset)): + return f"set: len={len(x)}" + if isinstance(x, dict): + return f"dict: keys={len(x)}" + _: Never = x + return "unreachable" + + +# --- Type guards --- + +def is_string_list(val: List[Any]) -> TypeGuard[List[str]]: + return all(isinstance(item, str) for item in val) + + +def is_int_dict(val: Dict[str, Any]) -> TypeGuard[Dict[str, int]]: + return all(isinstance(v, int) for v in val.values()) + + +def is_non_empty(val: Optional[List[_T]]) -> TypeGuard[List[_T]]: + return val is not None and len(val) > 0 + + +def is_positive_int(val: Union[int, str, None]) -> TypeGuard[int]: + return isinstance(val, int) 
and val > 0 + + +# --- Overloaded functions with union args --- + +@overload +def transform(value: int) -> str: ... +@overload +def transform(value: str) -> int: ... +@overload +def transform(value: float) -> bool: ... +@overload +def transform(value: bool) -> float: ... +@overload +def transform(value: bytes) -> List[int]: ... +@overload +def transform(value: List[int]) -> bytes: ... +@overload +def transform(value: None) -> Literal["none"]: ... + + +def transform( + value: Union[int, str, float, bool, bytes, List[int], None], +) -> Union[str, int, bool, float, List[int], bytes, Literal["none"]]: + if isinstance(value, bool): + return float(value) + elif isinstance(value, int): + return str(value) + elif isinstance(value, str): + return len(value) + elif isinstance(value, float): + return value > 0 + elif isinstance(value, bytes): + return list(value) + elif isinstance(value, list): + return bytes(value) + elif value is None: + return "none" + else: + raise TypeError + + +# --- Complex generic unions --- + +@dataclass +class Success(Generic[_T]): + value: _T + + +@dataclass +class Failure: + error: str + code: int = 0 + + +Result = Union[Success[_T], Failure] + + +def handle_result(r: Result[int]) -> str: + if isinstance(r, Success): + return f"OK: {r.value}" + else: + return f"ERR[{r.code}]: {r.error}" + + +def chain_results(results: List[Result[int]]) -> Result[List[int]]: + values: List[int] = [] + for r in results: + if isinstance(r, Failure): + return r + values.append(r.value) + return Success(values) + + +# --- Protocol unions --- + +@runtime_checkable +class Printable(Protocol): + def __str__(self) -> str: ... + +@runtime_checkable +class Measurable(Protocol): + def __len__(self) -> int: ... + +@runtime_checkable +class Numeric(Protocol): + def __add__(self, other: Any) -> Any: ... + def __mul__(self, other: Any) -> Any: ... 
+ + +def describe_value(val: Union[Printable, Measurable, Numeric]) -> str: + parts: List[str] = [] + if isinstance(val, Printable): + parts.append(f"str={val}") + if isinstance(val, Measurable): + parts.append(f"len={len(val)}") + return ", ".join(parts) if parts else "unknown" + + +# --- TypedDict unions --- + +from typing import TypedDict + + +class UserInfo(TypedDict): + name: str + age: int + email: str + + +class CompanyInfo(TypedDict): + name: str + employees: int + industry: str + + +class ProductInfo(TypedDict): + name: str + price: float + category: str + + +Entity = Union[UserInfo, CompanyInfo, ProductInfo] + + +def entity_name(entity: Entity) -> str: + return entity["name"] + + +def entity_summary(entity: Entity) -> str: + if "age" in entity: + e: UserInfo = entity # type: ignore + return f"User: {e['name']}, age {e['age']}" + elif "employees" in entity: + e2: CompanyInfo = entity # type: ignore + return f"Company: {e2['name']}, {e2['employees']} employees" + else: + e3: ProductInfo = entity # type: ignore + return f"Product: {e3['name']}, ${e3['price']}" + + +# --- Deep union chains --- + +Level0 = Union[int, str] +Level1 = Union[Level0, float, bool] +Level2 = Union[Level1, bytes, list] +Level3 = Union[Level2, dict, tuple] +Level4 = Union[Level3, set, frozenset] +Level5 = Union[Level4, complex, memoryview] + +DeepUnion = Level5 + + +def process_deep(val: DeepUnion) -> str: + if isinstance(val, int): + return "int" + elif isinstance(val, str): + return "str" + elif isinstance(val, float): + return "float" + elif isinstance(val, bool): + return "bool" + elif isinstance(val, bytes): + return "bytes" + elif isinstance(val, list): + return "list" + elif isinstance(val, dict): + return "dict" + elif isinstance(val, tuple): + return "tuple" + elif isinstance(val, set): + return "set" + elif isinstance(val, frozenset): + return "frozenset" + elif isinstance(val, complex): + return "complex" + elif isinstance(val, memoryview): + return "memoryview" + else: + 
return "unknown" + + +# --- Union of many dataclasses --- + +@dataclass +class EventA: + kind: Literal["a"] = "a" + payload: str = "" + +@dataclass +class EventB: + kind: Literal["b"] = "b" + count: int = 0 + +@dataclass +class EventC: + kind: Literal["c"] = "c" + flag: bool = False + +@dataclass +class EventD: + kind: Literal["d"] = "d" + value: float = 0.0 + +@dataclass +class EventE: + kind: Literal["e"] = "e" + items: List[str] = None # type: ignore + +@dataclass +class EventF: + kind: Literal["f"] = "f" + data: Dict[str, Any] = None # type: ignore + +@dataclass +class EventG: + kind: Literal["g"] = "g" + source: str = "" + +@dataclass +class EventH: + kind: Literal["h"] = "h" + target: str = "" + +@dataclass +class EventI: + kind: Literal["i"] = "i" + timestamp: float = 0.0 + +@dataclass +class EventJ: + kind: Literal["j"] = "j" + priority: int = 0 + +Event = Union[EventA, EventB, EventC, EventD, EventE, EventF, EventG, EventH, EventI, EventJ] + + +def dispatch_event(event: Event) -> str: + if event.kind == "a": + return f"A: {event.payload}" + elif event.kind == "b": + return f"B: {event.count}" + elif event.kind == "c": + return f"C: {event.flag}" + elif event.kind == "d": + return f"D: {event.value}" + elif event.kind == "e": + return f"E: {event.items}" + elif event.kind == "f": + return f"F: {event.data}" + elif event.kind == "g": + return f"G: {event.source}" + elif event.kind == "h": + return f"H: {event.target}" + elif event.kind == "i": + return f"I: {event.timestamp}" + elif event.kind == "j": + return f"J: {event.priority}" + else: + _: Never = event + raise ValueError + + +# --- Conditional types via overload --- + +@overload +def maybe_parse(raw: str, strict: Literal[True]) -> int: ... +@overload +def maybe_parse(raw: str, strict: Literal[False]) -> Optional[int]: ... +@overload +def maybe_parse(raw: str, strict: bool = ...) -> Optional[int]: ... 
+
+def maybe_parse(raw: str, strict: bool = False) -> Optional[int]:
+    try:
+        return int(raw)
+    except ValueError:
+        if strict:
+            raise
+        return None
+
+
+# End of union_heavy.py
diff --git a/packages/pyright-internal/src/tests/benchmarks/parserBenchmark.test.ts b/packages/pyright-internal/src/tests/benchmarks/parserBenchmark.test.ts
new file mode 100644
index 000000000000..2869777706cc
--- /dev/null
+++ b/packages/pyright-internal/src/tests/benchmarks/parserBenchmark.test.ts
@@ -0,0 +1,296 @@
+/*
+ * parserBenchmark.test.ts
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Microbenchmark for the Python parser.
+ * Measures nodes/sec, parse time, AST node count across representative corpora.
+ *
+ * Run with:
+ *   cd packages/pyright-internal
+ *   node node_modules\jest\bin\jest parserBenchmark.test --runInBand --detectOpenHandles --forceExit --testTimeout=300000
+ *
+ * Results are written as JSON to:
+ *   src/tests/benchmarks/.generated/benchmark-results/parser/
+ */
+
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+
+import { DiagnosticSink } from '../../common/diagnosticSink';
+import { ParseOptions, Parser } from '../../parser/parser';
+
+// --- Configuration ---
+
+const WARMUP_ITERATIONS = 3;
+const BENCHMARK_ITERATIONS = 10;
+
+const BENCHMARK_OUTPUT_DIR = path.join(__dirname, '.generated', 'benchmark-results', 'parser');
+
+// --- Types ---
+
+interface BenchmarkResult {
+    corpus: string;
+    fileSizeBytes: number;
+    iterations: number;
+    timesMs: number[];
+    medianMs: number;
+    p95Ms: number;
+    minMs: number;
+    maxMs: number;
+    avgMs: number;
+    nodeCount: number;
+    nodesPerSec: number;
+    statementCount: number;
+    errorCount: number;
+}
+
+interface BenchmarkReport {
+    timestamp: string;
+    system: {
+        platform: string;
+        arch: string;
+        cpus: string;
+        cpuCount: number;
+        totalMemoryMB: number;
+        nodeVersion: string;
+    };
+    config: {
+        warmupIterations: number;
+        benchmarkIterations: number;
+    };
+    results: BenchmarkResult[];
+}
+
+// --- Helpers ---
+
+function calculateStats(times: ReadonlyArray<number>): {
+    median: number;
+    p95: number;
+    min: number;
+    max: number;
+    avg: number;
+} {
+    const sorted = [...times].sort((a, b) => a - b);
+    const len = sorted.length;
+
+    const median = len % 2 === 0 ? (sorted[len / 2 - 1] + sorted[len / 2]) / 2 : sorted[Math.floor(len / 2)];
+    const p95Index = Math.ceil(len * 0.95) - 1;
+    const p95 = sorted[Math.min(p95Index, len - 1)];
+    const min = sorted[0];
+    const max = sorted[len - 1];
+    const avg = times.reduce((a, b) => a + b, 0) / len;
+
+    return { median, p95, min, max, avg };
+}
+
+function loadCorpus(filename: string): string {
+    const filePath = path.resolve(__dirname, '..', 'benchmarkData', filename);
+    return fs.readFileSync(filePath, 'utf-8');
+}
+
+function getSystemInfo(): BenchmarkReport['system'] {
+    const cpus = os.cpus();
+    return {
+        platform: os.platform(),
+        arch: os.arch(),
+        cpus: cpus[0]?.model ?? 'unknown',
+        cpuCount: cpus.length,
+        totalMemoryMB: Math.round(os.totalmem() / (1024 * 1024)),
+        nodeVersion: process.version,
+    };
+}
+
+function writeReport(report: BenchmarkReport): void {
+    fs.mkdirSync(BENCHMARK_OUTPUT_DIR, { recursive: true });
+    const filename = `parser-benchmark-${new Date().toISOString().replace(/[:.]/g, '-')}.json`;
+    const outputPath = path.join(BENCHMARK_OUTPUT_DIR, filename);
+    fs.writeFileSync(outputPath, JSON.stringify(report, undefined, 2), 'utf-8');
+    console.log(`\nBenchmark results written to: ${outputPath}`);
+}
+
+function printResultTable(results: ReadonlyArray<BenchmarkResult>): void {
+    console.log('\n=== Parser Benchmark Results ===\n');
+    console.log(
+        `${'Corpus'.padEnd(25)} ${'Size'.padStart(8)} ${'Nodes'.padStart(8)} ${'Stmts'.padStart(7)} ${'Errors'.padStart(
+            7
+        )} ${'Median'.padStart(10)} ${'Min'.padStart(10)} ${'Max'.padStart(10)} ${'Avg'.padStart(
+            10
+        )} ${'Nodes/s'.padStart(12)}`
+    );
+    console.log('-'.repeat(117));
+
+    for (const r of results) {
+        const sizeKB = 
`${(r.fileSizeBytes / 1024).toFixed(1)}KB`; + console.log( + `${r.corpus.padEnd(25)} ${sizeKB.padStart(8)} ${String(r.nodeCount).padStart(8)} ${String( + r.statementCount + ).padStart(7)} ${String(r.errorCount).padStart(7)} ${r.medianMs.toFixed(2).padStart(10)} ${r.minMs + .toFixed(2) + .padStart(10)} ${r.maxMs.toFixed(2).padStart(10)} ${r.avgMs.toFixed(2).padStart(10)} ${Math.round( + r.nodesPerSec + ) + .toLocaleString() + .padStart(12)}` + ); + } + console.log(''); +} + +/** + * Count all AST nodes by walking the tree recursively. + * Pyright parse nodes have: { nodeType, d: { ...children }, ... } + */ +function countNodes(node: any): number { + if (!node || typeof node !== 'object' || !('nodeType' in node)) { + return 0; + } + + let count = 1; + + // Walk the .d data bag where child nodes live + const data = node.d; + if (data && typeof data === 'object') { + for (const key of Object.keys(data)) { + const val = data[key]; + if (val && typeof val === 'object') { + if ('nodeType' in val) { + count += countNodes(val); + } else if (Array.isArray(val)) { + for (const item of val) { + if (item && typeof item === 'object' && 'nodeType' in item) { + count += countNodes(item); + } + } + } + } + } + } + + return count; +} + +function benchmarkParse(corpusName: string, code: string): BenchmarkResult { + const times: number[] = []; + let nodeCount = 0; + let statementCount = 0; + let errorCount = 0; + + const parseOptions = new ParseOptions(); + + // Warmup + for (let i = 0; i < WARMUP_ITERATIONS; i++) { + const parser = new Parser(); + const diagSink = new DiagnosticSink(); + parser.parseSourceFile(code, parseOptions, diagSink); + } + + // Benchmark + for (let i = 0; i < BENCHMARK_ITERATIONS; i++) { + const parser = new Parser(); + const diagSink = new DiagnosticSink(); + + const start = performance.now(); + const result = parser.parseSourceFile(code, parseOptions, diagSink); + const elapsed = performance.now() - start; + + times.push(elapsed); + statementCount = 
result.parserOutput.parseTree.d.statements.length;
+        errorCount = diagSink.getErrors().length;
+
+        // Count nodes on the last iteration only (it's expensive)
+        if (i === BENCHMARK_ITERATIONS - 1) {
+            nodeCount = countNodes(result.parserOutput.parseTree);
+        }
+    }
+
+    const stats = calculateStats(times);
+
+    return {
+        corpus: corpusName,
+        fileSizeBytes: Buffer.byteLength(code, 'utf-8'),
+        iterations: BENCHMARK_ITERATIONS,
+        timesMs: times,
+        medianMs: stats.median,
+        p95Ms: stats.p95,
+        minMs: stats.min,
+        maxMs: stats.max,
+        avgMs: stats.avg,
+        nodeCount,
+        nodesPerSec: nodeCount / (stats.median / 1000),
+        statementCount,
+        errorCount,
+    };
+}
+
+// --- Corpus definitions ---
+
+const corpora: { name: string; file: string }[] = [
+    { name: 'large_stdlib', file: 'large_stdlib.py' },
+    { name: 'fstring_heavy', file: 'fstring_heavy.py' },
+    { name: 'comment_heavy', file: 'comment_heavy.py' },
+    { name: 'large_class', file: 'large_class.py' },
+    { name: 'import_heavy', file: 'import_heavy.py' },
+    { name: 'union_heavy', file: 'union_heavy.py' },
+];
+
+// --- Tests (gated like tokenizerBenchmark.test.ts: only run when PYRIGHT_RUN_BENCHMARKS=1) ---
+
+(process.env.PYRIGHT_RUN_BENCHMARKS === '1' ? describe : describe.skip)('Parser Benchmark', () => {
+    const allResults: BenchmarkResult[] = [];
+
+    for (const { name, file } of corpora) {
+        test(`parse ${name}`, () => {
+            const code = loadCorpus(file);
+            const result = benchmarkParse(name, code);
+            allResults.push(result);
+
+            console.log(
+                `  ${name}: median=${result.medianMs.toFixed(2)}ms, nodes=${result.nodeCount}, stmts=${
+                    result.statementCount
+                }, nodes/sec=${Math.round(result.nodesPerSec).toLocaleString()}`
+            );
+
+            // Sanity: parser should produce statements
+            expect(result.statementCount).toBeGreaterThan(0);
+            // Sanity: should complete in reasonable time (< 10s per file)
+            expect(result.medianMs).toBeLessThan(10000);
+        });
+    }
+
+    test('scaled corpus (10x large_stdlib)', () => {
+        const base = loadCorpus('large_stdlib.py');
+        const scaled = Array(10).fill(base).join('\n');
+
+        const result = benchmarkParse('large_stdlib_10x', scaled);
+        
allResults.push(result); + + console.log( + ` large_stdlib_10x: median=${result.medianMs.toFixed(2)}ms, nodes=${ + result.nodeCount + }, nodes/sec=${Math.round(result.nodesPerSec).toLocaleString()}` + ); + + expect(result.statementCount).toBeGreaterThan(0); + }); + + afterAll(() => { + if (allResults.length === 0) { + return; + } + + printResultTable(allResults); + + const report: BenchmarkReport = { + timestamp: new Date().toISOString(), + system: getSystemInfo(), + config: { + warmupIterations: WARMUP_ITERATIONS, + benchmarkIterations: BENCHMARK_ITERATIONS, + }, + results: allResults, + }; + + writeReport(report); + }); +}); diff --git a/packages/pyright-internal/src/tests/benchmarks/tokenizerBenchmark.test.ts b/packages/pyright-internal/src/tests/benchmarks/tokenizerBenchmark.test.ts new file mode 100644 index 000000000000..48c1521badfb --- /dev/null +++ b/packages/pyright-internal/src/tests/benchmarks/tokenizerBenchmark.test.ts @@ -0,0 +1,316 @@ +/* + * tokenizerBenchmark.test.ts + * Copyright (c) Microsoft Corporation. + * + * Microbenchmark for the Python tokenizer. + * Measures tokens/sec and time-to-tokenize across representative corpora. 
+ *
+ * Run with:
+ *   cd packages/pyright-internal
+ *   node node_modules\jest\bin\jest tokenizerBenchmark.test --runInBand --detectOpenHandles --forceExit --testTimeout=300000
+ *
+ * Results are written as JSON to:
+ *   src/tests/benchmarks/.generated/benchmark-results/tokenizer/
+ */
+
+import { execFileSync } from 'child_process';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+
+import { Tokenizer } from '../../parser/tokenizer';
+
+// --- Configuration ---
+
+const WARMUP_ITERATIONS = 3;
+const BENCHMARK_ITERATIONS = 10;
+
+const BENCHMARK_OUTPUT_DIR = path.join(__dirname, '.generated', 'benchmark-results', 'tokenizer');
+const JEST_BIN_PATH = path.resolve(__dirname, '..', '..', '..', 'node_modules', 'jest', 'bin', 'jest.js');
+const CHILD_RESULT_PREFIX = '__TOKENIZER_BENCHMARK_RESULT__';
+const CHILD_MODE_ENV = 'PYRIGHT_TOKENIZER_BENCH_CHILD';
+const RUN_BENCHMARKS_ENV = 'PYRIGHT_RUN_BENCHMARKS';
+
+// --- Types ---
+
+interface BenchmarkResult {
+    corpus: string;
+    fileSizeBytes: number;
+    iterations: number;
+    timesMs: number[];
+    medianMs: number;
+    p95Ms: number;
+    minMs: number;
+    maxMs: number;
+    avgMs: number;
+    tokenCount: number;
+    tokensPerSec: number;
+}
+
+interface BenchmarkReport {
+    timestamp: string;
+    system: {
+        platform: string;
+        arch: string;
+        cpus: string;
+        cpuCount: number;
+        totalMemoryMB: number;
+        nodeVersion: string;
+    };
+    config: {
+        warmupIterations: number;
+        benchmarkIterations: number;
+    };
+    results: BenchmarkResult[];
+}
+
+// --- Helpers ---
+
+function calculateStats(times: ReadonlyArray<number>): {
+    median: number;
+    p95: number;
+    min: number;
+    max: number;
+    avg: number;
+} {
+    const sorted = [...times].sort((a, b) => a - b);
+    const len = sorted.length;
+
+    const median = len % 2 === 0 ? (sorted[len / 2 - 1] + sorted[len / 2]) / 2 : sorted[Math.floor(len / 2)];
+    const p95Index = Math.ceil(len * 0.95) - 1;
+    const p95 = sorted[Math.min(p95Index, len - 1)];
+    const min = sorted[0];
+    const max = sorted[len - 1];
+    const avg = times.reduce((a, b) => a + b, 0) / len;
+
+    return { median, p95, min, max, avg };
+}
+
+function loadCorpus(filename: string): string {
+    const filePath = path.resolve(__dirname, '..', 'benchmarkData', filename);
+    return fs.readFileSync(filePath, 'utf-8');
+}
+
+function getSystemInfo(): BenchmarkReport['system'] {
+    const cpus = os.cpus();
+    return {
+        platform: os.platform(),
+        arch: os.arch(),
+        cpus: cpus[0]?.model ?? 'unknown',
+        cpuCount: cpus.length,
+        totalMemoryMB: Math.round(os.totalmem() / (1024 * 1024)),
+        nodeVersion: process.version,
+    };
+}
+
+function writeReport(report: BenchmarkReport): void {
+    fs.mkdirSync(BENCHMARK_OUTPUT_DIR, { recursive: true });
+    const filename = `tokenizer-benchmark-${new Date().toISOString().replace(/[:.]/g, '-')}.json`;
+    const outputPath = path.join(BENCHMARK_OUTPUT_DIR, filename);
+    fs.writeFileSync(outputPath, JSON.stringify(report, undefined, 2), 'utf-8');
+    console.log(`\nBenchmark results written to: ${outputPath}`);
+}
+
+function printResultTable(results: ReadonlyArray<BenchmarkResult>): void {
+    console.log('\n=== Tokenizer Benchmark Results ===\n');
+    console.log(
+        `${'Corpus'.padEnd(25)} ${'Size'.padStart(8)} ${'Tokens'.padStart(8)} ${'Median'.padStart(10)} ${'Min'.padStart(
+            10
+        )} ${'Max'.padStart(10)} ${'Avg'.padStart(10)} ${'p95'.padStart(10)} ${'Tok/sec'.padStart(12)}`
+    );
+    console.log('-'.repeat(113));
+
+    for (const result of results) {
+        const sizeKB = `${(result.fileSizeBytes / 1024).toFixed(1)}KB`;
+        console.log(
+            `${result.corpus.padEnd(25)} ${sizeKB.padStart(8)} ${String(result.tokenCount).padStart(
+                8
+            )} ${result.medianMs.toFixed(2).padStart(10)} ${result.minMs.toFixed(2).padStart(10)} ${result.maxMs
+                .toFixed(2)
+                .padStart(10)} 
${result.avgMs.toFixed(2).padStart(10)} ${result.p95Ms + .toFixed(2) + .padStart(10)} ${Math.round(result.tokensPerSec).toLocaleString().padStart(12)}` + ); + } + console.log(''); +} + +function emitChildResult(result: BenchmarkResult): void { + process.stdout.write(`${CHILD_RESULT_PREFIX}${JSON.stringify(result)}\n`); +} + +function getChildOutput(error: unknown): string { + if (!(error instanceof Error)) { + return ''; + } + + const stdout = 'stdout' in error && typeof error.stdout === 'string' ? error.stdout : ''; + const stderr = 'stderr' in error && typeof error.stderr === 'string' ? error.stderr : ''; + return [stdout, stderr].filter((part) => part.length > 0).join('\n'); +} + +function escapeRegExp(text: string): string { + return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function runBenchmarkInFreshProcess(testName: string): BenchmarkResult { + try { + const output = execFileSync( + process.execPath, + [ + JEST_BIN_PATH, + __filename, + '--runInBand', + '--forceExit', + '--testTimeout=300000', + '--testNamePattern', + `^Tokenizer Benchmark ${escapeRegExp(testName)}$`, + ], + { + cwd: path.resolve(__dirname, '..', '..', '..'), + encoding: 'utf-8', + env: { + ...process.env, + [CHILD_MODE_ENV]: '1', + }, + } + ); + + const resultLine = output.split(/\r?\n/).find((line) => line.startsWith(CHILD_RESULT_PREFIX)); + + if (!resultLine) { + throw new Error(`Child benchmark for "${testName}" did not emit a result.\n${output}`); + } + + return JSON.parse(resultLine.slice(CHILD_RESULT_PREFIX.length)) as BenchmarkResult; + } catch (error) { + const output = getChildOutput(error); + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Child benchmark for "${testName}" failed.\n${message}${output ? 
`\n${output}` : ''}`); + } +} + +function benchmarkTokenize(corpusName: string, code: string): BenchmarkResult { + const times: number[] = []; + let tokenCount = 0; + + for (let i = 0; i < WARMUP_ITERATIONS; i++) { + const tokenizer = new Tokenizer(); + tokenizer.tokenize(code); + } + + for (let i = 0; i < BENCHMARK_ITERATIONS; i++) { + const tokenizer = new Tokenizer(); + + const start = performance.now(); + const results = tokenizer.tokenize(code); + const elapsed = performance.now() - start; + + times.push(elapsed); + tokenCount = results.tokens.count; + } + + const stats = calculateStats(times); + + return { + corpus: corpusName, + fileSizeBytes: Buffer.byteLength(code, 'utf-8'), + iterations: BENCHMARK_ITERATIONS, + timesMs: times, + medianMs: stats.median, + p95Ms: stats.p95, + minMs: stats.min, + maxMs: stats.max, + avgMs: stats.avg, + tokenCount, + tokensPerSec: tokenCount / (stats.median / 1000), + }; +} + +// --- Corpus definitions --- + +const corpora: { name: string; file: string }[] = [ + { name: 'large_stdlib', file: 'large_stdlib.py' }, + { name: 'fstring_heavy', file: 'fstring_heavy.py' }, + { name: 'comment_heavy', file: 'comment_heavy.py' }, + { name: 'large_class', file: 'large_class.py' }, + { name: 'import_heavy', file: 'import_heavy.py' }, + { name: 'union_heavy', file: 'union_heavy.py' }, + { name: 'repetitive_identifiers', file: 'repetitive_identifiers.py' }, +]; + +// --- Tests --- + +const benchmarkSuite = process.env[RUN_BENCHMARKS_ENV] === '1' ? describe : describe.skip; + +benchmarkSuite('Tokenizer Benchmark', () => { + const allResults: BenchmarkResult[] = []; + const isChildProcess = process.env[CHILD_MODE_ENV] === '1'; + + for (const { name, file } of corpora) { + test(`tokenize ${name}`, () => { + const result = isChildProcess + ? 
benchmarkTokenize(name, loadCorpus(file)) + : runBenchmarkInFreshProcess(`tokenize ${name}`); + + if (!isChildProcess) { + allResults.push(result); + } + + console.log( + ` ${name}: median=${result.medianMs.toFixed(2)}ms, tokens=${result.tokenCount}, tok/sec=${Math.round( + result.tokensPerSec + ).toLocaleString()}` + ); + + if (isChildProcess) { + emitChildResult(result); + } + + expect(result.tokenCount).toBeGreaterThan(0); + expect(result.medianMs).toBeLessThan(5000); + }); + } + + test('scaled corpus (10x large_stdlib)', () => { + const result = isChildProcess + ? benchmarkTokenize('large_stdlib_10x', Array(10).fill(loadCorpus('large_stdlib.py')).join('\n')) + : runBenchmarkInFreshProcess('scaled corpus (10x large_stdlib)'); + + if (!isChildProcess) { + allResults.push(result); + } + + console.log( + ` large_stdlib_10x: median=${result.medianMs.toFixed(2)}ms, tokens=${ + result.tokenCount + }, tok/sec=${Math.round(result.tokensPerSec).toLocaleString()}` + ); + + if (isChildProcess) { + emitChildResult(result); + } + + expect(result.tokenCount).toBeGreaterThan(0); + }); + + afterAll(() => { + if (isChildProcess || allResults.length === 0) { + return; + } + + printResultTable(allResults); + + const report: BenchmarkReport = { + timestamp: new Date().toISOString(), + system: getSystemInfo(), + config: { + warmupIterations: WARMUP_ITERATIONS, + benchmarkIterations: BENCHMARK_ITERATIONS, + }, + results: allResults, + }; + + writeReport(report); + }); +}); diff --git a/packages/pyright-internal/src/tests/tokenizer.test.ts b/packages/pyright-internal/src/tests/tokenizer.test.ts index 32f92009bbd1..9cec1fcbec68 100644 --- a/packages/pyright-internal/src/tests/tokenizer.test.ts +++ b/packages/pyright-internal/src/tests/tokenizer.test.ts @@ -1676,7 +1676,8 @@ test('Lines1', () => { test('Comments1', () => { const t = new Tokenizer(); - const results = t.tokenize('# hello\n# good bye\n\n\n""" test """ # another\n\n\npass'); + const text = '# hello\n# good bye\n\n\n""" 
test """ # another\n\n\npass'; + const results = t.tokenize(text); assert.equal(results.tokens.count, 4 + _implicitTokenCount); const token0 = results.tokens.getItemAt(0); @@ -1711,7 +1712,8 @@ test('Comments1', () => { test('Comments2', () => { const t = new Tokenizer(); - const results = t.tokenize('class A:\n def func(self):\n pass\n # comment\n '); + const text = 'class A:\n def func(self):\n pass\n # comment\n '; + const results = t.tokenize(text); assert.equal(results.tokens.count, 16 + _implicitTokenCount); const token17 = results.tokens.getItemAt(17); @@ -1815,6 +1817,21 @@ test('TypeIgnoreLine2', () => { assert.equal(results.tokens.contains(42), false); }); +test('TypeIgnoreLineMalformedBracket', () => { + const t = new Tokenizer(); + const results = t.tokenize('a = 3 # type: ignore[broken'); + assert.equal(results.typeIgnoreLines.size, 0); +}); + +// A space-separated unclosed bracket (e.g. `# type: ignore [broken`) is also +// rejected entirely. The tokenizer does not fall back to treating the +// directive as "ignore all" when the bracket list is present but malformed. +test('TypeIgnoreLineMalformedBracketWithSpace', () => { + const t = new Tokenizer(); + const results = t.tokenize('a = 3 # type: ignore [broken'); + assert.equal(results.typeIgnoreLines.size, 0); +}); + // Regression test for https://github.com/microsoft/pyright/issues/11345. // type: ignore comments containing tool-namespaced codes (e.g. "ty:rule-name") // must be recognised as type: ignore comments.