microsoft · bschnurr · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 16, 2026
diff --git a/.gitignore b/.gitignore
@@ -132,3 +132,6 @@ serena/
 .beads/
 AGENTS.md
 
+# Generated benchmark output
+packages/pyright-internal/src/tests/benchmarks/.generated/
+
diff --git a/packages/pyright-internal/package.json b/packages/pyright-internal/package.json
@@ -13,9 +13,10 @@
         "clean": "shx rm -rf ./dist ./out",
         "webpack:testserver": "webpack --config ./src/tests/lsp/webpack.testserver.config.js --mode=development",
         "webpack:testserver:watch": "npm run clean && webpack --config ./src/tests/lsp/webpack.testserver.config.js --mode development --watch --progress",
-        "test": "npm run webpack:testserver && node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit",
-        "test:norebuild": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit",
-        "test:coverage": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --reporters=jest-junit --reporters=default --coverage --coverageReporters=cobertura --coverageReporters=html --coverageReporters=json",
+        "test": "npm run webpack:testserver && node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks",
+        "test:norebuild": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks",
+        "test:benchmark": "cross-env PYRIGHT_RUN_BENCHMARKS=1 node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testTimeout=300000 --runInBand --detectOpenHandles src/tests/benchmarks",
+        "test:coverage": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks --reporters=jest-junit --reporters=default --coverage --coverageReporters=cobertura --coverageReporters=html --coverageReporters=json",
         "test:imports": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest importResolver.test --forceExit --runInBand"
     },
     "dependencies": {

diff --git a/packages/pyright-internal/src/analyzer/sourceFile.ts b/packages/pyright-internal/src/analyzer/sourceFile.ts
@@ -774,7 +774,7 @@ export class SourceFile {
                     this._writableData.taskListDiagnostics = [];
                     this._addTaskListDiagnostics(
                         configOptions.taskListTokens,
-                        parseFileResults.tokenizerOutput,
+                        parseFileResults,
                         this._writableData.taskListDiagnostics
                     );
                 });
@@ -1327,13 +1327,16 @@ export class SourceFile {
     // to the specified diagnostic list.
     private _addTaskListDiagnostics(
         taskListTokens: TaskListToken[] | undefined,
-        tokenizerOutput: TokenizerOutput,
+        parseFileResults: ParseFileResults,
         diagList: Diagnostic[]
     ) {
         if (!taskListTokens || taskListTokens.length === 0 || !diagList) {
             return;
         }
 
+        const tokenizerOutput = parseFileResults.tokenizerOutput;
+        const fileContents = parseFileResults.text;
+
         for (let i = 0; i < tokenizerOutput.tokens.count; i++) {
             const token = tokenizerOutput.tokens.getItemAt(i);
 
@@ -1343,36 +1346,72 @@ export class SourceFile {
             }
 
             for (const comment of token.comments) {
-                for (const token of taskListTokens) {
-                    // Check if the comment matches the task list token.
-                    // The comment must start with zero or more whitespace characters,
-                    // followed by the taskListToken (case insensitive),
-                    // followed by (0+ whitespace + EOL) OR (1+ NON-alphanumeric characters)
-                    const regexStr = '^[\\s]*' + token.text + '([\\s]*$|[\\W]+)';
-                    const regex = RegExp(regexStr, 'i'); // case insensitive
-
-                    // If the comment doesn't match, skip it.
-                    if (!regex.test(comment.value)) {
+                // The leading-whitespace scan depends only on the comment, so do it once per
+                // comment rather than once per task token. Only ASCII whitespace is skipped.
+                const commentEnd = comment.start + comment.length;
+                let pos = comment.start;
+                while (pos < commentEnd) {
+                    const ch = fileContents.charCodeAt(pos);
+                    if (ch === 0x20 || ch === 0x09 || ch === 0x0a || ch === 0x0d || ch === 0x0c || ch === 0x0b) {
+                        pos++;
+                    } else {
+                        break;
+                    }
+                }
+
+                for (const taskToken of taskListTokens) {
+                    // Match: taskToken.text (ASCII case-insensitive) at pos, followed by either
+                    // the end of the comment or a non-word character.
+                    const taskText = taskToken.text;
+                    const taskLen = taskText.length;
+
+                    // The token cannot match if it would extend past the end of the comment.
+                    if (pos + taskLen > commentEnd) {
                         continue;
                     }
 
-                    // Calculate the range for the diagnostic. This allows navigation
-                    // to the comment via double clicking the item in the task list pane.
-                    let rangeStart = comment.start;
+                    let matched = true;
+                    for (let k = 0; k < taskLen; k++) {
+                        const a = fileContents.charCodeAt(pos + k);
+                        const b = taskText.charCodeAt(k);
+                        if (a === b) {
+                            continue;
+                        }
+                        // Two code units that differ only in bit 0x20 are the same letter in a
+                        // different case only when they fall in [a-z] after folding; without the
+                        // range check, pairs such as '@'/'`' or '['/'{' would compare equal.
+                        const folded = a | 0x20;
+                        if (folded !== (b | 0x20) || folded < 0x61 || folded > 0x7a) {
+                            matched = false;
+                            break;
+                        }
+                    }
+                    if (!matched) {
+                        continue;
+                    }
 
-                    // The comment technically starts right after the comment identifier(#),
-                    // but we want the caret right before the task list token (since there
-                    // might be whitespace before it).
-                    const indexOfToken = comment.value.toLowerCase().indexOf(token.text.toLowerCase());
-                    rangeStart += indexOfToken;
+                    // After the token, require end-of-comment or a non-word character.
+                    const afterPos = pos + taskLen;
+                    if (afterPos < commentEnd) {
+                        const ch = fileContents.charCodeAt(afterPos);
+                        // Check if ch is a word character [a-zA-Z0-9_]
+                        const isWord =
+                            (ch >= 0x61 && ch <= 0x7a) ||
+                            (ch >= 0x41 && ch <= 0x5a) ||
+                            (ch >= 0x30 && ch <= 0x39) ||
+                            ch === 0x5f;
+                        if (isWord) {
+                            continue;
+                        }
+                    }
 
+                    // Match succeeded. pos is the offset of the task token in the source text.
                     const rangeEnd = TextRange.getEnd(comment);
-                    const range = convertOffsetsToRange(rangeStart, rangeEnd, tokenizerOutput.lines!);
+                    const range = convertOffsetsToRange(pos, rangeEnd, tokenizerOutput.lines!);
 
-                    // Add the diagnostic to the list and trim whitespace from the comment so
-                    // it's easier to read in the task list.
+                    // Trim the comment text so it reads cleanly in the task list.
                     diagList.push(
-                        new Diagnostic(DiagnosticCategory.TaskItem, comment.value.trim(), range, token.priority)
+                        new Diagnostic(DiagnosticCategory.TaskItem, comment.value.trim(), range, taskToken.priority)
                     );
                 }
             }

diff --git a/packages/pyright-internal/src/parser/characterStream.ts b/packages/pyright-internal/src/parser/characterStream.ts
@@ -108,8 +108,28 @@ export class CharacterStream {
     }
 
     skipWhitespace(): void {
-        while (!this.isEndOfStream() && this.isAtWhiteSpace()) {
-            this.moveNext();
+        // Scan past spaces, tabs and form feeds using a local cursor instead of
+        // calling moveNext()/isAtWhiteSpace() for every character, then commit
+        // the stream state once at the end.
+        const source = this._text;
+        const end = source.length;
+        const origin = this._position;
+        let cursor = origin;
+        for (; cursor < end; cursor++) {
+            const code = source.charCodeAt(cursor);
+            if (code !== Char.Space && code !== Char.Tab && code !== Char.FormFeed) {
+                break;
+            }
+        }
+
+        // Leave the stream untouched when nothing was skipped.
+        if (cursor > origin) {
+            const atEnd = cursor >= end;
+            this._position = atEnd ? end : cursor;
+            this._currentChar = atEnd ? 0 : source.charCodeAt(cursor);
+            if (atEnd) {
+                this._isEndOfStream = true;
+            }
         }
     }
 

diff --git a/packages/pyright-internal/src/parser/parser.ts b/packages/pyright-internal/src/parser/parser.ts
@@ -232,6 +232,8 @@ const maxChildNodeDepth = 256;
 export class Parser {
     private _fileContents?: string;
     private _tokenizerOutput?: TokenizerOutput;
+    private _tokens?: TextRangeCollection<Token>; // Copy of _tokenizerOutput.tokens, read by the token accessors.
+    private _tokenCount = 0; // Copy of _tokens.count; stays 0 until _tokens is assigned.
     private _tokenIndex = 0;
     private _areErrorsSuppressed = false;
     private _parseOptions: ParseOptions = new ParseOptions();
@@ -406,6 +408,8 @@ export class Parser {
             initialParenDepth,
             this._parseOptions.useNotebookMode
         );
+        this._tokens = this._tokenizerOutput.tokens;
+        this._tokenCount = this._tokens.count;
         this._tokenIndex = 0;
     }
 
@@ -5259,7 +5263,7 @@ export class Parser {
     }
 
     private _getNextToken(): Token {
-        const token = this._tokenizerOutput!.tokens.getItemAt(this._tokenIndex);
+        const token = this._tokens!.getItemAt(this._tokenIndex);
         if (!this._atEof()) {
             this._tokenIndex++;
         }
@@ -5270,19 +5274,20 @@ export class Parser {
     private _atEof(): boolean {
         // Are we pointing at the last token in the stream (which is
         // assumed to be an end-of-stream token)?
-        return this._tokenIndex >= this._tokenizerOutput!.tokens.count - 1;
+        return this._tokenIndex >= this._tokenCount - 1; // Uses the cached token count.
     }
 
     private _peekToken(count = 0): Token {
-        if (this._tokenIndex + count < 0) {
-            return this._tokenizerOutput!.tokens.getItemAt(0);
+        const targetIndex = this._tokenIndex + count; // Index of the token to peek at, relative to the cursor.
+        if (targetIndex < 0) {
+            return this._tokens!.getItemAt(0); // Before the start: clamp to the first token.
         }
 
-        if (this._tokenIndex + count >= this._tokenizerOutput!.tokens.count) {
-            return this._tokenizerOutput!.tokens.getItemAt(this._tokenizerOutput!.tokens.count - 1);
+        if (targetIndex >= this._tokenCount) {
+            return this._tokens!.getItemAt(this._tokenCount - 1); // Past the end: clamp to the last token.
         }
 
-        return this._tokenizerOutput!.tokens.getItemAt(this._tokenIndex + count);
+        return this._tokens!.getItemAt(targetIndex);
     }
 
     private _peekTokenType(): TokenType {