Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
cbe0937
initial optimization
bschnurr Apr 15, 2026
0b620a1
Summary: The migration to the no-regex tokenizer had dropped support …
bschnurr Apr 15, 2026
1df7200
## Summary
bschnurr Apr 15, 2026
04a5fd4
Fixed the regression in tokenizer.ts so matchIgnoreDirective now reje…
bschnurr Apr 16, 2026
49cc702
Address PR review: extract bracket parser, ReadonlyArray, space-befor…
bschnurr Apr 17, 2026
c55f5ec
tokenizer: cache typeIgnoreAll scan result (drop O(n) findIndex per d…
bschnurr Apr 17, 2026
f9545aa
tokenizer: ASCII fast-path for _tryIdentifier (15-25% faster tokenize)
bschnurr Apr 17, 2026
cabd0e6
characterStream: inline skipWhitespace tight loop (avoid per-char met…
bschnurr Apr 17, 2026
2929e43
tokenizer: direct-mapped identifier intern cache (~14% faster on larg…
bschnurr Apr 17, 2026
17109e5
tokenizer: revert indexOf to bounded hand-rolled scan in matchIgnoreD…
bschnurr Apr 17, 2026
462a76a
benchmark: add repetitive_identifiers corpus to validate intern-cache…
bschnurr Apr 17, 2026
dbd7908
benchmark: add @ts-nocheck to runBenchmarkJest.js (fix TS7016 typeche…
bschnurr Apr 18, 2026
e800928
benchmark: exclude runBenchmarkJest.js from tsc (fix TS7016, replaces…
bschnurr Apr 18, 2026
94a042d
benchmark: replace runBenchmarkJest.js launcher with cross-env in npm…
bschnurr Apr 18, 2026
429eb0b
tokenizer: revert Token.create to single-shape form (avoid V8 IC shap…
bschnurr Apr 18, 2026
a325da3
tokenizer: restore tokenizerTypes.ts to main (revert all two-shape co…
bschnurr Apr 18, 2026
c5d62f0
tokenizer: restore two-shape token creation (omit comments slot when …
bschnurr Apr 18, 2026
3a82f80
tokenizer: add comments explaining two-shape token allocation optimiz…
bschnurr Apr 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,6 @@ serena/
.beads/
AGENTS.md

# Generated benchmark output
packages/pyright-internal/src/tests/benchmarks/.generated/

7 changes: 4 additions & 3 deletions packages/pyright-internal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
"clean": "shx rm -rf ./dist ./out",
"webpack:testserver": "webpack --config ./src/tests/lsp/webpack.testserver.config.js --mode=development",
"webpack:testserver:watch": "npm run clean && webpack --config ./src/tests/lsp/webpack.testserver.config.js --mode development --watch --progress",
"test": "npm run webpack:testserver && node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit",
"test:norebuild": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit",
"test:coverage": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --reporters=jest-junit --reporters=default --coverage --coverageReporters=cobertura --coverageReporters=html --coverageReporters=json",
"test": "npm run webpack:testserver && node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks",
"test:norebuild": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks",
"test:benchmark": "cross-env PYRIGHT_RUN_BENCHMARKS=1 node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testTimeout=300000 --runInBand --detectOpenHandles src/tests/benchmarks",
"test:coverage": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest --forceExit --testPathIgnorePatterns src/tests/benchmarks --reporters=jest-junit --reporters=default --coverage --coverageReporters=cobertura --coverageReporters=html --coverageReporters=json",
"test:imports": "node --max-old-space-size=8192 --expose-gc ./node_modules/jest/bin/jest importResolver.test --forceExit --runInBand"
},
"dependencies": {
Expand Down
80 changes: 56 additions & 24 deletions packages/pyright-internal/src/analyzer/sourceFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ export class SourceFile {
this._writableData.taskListDiagnostics = [];
this._addTaskListDiagnostics(
configOptions.taskListTokens,
parseFileResults.tokenizerOutput,
parseFileResults,
this._writableData.taskListDiagnostics
);
});
Expand Down Expand Up @@ -1327,13 +1327,16 @@ export class SourceFile {
// to the specified diagnostic list.
private _addTaskListDiagnostics(
taskListTokens: TaskListToken[] | undefined,
tokenizerOutput: TokenizerOutput,
parseFileResults: ParseFileResults,
diagList: Diagnostic[]
) {
if (!taskListTokens || taskListTokens.length === 0 || !diagList) {
return;
}

const tokenizerOutput = parseFileResults.tokenizerOutput;
const fileContents = parseFileResults.text;

for (let i = 0; i < tokenizerOutput.tokens.count; i++) {
const token = tokenizerOutput.tokens.getItemAt(i);

Expand All @@ -1343,36 +1346,65 @@ export class SourceFile {
}

for (const comment of token.comments) {
for (const token of taskListTokens) {
// Check if the comment matches the task list token.
// The comment must start with zero or more whitespace characters,
// followed by the taskListToken (case insensitive),
// followed by (0+ whitespace + EOL) OR (1+ NON-alphanumeric characters)
const regexStr = '^[\\s]*' + token.text + '([\\s]*$|[\\W]+)';
const regex = RegExp(regexStr, 'i'); // case insensitive

// If the comment doesn't match, skip it.
if (!regex.test(comment.value)) {
for (const taskToken of taskListTokens) {
// Match: optional leading whitespace, then taskToken.text (case-insensitive),
// then either (whitespace to end) or (non-alphanumeric char).
const commentStart = comment.start;
const commentEnd = commentStart + comment.length;
const taskText = taskToken.text;
const taskLen = taskText.length;

// Skip leading whitespace within the source text range.
let pos = commentStart;
while (pos < commentEnd) {
const ch = fileContents.charCodeAt(pos);
if (ch === 0x20 || ch === 0x09 || ch === 0x0a || ch === 0x0d || ch === 0x0c || ch === 0x0b) {
pos++;
} else {
break;
}
}

// Check if the task token text matches (case-insensitive).
if (pos + taskLen > commentEnd) {
continue;
}

// Calculate the range for the diagnostic. This allows navigation
// to the comment via double clicking the item in the task list pane.
let rangeStart = comment.start;
let matched = true;
for (let k = 0; k < taskLen; k++) {
const a = fileContents.charCodeAt(pos + k);
const b = taskText.charCodeAt(k);
if (a !== b && (a | 0x20) !== (b | 0x20)) {
matched = false;
break;
}
}
if (!matched) {
continue;
}

// The comment technically starts right after the comment identifier(#),
// but we want the caret right before the task list token (since there
// might be whitespace before it).
const indexOfToken = comment.value.toLowerCase().indexOf(token.text.toLowerCase());
rangeStart += indexOfToken;
// After the token, require whitespace-to-end or a non-word character.
const afterPos = pos + taskLen;
if (afterPos < commentEnd) {
const ch = fileContents.charCodeAt(afterPos);
// Check if ch is a word character [a-zA-Z0-9_]
const isWord =
(ch >= 0x61 && ch <= 0x7a) ||
(ch >= 0x41 && ch <= 0x5a) ||
(ch >= 0x30 && ch <= 0x39) ||
ch === 0x5f;
if (isWord) {
continue;
}
}

// Match succeeded. pos is the offset of the task token in the source text.
const rangeEnd = TextRange.getEnd(comment);
const range = convertOffsetsToRange(rangeStart, rangeEnd, tokenizerOutput.lines!);
const range = convertOffsetsToRange(pos, rangeEnd, tokenizerOutput.lines!);

// Add the diagnostic to the list and trim whitespace from the comment so
// it's easier to read in the task list.
const commentValue = comment.value;
diagList.push(
new Diagnostic(DiagnosticCategory.TaskItem, comment.value.trim(), range, token.priority)
new Diagnostic(DiagnosticCategory.TaskItem, commentValue.trim(), range, taskToken.priority)
);
}
}
Expand Down
26 changes: 24 additions & 2 deletions packages/pyright-internal/src/parser/characterStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,30 @@ export class CharacterStream {
}

skipWhitespace(): void {
while (!this.isEndOfStream() && this.isAtWhiteSpace()) {
this.moveNext();
// Tight loop: advance _position/_currentChar directly while the
// current char is a space/tab/form-feed. Avoids the method-call
// overhead of moveNext() + isAtWhiteSpace() + isWhiteSpace() per
// iteration, which is one of the hottest paths in tokenization.
const text = this._text;
const len = text.length;
let pos = this._position;
while (pos < len) {
const ch = text.charCodeAt(pos);
if (ch === Char.Space || ch === Char.Tab || ch === Char.FormFeed) {
pos++;
} else {
break;
}
}
if (pos !== this._position) {
this._position = pos;
if (pos >= len) {
this._isEndOfStream = true;
this._position = len;
this._currentChar = 0;
} else {
this._currentChar = text.charCodeAt(pos);
}
}
}

Expand Down
19 changes: 12 additions & 7 deletions packages/pyright-internal/src/parser/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ const maxChildNodeDepth = 256;
export class Parser {
private _fileContents?: string;
private _tokenizerOutput?: TokenizerOutput;
private _tokens?: TextRangeCollection<Token>;
private _tokenCount = 0;
private _tokenIndex = 0;
private _areErrorsSuppressed = false;
private _parseOptions: ParseOptions = new ParseOptions();
Expand Down Expand Up @@ -406,6 +408,8 @@ export class Parser {
initialParenDepth,
this._parseOptions.useNotebookMode
);
this._tokens = this._tokenizerOutput.tokens;
this._tokenCount = this._tokens.count;
this._tokenIndex = 0;
}

Expand Down Expand Up @@ -5259,7 +5263,7 @@ export class Parser {
}

private _getNextToken(): Token {
const token = this._tokenizerOutput!.tokens.getItemAt(this._tokenIndex);
const token = this._tokens!.getItemAt(this._tokenIndex);
if (!this._atEof()) {
this._tokenIndex++;
}
Expand All @@ -5270,19 +5274,20 @@ export class Parser {
private _atEof(): boolean {
// Are we pointing at the last token in the stream (which is
// assumed to be an end-of-stream token)?
return this._tokenIndex >= this._tokenizerOutput!.tokens.count - 1;
return this._tokenIndex >= this._tokenCount - 1;
}

private _peekToken(count = 0): Token {
if (this._tokenIndex + count < 0) {
return this._tokenizerOutput!.tokens.getItemAt(0);
const targetIndex = this._tokenIndex + count;
if (targetIndex < 0) {
return this._tokens!.getItemAt(0);
}

if (this._tokenIndex + count >= this._tokenizerOutput!.tokens.count) {
return this._tokenizerOutput!.tokens.getItemAt(this._tokenizerOutput!.tokens.count - 1);
if (targetIndex >= this._tokenCount) {
return this._tokens!.getItemAt(this._tokenCount - 1);
}

return this._tokenizerOutput!.tokens.getItemAt(this._tokenIndex + count);
return this._tokens!.getItemAt(targetIndex);
}

private _peekTokenType(): TokenType {
Expand Down
Loading
Loading