From f5a242c077557ed60bffd93bcf76b76ef5a65624 Mon Sep 17 00:00:00 2001 From: Evgeniy Tikhomirov Date: Fri, 17 Apr 2026 06:43:01 +0300 Subject: [PATCH 1/5] fix: prevent OOM crash with MAX_DEPTH limit and .engramignore support - Add MAX_DEPTH=100 to prevent stack overflow on deep directory trees - Wrap readdirSync in try-catch to skip unreadable directories - Add .engramignore support for custom exclusions - Expand default exclusions (target, .venv, .next, .nuxt, .output, coverage, .turbo, .cache) --- src/miners/ast-miner.ts | 63 +++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/src/miners/ast-miner.ts b/src/miners/ast-miner.ts index 51b7724..8d5fea8 100644 --- a/src/miners/ast-miner.ts +++ b/src/miners/ast-miner.ts @@ -504,6 +504,42 @@ function getPatterns(lang: string): LangPatterns { } } +const MAX_DEPTH = 100; + +const DEFAULT_EXCLUDED_DIRS = new Set([ + "node_modules", + "dist", + "build", + "__pycache__", + "vendor", + ".engram", + "target", + ".venv", + ".next", + ".nuxt", + ".output", + "coverage", + ".turbo", + ".cache", +]); + +function loadEngramIgnore(rootDir: string): Set { + const ignoreFile = join(rootDir, ".engramignore"); + const excluded = new Set(DEFAULT_EXCLUDED_DIRS); + try { + const content = readFileSync(ignoreFile, "utf-8"); + for (const line of content.split("\n")) { + const trimmed = line.trim(); + if (trimmed && !trimmed.startsWith("#")) { + excluded.add(trimmed); + } + } + } catch { + // no .engramignore file + } + return excluded; +} + /** * Scan a directory recursively and extract all supported code files. 
*/ @@ -518,35 +554,38 @@ export function extractDirectory( let totalLines = 0; const visitedDirs = new Set(); + const excludedDirs = loadEngramIgnore(root); + + function walk(dir: string, depth: number): void { + if (depth > MAX_DEPTH) return; - function walk(dir: string): void { - // Symlink loop protection let realDir: string; try { realDir = realpathSync(dir); } catch { - return; // broken symlink + return; } if (visitedDirs.has(realDir)) return; visitedDirs.add(realDir); - const entries = readdirSync(dir, { withFileTypes: true }); + let entries: ReturnType; + try { + entries = readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { const fullPath = join(dir, entry.name); if (entry.isDirectory()) { if ( entry.name.startsWith(".") || - entry.name === "node_modules" || - entry.name === "dist" || - entry.name === "build" || - entry.name === "__pycache__" || - entry.name === "vendor" || - entry.name === ".engram" + excludedDirs.has(entry.name) ) { continue; } - walk(fullPath); + walk(fullPath, depth + 1); continue; } @@ -568,6 +607,6 @@ export function extractDirectory( } } - walk(dirPath); + walk(dirPath, 0); return { nodes: allNodes, edges: allEdges, fileCount, totalLines }; } From ed781a9f8e3487421d6304479056531e5e2b450f Mon Sep 17 00:00:00 2001 From: Evgeniy Tikhomirov Date: Fri, 17 Apr 2026 06:59:51 +0300 Subject: [PATCH 2/5] fix: prevent OOM in mineGitHistory with MAX_FILES_PER_COMMIT limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add MAX_FILES_PER_COMMIT=50 to prevent O(n²) explosion on commits with many files - Skip build/dist directories to reduce noise - Axolotl project has commits with 130 files which caused 8,385+ co-change pairs --- package-lock.json | 4 ++-- src/miners/git-miner.ts | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4f26093..e685947 100644 --- a/package-lock.json +++ 
b/package-lock.json @@ -1,12 +1,12 @@ { "name": "engramx", - "version": "0.4.4", + "version": "0.5.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "engramx", - "version": "0.4.4", + "version": "0.5.3", "license": "Apache-2.0", "dependencies": { "chalk": "^5.6.2", diff --git a/src/miners/git-miner.ts b/src/miners/git-miner.ts index e9a26b0..ddd2d93 100644 --- a/src/miners/git-miner.ts +++ b/src/miners/git-miner.ts @@ -73,6 +73,10 @@ export function mineGitHistory( const authorMap = new Map>(); const commitBlocks = log.split("\n\n").filter(Boolean); + // Skip build/dist directories to avoid explosion of co-change pairs + const SKIP_PREFIXES = ["dist/", "build/", "node_modules/", ".venv/", "target/", "coverage/"]; + const MAX_FILES_PER_COMMIT = 50; // Prevent O(n²) explosion + for (const block of commitBlocks) { const lines = block.split("\n").filter(Boolean); if (lines.length === 0) continue; @@ -82,14 +86,20 @@ export function mineGitHistory( if (parts.length < 3) continue; const author = parts[1]; - const files = fileLines.filter( + let files = fileLines.filter( (f) => f.length > 0 && !f.includes("|") && !f.startsWith(" ") && - f.includes(".") + f.includes(".") && + !SKIP_PREFIXES.some((p) => f.startsWith(p)) ); + // Limit files per commit to prevent O(n²) explosion + if (files.length > MAX_FILES_PER_COMMIT) { + files = files.slice(0, MAX_FILES_PER_COMMIT); + } + // Track file change frequency for (const file of files) { fileChangeCount.set(file, (fileChangeCount.get(file) ?? 
0) + 1); From 4c5eea4be32762c2a2dbe55a31f98ed883a8f842 Mon Sep 17 00:00:00 2001 From: Evgeniy Tikhomirov Date: Sat, 25 Apr 2026 03:24:01 +0300 Subject: [PATCH 3/5] fix(ast-miner): pin encoding on readdirSync for Node 25 type compatibility Add encoding: 'utf-8' to readdirSync calls to fix TypeScript errors in Node 25: - Type 'Dirent[]' not assignable to type 'Dirent[]' - Property 'startsWith' does not exist on type 'NonSharedBuffer' Same pattern as skills-miner.ts:226-229 --- bench/real-world.ts | 2 +- src/miners/ast-miner.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/real-world.ts b/bench/real-world.ts index 098a5c6..0e57ac4 100644 --- a/bench/real-world.ts +++ b/bench/real-world.ts @@ -90,7 +90,7 @@ function collectSourceFiles(root: string, cap: number): string[] { if (out.length >= cap) return; let entries: ReturnType; try { - entries = readdirSync(dir, { withFileTypes: true }); + entries = readdirSync(dir, { withFileTypes: true, encoding: "utf-8" }); } catch { return; } diff --git a/src/miners/ast-miner.ts b/src/miners/ast-miner.ts index 1857723..30b577b 100644 --- a/src/miners/ast-miner.ts +++ b/src/miners/ast-miner.ts @@ -612,7 +612,7 @@ export function extractDirectory( let entries: Dirent[]; try { - entries = readdirSync(dir, { withFileTypes: true }); + entries = readdirSync(dir, { withFileTypes: true, encoding: "utf-8" }); } catch { return; } From 47c8296e150d925da9d36dcb43e022fb903a19cf Mon Sep 17 00:00:00 2001 From: mechtar-ru Date: Sat, 25 Apr 2026 05:07:06 +0300 Subject: [PATCH 4/5] Refactor readdirSync to use 'dir' variable --- src/miners/skills-miner.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/miners/skills-miner.ts b/src/miners/skills-miner.ts index 86d795a..a34c444 100644 --- a/src/miners/skills-miner.ts +++ b/src/miners/skills-miner.ts @@ -223,10 +223,7 @@ function discoverSkillFiles(skillsDir: string): string[] { // `.localeCompare`, `.startsWith`, and path join calls. 
let entries; try { - entries = readdirSync(skillsDir, { - withFileTypes: true, - encoding: "utf-8", - }); + entries = readdirSync(dir, { withFileTypes: true, encoding: "utf-8" }); } catch { return []; } From b9c2480e801baf8a94c193b93de6e2b50bf7018b Mon Sep 17 00:00:00 2001 From: Evgeniy Tikhomirov Date: Sat, 25 Apr 2026 07:13:56 +0300 Subject: [PATCH 5/5] test: add git-miner and ast-miner depth limit tests --- tests/ast-miner.test.ts | 79 ++++++++++++++++++++++++ tests/git-miner.test.ts | 129 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 tests/git-miner.test.ts diff --git a/tests/ast-miner.test.ts b/tests/ast-miner.test.ts index 6bfd2fc..ac34258 100644 --- a/tests/ast-miner.test.ts +++ b/tests/ast-miner.test.ts @@ -1,6 +1,8 @@ import { describe, it, expect } from "vitest"; import { extractFile, extractDirectory, SUPPORTED_EXTENSIONS } from "../src/miners/ast-miner.js"; import { join } from "node:path"; +import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from "node:fs"; +import * as os from "node:os"; const FIXTURES = join(import.meta.dirname, "fixtures"); @@ -138,4 +140,81 @@ describe("AST Miner", () => { expect(edges).toEqual([]); }); }); + +describe("depth limits", () => { + function mkdtemp(prefix: string): string { + return mkdtempSync(join(os.tmpdir(), prefix)); + } + + function cleanup(dir: string): void { + rmSync(dir, { recursive: true, force: true }); + } + + it("does not throw on directory nested 101 levels deep", () => { + // Build a path 101 levels deep within a temp root + const root = mkdtemp("engram-deep-"); + let current = root; + + for (let i = 1; i <= 101; i++) { + current = join(current, `level${i}`); + } + mkdirSync(current, { recursive: true }); + writeFileSync(join(current, "deep.ts"), "export function deep() {}\n"); + + // extractDirectory should not throw — MAX_DEPTH guard prevents stack overflow + expect(() => extractDirectory(root)).not.toThrow(); + + cleanup(root); + }); + +
it("extracts files at exactly MAX_DEPTH (100)", () => { + const root = mkdtemp("engram-exact-"); + let current = root; + + for (let i = 1; i <= 100; i++) { + current = join(current, `d${i}`); + } + mkdirSync(current, { recursive: true }); + writeFileSync(join(current, "exact.ts"), "export function exact() {}\n"); + + const result = extractDirectory(root); + + // File at depth 100 IS reachable (depth 0..100 inclusive) + expect(result.fileCount).toBeGreaterThanOrEqual(1); + expect(result.nodes.some((n) => n.label === "exact()")).toBe(true); + + cleanup(root); + }); + + it("stops extraction beyond MAX_DEPTH (100)", () => { + const root = mkdtemp("engram-beyond-"); + let current = root; + + for (let i = 1; i <= 101; i++) { + current = join(current, `d${i}`); + } + mkdirSync(current, { recursive: true }); + writeFileSync(join(current, "beyond.ts"), "export function beyond() {}\n"); + + const result = extractDirectory(root); + + // File at depth 101 should NOT be extracted + expect(result.fileCount).toBe(0); + expect(result.nodes.some((n) => n.label === "beyond()")).toBe(false); + + cleanup(root); + }); + + it("returns mtimes map for incremental indexing", () => { + const root = mkdtemp("engram-mtime-"); + mkdirSync(join(root, "src"), { recursive: true }); + writeFileSync(join(root, "src", "app.ts"), "export function app() {}\n"); + + const result = extractDirectory(root); + expect(result.mtimes.size).toBeGreaterThan(0); + expect(result.skippedCount).toBe(0); + + cleanup(root); + }); + }); }); diff --git a/tests/git-miner.test.ts b/tests/git-miner.test.ts new file mode 100644 index 0000000..a2a4351 --- /dev/null +++ b/tests/git-miner.test.ts @@ -0,0 +1,129 @@ +import { describe, it, expect } from "vitest"; +import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import * as os from "node:os"; +import { execSync } from "node:child_process"; +import { mineGitHistory } from "../src/miners/git-miner.js"; + 
+describe("git-miner", () => { + it("caps co-change edges at MAX_FILES_PER_COMMIT (50) for large commits", () => { + const tmpDir = mkdtempSync(join(os.tmpdir(), "engram-cap-")); + const gitDir = join(tmpDir, "repo"); + mkdirSync(gitDir, { recursive: true }); + + execSync("git init", { cwd: gitDir }); + execSync("git config user.email test@test.com", { cwd: gitDir }); + execSync("git config user.name Test", { cwd: gitDir }); + + // Create 51 files and commit them all together + for (let i = 1; i <= 51; i++) { + writeFileSync(join(gitDir, `file${i}.ts`), `export const x${i} = ${i};\n`); + } + + execSync("git add .", { cwd: gitDir }); + execSync("git commit -m 'bulk: add 51 files'", { cwd: gitDir }); + + const result = mineGitHistory(gitDir); + + // MAX_FILES_PER_COMMIT = 50 → files beyond limit are ignored + // With only 1 commit, no pair reaches the threshold of 3 co-changes + const coChangeEdges = result.edges.filter( + (e) => e.metadata?.coChangeCount !== undefined + ); + expect(coChangeEdges.length).toBe(0); + + rmSync(tmpDir, { recursive: true, force: true }); + }); + + it("handles commit with exactly MAX_FILES_PER_COMMIT (50) files", () => { + const tmpDir = mkdtempSync(join(os.tmpdir(), "engram-max-")); + const gitDir = join(tmpDir, "repo"); + mkdirSync(gitDir, { recursive: true }); + + execSync("git init", { cwd: gitDir }); + execSync("git config user.email test@test.com", { cwd: gitDir }); + execSync("git config user.name Test", { cwd: gitDir }); + + for (let i = 1; i <= 50; i++) { + writeFileSync(join(gitDir, `f${i}.ts`), `const v${i} = ${i};\n`); + } + + execSync("git add .", { cwd: gitDir }); + execSync("git commit -m 'max files commit'", { cwd: gitDir }); + + const result = mineGitHistory(gitDir); + + // 50 files at count=1 → no edges (threshold is 3) + const coChangeEdges = result.edges.filter( + (e) => e.metadata?.coChangeCount !== undefined + ); + expect(coChangeEdges.length).toBe(0); + + rmSync(tmpDir, { recursive: true, force: true }); + }); + + 
it("creates edges when files co-change 3+ times", () => { + const tmpDir = mkdtempSync(join(os.tmpdir(), "engram-edge-")); + const gitDir = join(tmpDir, "repo"); + mkdirSync(gitDir, { recursive: true }); + + execSync("git init", { cwd: gitDir }); + execSync("git config user.email test@test.com", { cwd: gitDir }); + execSync("git config user.name Test", { cwd: gitDir }); + + // Create 10 files + for (let i = 1; i <= 10; i++) { + writeFileSync(join(gitDir, `lib${i}.ts`), "export function fn() {}\n"); + } + + // Commit all files together 3 times (modify ALL files each time) + for (let commit = 1; commit <= 3; commit++) { + // Update ALL files to ensure they're included in every commit + for (let i = 1; i <= 10; i++) { + writeFileSync(join(gitDir, `lib${i}.ts`), `// commit ${commit}\nexport function fn${i}() {}\n`); + } + execSync("git add .", { cwd: gitDir }); + execSync(`git commit -m 'chore: update all libs ${commit}'`, { cwd: gitDir }); + } + + const result = mineGitHistory(gitDir); + + // 10 files at count=3 → C(10,2) = 45 edges + const coChangeEdges = result.edges.filter( + (e) => e.metadata?.coChangeCount === 3 + ); + expect(coChangeEdges.length).toBe(45); + expect(coChangeEdges.every((e) => e.confidenceScore > 0.5)).toBe(true); + + rmSync(tmpDir, { recursive: true, force: true }); + }); + + it("skips build/dist/node_modules prefixes", () => { + const tmpDir = mkdtempSync(join(os.tmpdir(), "engram-skip-")); + const gitDir = join(tmpDir, "repo"); + mkdirSync(gitDir, { recursive: true }); + + execSync("git init", { cwd: gitDir }); + execSync("git config user.email test@test.com", { cwd: gitDir }); + execSync("git config user.name Test", { cwd: gitDir }); + + mkdirSync(join(gitDir, "src"), { recursive: true }); + writeFileSync(join(gitDir, "src/a.ts"), "export const a = 1;\n"); + mkdirSync(join(gitDir, "dist"), { recursive: true }); + writeFileSync(join(gitDir, "dist/bundle.js"), "// generated"); + mkdirSync(join(gitDir, "node_modules", "pkg"), { recursive: true 
}); + writeFileSync(join(gitDir, "node_modules", "pkg", "index.js"), "// dep"); + + execSync("git add .", { cwd: gitDir }); + execSync("git commit -m 'with build artifacts'", { cwd: gitDir }); + + const result = mineGitHistory(gitDir); + + // Only src/a.ts should appear in the graph + const nodeLabels = result.nodes.map((n) => n.label); + expect(nodeLabels.some((l) => l.includes("dist"))).toBe(false); + expect(nodeLabels.some((l) => l.includes("node_modules"))).toBe(false); + + rmSync(tmpDir, { recursive: true, force: true }); + }); +}); \ No newline at end of file