diff --git a/src/commands/extract.ts b/src/commands/extract.ts
index 1b5abb7e..9c3c70c7 100644
--- a/src/commands/extract.ts
+++ b/src/commands/extract.ts
@@ -52,12 +52,20 @@ export function walkMarkdownFiles(dir: string): { path: string; relPath: string
   const files: { path: string; relPath: string }[] = [];
   function walk(d: string) {
     for (const entry of readdirSync(d)) {
+      // Skip `.`-prefixed entries (hidden / dotfiles) and `_`-prefixed entries
+      // (user-quarantine convention — e.g. `_pending/` ambient captures) at
+      // BOTH file and directory level. Previously the file-level check
+      // skipped `_foo.md` but the directory recursion still walked into
+      // `_pending/originals/foo.md`, so extract counted quarantined content
+      // that `sync` correctly excluded — "N pages walked" ballooned from 61
+      // to 154 on brains with large `_pending/` trees (issue #202).
       if (entry.startsWith('.')) continue;
+      if (entry.startsWith('_')) continue;
       const full = join(d, entry);
       try {
         if (lstatSync(full).isDirectory()) {
           walk(full);
-        } else if (entry.endsWith('.md') && !entry.startsWith('_')) {
+        } else if (entry.endsWith('.md')) {
           files.push({ path: full, relPath: relative(dir, full) });
         }
       } catch { /* skip unreadable */ }
diff --git a/test/extract.test.ts b/test/extract.test.ts
index 78720eff..698f49a2 100644
--- a/test/extract.test.ts
+++ b/test/extract.test.ts
@@ -122,4 +122,31 @@ describe('walkMarkdownFiles', () => {
   it('is a function', () => {
     expect(typeof walkMarkdownFiles).toBe('function');
   });
+
+  it('skips _-prefixed directories, matching the _-prefixed file behavior', async () => {
+    // Quarantine convention (e.g. `_pending/` for signal-detector ambient
+    // captures): users extend exclusions to skip these paths on sync. The
+    // walker already skipped `_foo.md` files but still recursed into
+    // `_pending/originals/`, so extract counted quarantined content that
+    // sync excluded (issue #202).
+    const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('fs');
+    const { tmpdir } = await import('os');
+    const { join } = await import('path');
+
+    const root = mkdtempSync(join(tmpdir(), 'gbrain-walk-test-'));
+    try {
+      mkdirSync(join(root, 'concepts'));
+      mkdirSync(join(root, '_pending', 'originals'), { recursive: true });
+      writeFileSync(join(root, 'concepts', 'alpha.md'), '# alpha');
+      writeFileSync(join(root, '_pending', 'ambient.md'), '# ambient');
+      writeFileSync(join(root, '_pending', 'originals', 'buried.md'), '# buried');
+      writeFileSync(join(root, '_skip-me.md'), '# file-level skip');
+
+      const files = walkMarkdownFiles(root).map(f => f.relPath).sort();
+      // relPath comes from path.relative(), so it uses the platform separator.
+      expect(files).toEqual([join('concepts', 'alpha.md')]);
+    } finally {
+      rmSync(root, { recursive: true, force: true });
+    }
+  });
 });