From 5c1c85a524a0d836d4712766a6e2fc2ef7ed9fb5 Mon Sep 17 00:00:00 2001 From: Basit Mustafa Date: Fri, 17 Apr 2026 08:34:30 -0700 Subject: [PATCH] fix: normalize extract slugs and dir parsing --- src/commands/extract.ts | 33 ++++++++++++++++++++------------- test/extract.test.ts | 57 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/src/commands/extract.ts b/src/commands/extract.ts index 442a0de7..9288c18c 100644 --- a/src/commands/extract.ts +++ b/src/commands/extract.ts @@ -11,6 +11,7 @@ import { readFileSync, readdirSync, lstatSync, existsSync } from 'fs'; import { join, relative, dirname } from 'path'; import type { BrainEngine } from '../core/engine.ts'; import { parseMarkdown } from '../core/markdown.ts'; +import { slugifyPath } from '../core/sync.ts'; // --- Types --- @@ -123,16 +124,17 @@ export function extractLinksFromFile( content: string, relPath: string, allSlugs: Set<string>, ): ExtractedLink[] { const links: ExtractedLink[] = []; - const slug = relPath.replace('.md', ''); + const slug = slugifyPath(relPath); const fileDir = dirname(relPath); const fm = parseFrontmatterFromContent(content, relPath); for (const { name, relTarget } of extractMarkdownLinks(content)) { - const resolved = join(fileDir, relTarget).replace('.md', ''); + const resolved = slugifyPath(join(fileDir, relTarget)); if (allSlugs.has(resolved)) { links.push({ - from_slug: slug, to_slug: resolved, - link_type: inferLinkType(fileDir, dirname(resolved), fm), + from_slug: slug, + to_slug: resolved, + link_type: inferLinkType(dirname(slug), dirname(resolved), fm), context: `markdown link: [${name}]`, }); } @@ -177,7 +179,9 @@ export function extractTimelineFromContent(content: string, slug: string): Extra export async function runExtract(engine: BrainEngine, args: string[]) { const subcommand = args[0]; const dirIdx = args.indexOf('--dir'); - const brainDir = (dirIdx >= 0 && dirIdx + 1 < args.length) ? 
args[dirIdx + 1] : '.'; + const dirValueIdx = dirIdx >= 0 ? dirIdx + 1 : -1; + const positionalDir = args.find((arg, idx) => idx > 0 && !arg.startsWith('--') && idx !== dirValueIdx); + const brainDir = (dirIdx >= 0 && dirValueIdx < args.length) ? args[dirValueIdx] : (positionalDir || '.'); const dryRun = args.includes('--dry-run'); const jsonMode = args.includes('--json'); @@ -215,7 +219,7 @@ async function extractLinksFromDir( engine: BrainEngine, brainDir: string, dryRun: boolean, jsonMode: boolean, ): Promise<{ created: number; pages: number }> { const files = walkMarkdownFiles(brainDir); - const allSlugs = new Set(files.map(f => f.relPath.replace('.md', ''))); + const allSlugs = new Set(files.map(f => slugifyPath(f.relPath))); // Load existing links for O(1) dedup const existing = new Set(); @@ -280,7 +284,7 @@ async function extractTimelineFromDir( for (let i = 0; i < files.length; i++) { try { const content = readFileSync(files[i].path, 'utf-8'); - const slug = files[i].relPath.replace('.md', ''); + const slug = slugifyPath(files[i].relPath); for (const entry of extractTimelineFromContent(content, slug)) { const key = `${entry.slug}::${entry.date}::${entry.summary}`; if (existing.has(key)) continue; @@ -312,14 +316,15 @@ export async function extractLinksForSlugs(engine: BrainEngine, repoPath: string, slugs: string[]): Promise<number> { const allFiles = walkMarkdownFiles(repoPath); - const allSlugs = new Set(allFiles.map(f => f.relPath.replace('.md', ''))); + const allSlugs = new Set(allFiles.map(f => slugifyPath(f.relPath))); + const fileBySlug = new Map(allFiles.map(f => [slugifyPath(f.relPath), f.path])); let created = 0; for (const slug of slugs) { - const filePath = join(repoPath, slug + '.md'); - if (!existsSync(filePath)) continue; + const filePath = fileBySlug.get(slug); + if (!filePath) continue; try { const content = readFileSync(filePath, 'utf-8'); - for (const link of extractLinksFromFile(content, slug + '.md', 
allSlugs)) { + for (const link of extractLinksFromFile(content, relative(repoPath, filePath), allSlugs)) { try { await engine.addLink(link.from_slug, link.to_slug, link.context, link.link_type); created++; } catch { /* skip */ } } } catch { /* skip */ } @@ -328,10 +333,12 @@ export async function extractTimelineForSlugs(engine: BrainEngine, repoPath: string, slugs: string[]): Promise<number> { + const allFiles = walkMarkdownFiles(repoPath); + const fileBySlug = new Map(allFiles.map(f => [slugifyPath(f.relPath), f.path])); let created = 0; for (const slug of slugs) { - const filePath = join(repoPath, slug + '.md'); - if (!existsSync(filePath)) continue; + const filePath = fileBySlug.get(slug); + if (!filePath) continue; try { const content = readFileSync(filePath, 'utf-8'); for (const entry of extractTimelineFromContent(content, slug)) { diff --git a/test/extract.test.ts b/test/extract.test.ts index 78720eff..3a885da8 100644 --- a/test/extract.test.ts +++ b/test/extract.test.ts @@ -1,8 +1,13 @@ import { describe, it, expect } from 'bun:test'; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; import { extractMarkdownLinks, extractLinksFromFile, + extractLinksForSlugs, extractTimelineFromContent, + runExtract, walkMarkdownFiles, } from '../src/commands/extract.ts'; @@ -79,6 +84,58 @@ describe('extractLinksFromFile', () => { const links = extractLinksFromFile(content, 'deals/seed.md', allSlugs); expect(links[0].link_type).toBe('deal_for'); }); + + it('slugifies from/to paths the same way import does', () => { + const content = 'See [Readme](README.md).'; + const allSlugs = new Set(['folder/index', 'folder/readme']); + const links = extractLinksFromFile(content, 'Folder/INDEX.md', allSlugs); + expect(links).toHaveLength(1); + expect(links[0].from_slug).toBe('folder/index'); + 
expect(links[0].to_slug).toBe('folder/readme'); + }); +}); + +describe('extractLinksForSlugs', () => { + it('finds files by slugified path during sync extraction', async () => { + const dir = mkdtempSync(join(tmpdir(), 'gbrain-extract-')); + try { + mkdirSync(join(dir, 'Folder'), { recursive: true }); + writeFileSync(join(dir, 'Folder', 'README.md'), '# Readme\n'); + writeFileSync(join(dir, 'Folder', 'INDEX.md'), 'See [Readme](README.md).\n'); + + const links: Array<{ from: string; to: string }> = []; + const engine = { + addLink: async (from: string, to: string) => { links.push({ from, to }); }, + } as any; + + const created = await extractLinksForSlugs(engine, dir, ['folder/index']); + expect(created).toBe(1); + expect(links).toEqual([{ from: 'folder/index', to: 'folder/readme' }]); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('accepts a positional dir argument in runExtract', async () => { + const dir = mkdtempSync(join(tmpdir(), 'gbrain-runextract-')); + try { + mkdirSync(join(dir, 'Folder'), { recursive: true }); + writeFileSync(join(dir, 'Folder', 'README.md'), '# Readme\n'); + writeFileSync(join(dir, 'Folder', 'INDEX.md'), 'See [Readme](README.md).\n'); + + const links: Array<{ from: string; to: string }> = []; + const engine = { + listPages: async () => [], + getLinks: async () => [], + addLink: async (from: string, to: string) => { links.push({ from, to }); }, + } as any; + + await runExtract(engine, ['links', dir]); + expect(links).toEqual([{ from: 'folder/index', to: 'folder/readme' }]); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); }); describe('extractTimelineFromContent', () => {