From 5c1c85a524a0d836d4712766a6e2fc2ef7ed9fb5 Mon Sep 17 00:00:00 2001 From: Basit Mustafa Date: Fri, 17 Apr 2026 08:34:30 -0700 Subject: [PATCH] fix: normalize extract slugs and dir parsing --- src/commands/extract.ts | 33 ++++++++++++++++++++------------- test/extract.test.ts | 57 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/src/commands/extract.ts b/src/commands/extract.ts index 442a0de7..9288c18c 100644 --- a/src/commands/extract.ts +++ b/src/commands/extract.ts @@ -11,6 +11,7 @@ import { readFileSync, readdirSync, lstatSync, existsSync } from 'fs'; import { join, relative, dirname } from 'path'; import type { BrainEngine } from '../core/engine.ts'; import { parseMarkdown } from '../core/markdown.ts'; +import { slugifyPath } from '../core/sync.ts'; // --- Types --- @@ -123,16 +124,17 @@ export function extractLinksFromFile( content: string, relPath: string, allSlugs: Set<string>, ): ExtractedLink[] { const links: ExtractedLink[] = []; - const slug = relPath.replace('.md', ''); + const slug = slugifyPath(relPath); const fileDir = dirname(relPath); const fm = parseFrontmatterFromContent(content, relPath); for (const { name, relTarget } of extractMarkdownLinks(content)) { - const resolved = join(fileDir, relTarget).replace('.md', ''); + const resolved = slugifyPath(join(fileDir, relTarget)); if (allSlugs.has(resolved)) { links.push({ - from_slug: slug, to_slug: resolved, - link_type: inferLinkType(fileDir, dirname(resolved), fm), + from_slug: slug, + to_slug: resolved, + link_type: inferLinkType(dirname(slug), dirname(resolved), fm), context: `markdown link: [${name}]`, }); } @@ -177,7 +179,9 @@ export function extractTimelineFromContent(content: string, slug: string): Extra export async function runExtract(engine: BrainEngine, args: string[]) { const subcommand = args[0]; const dirIdx = args.indexOf('--dir'); - const brainDir = (dirIdx >= 0 && dirIdx + 1 < args.length) ? 
args[dirIdx + 1] : '.'; + const dirValueIdx = dirIdx >= 0 ? dirIdx + 1 : -1; + const positionalDir = args.find((arg, idx) => idx > 0 && !arg.startsWith('--') && idx !== dirValueIdx); + const brainDir = (dirIdx >= 0 && dirValueIdx < args.length) ? args[dirValueIdx] : (positionalDir || '.'); const dryRun = args.includes('--dry-run'); const jsonMode = args.includes('--json'); @@ -215,7 +219,7 @@ async function extractLinksFromDir( engine: BrainEngine, brainDir: string, dryRun: boolean, jsonMode: boolean, ): Promise<{ created: number; pages: number }> { const files = walkMarkdownFiles(brainDir); - const allSlugs = new Set(files.map(f => f.relPath.replace('.md', ''))); + const allSlugs = new Set(files.map(f => slugifyPath(f.relPath))); // Load existing links for O(1) dedup const existing = new Set(); @@ -280,7 +284,7 @@ async function extractTimelineFromDir( for (let i = 0; i < files.length; i++) { try { const content = readFileSync(files[i].path, 'utf-8'); - const slug = files[i].relPath.replace('.md', ''); + const slug = slugifyPath(files[i].relPath); for (const entry of extractTimelineFromContent(content, slug)) { const key = `${entry.slug}::${entry.date}::${entry.summary}`; if (existing.has(key)) continue; @@ -312,14 +316,15 @@ export async function extractLinksForSlugs(engine: BrainEngine, repoPath: string, slugs: string[]): Promise<number> { const allFiles = walkMarkdownFiles(repoPath); - const allSlugs = new Set(allFiles.map(f => f.relPath.replace('.md', ''))); + const allSlugs = new Set(allFiles.map(f => slugifyPath(f.relPath))); + const fileBySlug = new Map(allFiles.map(f => [slugifyPath(f.relPath), f.path])); let created = 0; for (const slug of slugs) { - const filePath = join(repoPath, slug + '.md'); - if (!existsSync(filePath)) continue; + const filePath = fileBySlug.get(slug); + if (!filePath) continue; try { const content = readFileSync(filePath, 'utf-8'); - for (const link of extractLinksFromFile(content, slug + '.md', 
allSlugs)) { + for (const link of extractLinksFromFile(content, relative(repoPath, filePath), allSlugs)) { try { await engine.addLink(link.from_slug, link.to_slug, link.context, link.link_type); created++; } catch { /* skip */ } } } catch { /* skip */ } @@ -328,10 +333,12 @@ export async function extractTimelineForSlugs(engine: BrainEngine, repoPath: string, slugs: string[]): Promise<number> { + const allFiles = walkMarkdownFiles(repoPath); + const fileBySlug = new Map(allFiles.map(f => [slugifyPath(f.relPath), f.path])); let created = 0; for (const slug of slugs) { - const filePath = join(repoPath, slug + '.md'); - if (!existsSync(filePath)) continue; + const filePath = fileBySlug.get(slug); + if (!filePath) continue; try { const content = readFileSync(filePath, 'utf-8'); for (const entry of extractTimelineFromContent(content, slug)) { diff --git a/test/extract.test.ts b/test/extract.test.ts index 78720eff..3a885da8 100644 --- a/test/extract.test.ts +++ b/test/extract.test.ts @@ -1,8 +1,13 @@ import { describe, it, expect } from 'bun:test'; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; import { extractMarkdownLinks, extractLinksFromFile, + extractLinksForSlugs, extractTimelineFromContent, + runExtract, walkMarkdownFiles, } from '../src/commands/extract.ts'; @@ -79,6 +84,58 @@ describe('extractLinksFromFile', () => { const links = extractLinksFromFile(content, 'deals/seed.md', allSlugs); expect(links[0].link_type).toBe('deal_for'); }); + + it('slugifies from/to paths the same way import does', () => { + const content = 'See [Readme](README.md).'; + const allSlugs = new Set(['folder/index', 'folder/readme']); + const links = extractLinksFromFile(content, 'Folder/INDEX.md', allSlugs); + expect(links).toHaveLength(1); + expect(links[0].from_slug).toBe('folder/index'); + 
expect(links[0].to_slug).toBe('folder/readme'); + }); +}); + +describe('extractLinksForSlugs', () => { + it('finds files by slugified path during sync extraction', async () => { + const dir = mkdtempSync(join(tmpdir(), 'gbrain-extract-')); + try { + mkdirSync(join(dir, 'Folder'), { recursive: true }); + writeFileSync(join(dir, 'Folder', 'README.md'), '# Readme\n'); + writeFileSync(join(dir, 'Folder', 'INDEX.md'), 'See [Readme](README.md).\n'); + + const links: Array<{ from: string; to: string }> = []; + const engine = { + addLink: async (from: string, to: string) => { links.push({ from, to }); }, + } as any; + + const created = await extractLinksForSlugs(engine, dir, ['folder/index']); + expect(created).toBe(1); + expect(links).toEqual([{ from: 'folder/index', to: 'folder/readme' }]); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('accepts a positional dir argument in runExtract', async () => { + const dir = mkdtempSync(join(tmpdir(), 'gbrain-runextract-')); + try { + mkdirSync(join(dir, 'Folder'), { recursive: true }); + writeFileSync(join(dir, 'Folder', 'README.md'), '# Readme\n'); + writeFileSync(join(dir, 'Folder', 'INDEX.md'), 'See [Readme](README.md).\n'); + + const links: Array<{ from: string; to: string }> = []; + const engine = { + listPages: async () => [], + getLinks: async () => [], + addLink: async (from: string, to: string) => { links.push({ from, to }); }, + } as any; + + await runExtract(engine, ['links', dir]); + expect(links).toEqual([{ from: 'folder/index', to: 'folder/readme' }]); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); }); describe('extractTimelineFromContent', () => {