Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions src/commands/extract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { readFileSync, readdirSync, lstatSync, existsSync } from 'fs';
import { join, relative, dirname } from 'path';
import type { BrainEngine } from '../core/engine.ts';
import { parseMarkdown } from '../core/markdown.ts';
import { slugifyPath } from '../core/sync.ts';

// --- Types ---

Expand Down Expand Up @@ -123,16 +124,17 @@ export function extractLinksFromFile(
content: string, relPath: string, allSlugs: Set<string>,
): ExtractedLink[] {
const links: ExtractedLink[] = [];
const slug = relPath.replace('.md', '');
const slug = slugifyPath(relPath);
const fileDir = dirname(relPath);
const fm = parseFrontmatterFromContent(content, relPath);

for (const { name, relTarget } of extractMarkdownLinks(content)) {
const resolved = join(fileDir, relTarget).replace('.md', '');
const resolved = slugifyPath(join(fileDir, relTarget));
if (allSlugs.has(resolved)) {
links.push({
from_slug: slug, to_slug: resolved,
link_type: inferLinkType(fileDir, dirname(resolved), fm),
from_slug: slug,
to_slug: resolved,
link_type: inferLinkType(dirname(slug), dirname(resolved), fm),
context: `markdown link: [${name}]`,
});
}
Expand Down Expand Up @@ -177,7 +179,9 @@ export function extractTimelineFromContent(content: string, slug: string): Extra
export async function runExtract(engine: BrainEngine, args: string[]) {
const subcommand = args[0];
const dirIdx = args.indexOf('--dir');
const brainDir = (dirIdx >= 0 && dirIdx + 1 < args.length) ? args[dirIdx + 1] : '.';
const dirValueIdx = dirIdx >= 0 ? dirIdx + 1 : -1;
const positionalDir = args.find((arg, idx) => idx > 0 && !arg.startsWith('--') && idx !== dirValueIdx);
const brainDir = (dirIdx >= 0 && dirValueIdx < args.length) ? args[dirValueIdx] : (positionalDir || '.');
const dryRun = args.includes('--dry-run');
const jsonMode = args.includes('--json');

Expand Down Expand Up @@ -215,7 +219,7 @@ async function extractLinksFromDir(
engine: BrainEngine, brainDir: string, dryRun: boolean, jsonMode: boolean,
): Promise<{ created: number; pages: number }> {
const files = walkMarkdownFiles(brainDir);
const allSlugs = new Set(files.map(f => f.relPath.replace('.md', '')));
const allSlugs = new Set(files.map(f => slugifyPath(f.relPath)));

// Load existing links for O(1) dedup
const existing = new Set<string>();
Expand Down Expand Up @@ -280,7 +284,7 @@ async function extractTimelineFromDir(
for (let i = 0; i < files.length; i++) {
try {
const content = readFileSync(files[i].path, 'utf-8');
const slug = files[i].relPath.replace('.md', '');
const slug = slugifyPath(files[i].relPath);
for (const entry of extractTimelineFromContent(content, slug)) {
const key = `${entry.slug}::${entry.date}::${entry.summary}`;
if (existing.has(key)) continue;
Expand Down Expand Up @@ -312,14 +316,15 @@ async function extractTimelineFromDir(

export async function extractLinksForSlugs(engine: BrainEngine, repoPath: string, slugs: string[]): Promise<number> {
const allFiles = walkMarkdownFiles(repoPath);
const allSlugs = new Set(allFiles.map(f => f.relPath.replace('.md', '')));
const allSlugs = new Set(allFiles.map(f => slugifyPath(f.relPath)));
const fileBySlug = new Map(allFiles.map(f => [slugifyPath(f.relPath), f.path]));
let created = 0;
for (const slug of slugs) {
const filePath = join(repoPath, slug + '.md');
if (!existsSync(filePath)) continue;
const filePath = fileBySlug.get(slug);
if (!filePath) continue;
try {
const content = readFileSync(filePath, 'utf-8');
for (const link of extractLinksFromFile(content, slug + '.md', allSlugs)) {
for (const link of extractLinksFromFile(content, relative(repoPath, filePath), allSlugs)) {
try { await engine.addLink(link.from_slug, link.to_slug, link.context, link.link_type); created++; } catch { /* skip */ }
}
} catch { /* skip */ }
Expand All @@ -328,10 +333,12 @@ export async function extractLinksForSlugs(engine: BrainEngine, repoPath: string
}

export async function extractTimelineForSlugs(engine: BrainEngine, repoPath: string, slugs: string[]): Promise<number> {
const allFiles = walkMarkdownFiles(repoPath);
const fileBySlug = new Map(allFiles.map(f => [slugifyPath(f.relPath), f.path]));
let created = 0;
for (const slug of slugs) {
const filePath = join(repoPath, slug + '.md');
if (!existsSync(filePath)) continue;
const filePath = fileBySlug.get(slug);
if (!filePath) continue;
try {
const content = readFileSync(filePath, 'utf-8');
for (const entry of extractTimelineFromContent(content, slug)) {
Expand Down
57 changes: 57 additions & 0 deletions test/extract.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import { describe, it, expect } from 'bun:test';
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
import {
extractMarkdownLinks,
extractLinksFromFile,
extractLinksForSlugs,
extractTimelineFromContent,
runExtract,
walkMarkdownFiles,
} from '../src/commands/extract.ts';

Expand Down Expand Up @@ -79,6 +84,58 @@ describe('extractLinksFromFile', () => {
const links = extractLinksFromFile(content, 'deals/seed.md', allSlugs);
expect(links[0].link_type).toBe('deal_for');
});

// A mixed-case source path ('Folder/INDEX.md') linking to 'README.md' must
// produce exactly one link whose endpoints match the lowercase entries in
// the known-slug set.
it('slugifies from/to paths the same way import does', () => {
  const knownSlugs = new Set(['folder/index', 'folder/readme']);
  const found = extractLinksFromFile('See [Readme](README.md).', 'Folder/INDEX.md', knownSlugs);

  expect(found).toHaveLength(1);
  const [only] = found;
  expect(only.from_slug).toBe('folder/index');
  expect(only.to_slug).toBe('folder/readme');
});
});

/**
 * Builds a throwaway directory containing `Folder/README.md` and
 * `Folder/INDEX.md` (the latter links to the former), passes its path to
 * `body`, and removes the directory afterwards even if `body` throws.
 *
 * @param prefix - temp-directory name prefix handed to `mkdtempSync`
 * @param body - test logic that receives the fixture directory path
 */
async function withMixedCaseFixture(
  prefix: string,
  body: (dir: string) => Promise<void>,
): Promise<void> {
  const dir = mkdtempSync(join(tmpdir(), prefix));
  try {
    mkdirSync(join(dir, 'Folder'), { recursive: true });
    writeFileSync(join(dir, 'Folder', 'README.md'), '# Readme\n');
    writeFileSync(join(dir, 'Folder', 'INDEX.md'), 'See [Readme](README.md).\n');
    await body(dir);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}

describe('extractLinksForSlugs', () => {
  it('finds files by slugified path during sync extraction', async () => {
    await withMixedCaseFixture('gbrain-extract-', async (dir) => {
      const links: Array<{ from: string; to: string }> = [];
      // Minimal engine stub: only addLink is exercised by this code path.
      const engine = {
        addLink: async (from: string, to: string) => { links.push({ from, to }); },
      } as any;

      // The slug is lowercase while the file on disk is 'Folder/INDEX.md'.
      const created = await extractLinksForSlugs(engine, dir, ['folder/index']);
      expect(created).toBe(1);
      expect(links).toEqual([{ from: 'folder/index', to: 'folder/readme' }]);
    });
  });
});

// Kept in its own suite so a failure is reported against runExtract rather
// than against extractLinksForSlugs.
describe('runExtract', () => {
  it('accepts a positional dir argument in runExtract', async () => {
    await withMixedCaseFixture('gbrain-runextract-', async (dir) => {
      const links: Array<{ from: string; to: string }> = [];
      // Engine stub that reports no existing pages or links and records
      // every link that gets added.
      const engine = {
        listPages: async () => [],
        getLinks: async () => [],
        addLink: async (from: string, to: string) => { links.push({ from, to }); },
      } as any;

      // The directory is passed positionally, without a preceding '--dir'.
      await runExtract(engine, ['links', dir]);
      expect(links).toEqual([{ from: 'folder/index', to: 'folder/readme' }]);
    });
  });
});

describe('extractTimelineFromContent', () => {
Expand Down