From 7367405085e83cb2e9d315d57c4d5a442a5609c8 Mon Sep 17 00:00:00 2001 From: Eric Oberhofer <8151512+EagleEyez1@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:14:15 -0400 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20add=20gbrain=20relink=20command=20?= =?UTF-8?q?=E2=80=94=20rebuild=20DB=20link=20graph=20from=20markdown=20lin?= =?UTF-8?q?ks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New gbrain relink [--dir <dir>] CLI command - extractInternalMarkdownLinks / resolveInternalMarkdownLink in core/markdown - reconcilePageLinks in core/import-file — syncs DB links to markdown links on every import, even for unchanged content - Body splitter now falls back to ## Timeline heading when delimiter is absent - Docs: cron-schedule.md updated to reference relink in weekly + dream-cycle flows - Tests: import-file and markdown test coverage for link extraction and reconciliation Work-in-progress local feature, preserved here before fast-forwarding master to origin/master. Rebase onto master once main checkout is updated. --- docs/guides/cron-schedule.md | 13 +++---- src/cli.ts | 8 ++++- src/commands/relink.ts | 69 +++++++++++++++++++++++++++++++++++ src/core/import-file.ts | 31 +++++++++++++++- src/core/markdown.ts | 62 +++++++++++++++++++++++++++++++- test/import-file.test.ts | 70 ++++++++++++++++++++++++++++++++++++ test/markdown.test.ts | 31 ++++++++++++++-- 7 files changed, 273 insertions(+), 11 deletions(-) create mode 100644 src/commands/relink.ts diff --git a/docs/guides/cron-schedule.md b/docs/guides/cron-schedule.md index a96609f2..48f2a61a 100644 --- a/docs/guides/cron-schedule.md +++ b/docs/guides/cron-schedule.md @@ -23,8 +23,8 @@ fixed. You wake up and the brain is smarter than when you went to sleep.
| 3x/day (weekdays) | Meeting sync | Full ingestion + attendee propagation | [meeting-sync](../../recipes/meeting-sync.md) | | Weekly | Calendar sync | Daily files + attendee enrichment | [calendar-to-brain](../../recipes/calendar-to-brain.md) | | Daily AM | Morning briefing | Search calendar attendees, deal status, active threads | [briefing skill](../../skills/briefing/SKILL.md) | -| Weekly | Brain maintenance | `gbrain doctor`, embed stale, orphan detection | [maintain skill](../../skills/maintain/SKILL.md) | -| Nightly | Dream cycle | Entity sweep, enrich thin spots, fix citations | See below | +| Weekly | Brain maintenance | `gbrain doctor`, embed stale, orphan detection, relink DB graph from markdown links | [maintain skill](../../skills/maintain/SKILL.md) | +| Nightly | Dream cycle | Entity sweep, enrich thin spots, fix citations, add/fix cross-links | See below | ## Implementation: Setting Up Cron Jobs @@ -42,7 +42,7 @@ fixed. You wake up and the brain is smarter than when you went to sleep. 0 10 * * 0 cd /path/to/calendar-sync && node calendar-sync.mjs --start $(date -v-7d +%Y-%m-%d) --end $(date +%Y-%m-%d) # Brain health — weekly Mondays at 6 AM -0 6 * * 1 gbrain doctor --json >> /tmp/gbrain-health.log 2>&1 && gbrain embed --stale +0 6 * * 1 gbrain doctor --json >> /tmp/gbrain-health.log 2>&1 && gbrain embed --stale && gbrain relink --dir /path/to/brain # Dream cycle — nightly at 2 AM 0 2 * * * /path/to/dream-cycle.sh @@ -121,6 +121,7 @@ dream_cycle(): // Phase 4: Sync gbrain sync --no-pull --no-embed + gbrain relink --dir /path/to/brain gbrain embed --stale ``` @@ -182,9 +183,9 @@ echo "Dream cycle complete at $(date)" 1. **Quiet hours:** Set quiet hours to current hour. Run a notification cron. Verify output went to `/tmp/cron-held/`, not to messaging. -2. **Dream cycle:** Run the dream cycle manually. Check that thin entity pages - got enriched and broken citations were fixed. -3. **Email collector cron:** Wait 30 minutes. 
Check `data/digests/` for new digest. +2. **Dream cycle:** Run the dream cycle manually. Check that thin entity pages + got enriched, broken citations were fixed, and the DB link graph was rebuilt. +3. **Email collector cron:** Wait 30 minutes. Check `data/digests/` for new digest. 4. **Morning briefing:** Check that held messages appear in the briefing. 5. **Health check:** Run `gbrain doctor --json`. All checks should pass. diff --git a/src/cli.ts b/src/cli.ts index 33f149f4..b2200ebb 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -18,7 +18,7 @@ for (const op of operations) { } // CLI-only commands that bypass the operation layer -const CLI_ONLY = new Set(['init', 'upgrade', 'post-upgrade', 'check-update', 'integrations', 'publish', 'check-backlinks', 'lint', 'report', 'import', 'export', 'files', 'embed', 'serve', 'call', 'config', 'doctor', 'migrate', 'eval', 'sync', 'extract', 'features', 'autopilot']); +const CLI_ONLY = new Set(['init', 'upgrade', 'post-upgrade', 'check-update', 'integrations', 'publish', 'check-backlinks', 'lint', 'report', 'import', 'export', 'files', 'embed', 'relink', 'serve', 'call', 'config', 'doctor', 'migrate', 'eval', 'sync', 'extract', 'features', 'autopilot']); async function main() { const args = process.argv.slice(2); @@ -321,6 +321,11 @@ async function handleCliOnly(command: string, args: string[]) { await runEmbed(engine, args); break; } + case 'relink': { + const { runRelink } = await import('./commands/relink.ts'); + await runRelink(engine, args); + break; + } case 'serve': { const { runServe } = await import('./commands/serve.ts'); await runServe(engine); @@ -468,6 +473,7 @@ TOOLS extract [dir] Extract links/timeline from markdown into DB publish [--password] Shareable HTML (strips private data, optional AES-256) check-backlinks [dir] Find/fix missing back-links across brain + relink [--dir <dir>] Rebuild DB link graph from markdown links lint [--fix] Catch LLM artifacts, placeholder dates, bad frontmatter report --type --content ...
Save timestamped report to brain/reports/ diff --git a/src/commands/relink.ts b/src/commands/relink.ts new file mode 100644 index 00000000..7dba6acf --- /dev/null +++ b/src/commands/relink.ts @@ -0,0 +1,69 @@ +import { existsSync, lstatSync, readdirSync } from 'fs'; +import { join, relative } from 'path'; +import type { BrainEngine } from '../core/engine.ts'; +import { importFromFile } from '../core/import-file.ts'; +import { isSyncable } from '../core/sync.ts'; + +function collectSyncableFiles(rootDir: string): { fullPath: string; relPath: string }[] { + const files: { fullPath: string; relPath: string }[] = []; + + function walk(dir: string) { + for (const entry of readdirSync(dir)) { + if (entry.startsWith('.')) continue; + const fullPath = join(dir, entry); + const stat = lstatSync(fullPath); + if (stat.isSymbolicLink()) continue; + if (stat.isDirectory()) { + walk(fullPath); + continue; + } + + const relPath = relative(rootDir, fullPath).replace(/\\/g, '/'); + if (isSyncable(relPath)) { + files.push({ fullPath, relPath }); + } + } + } + + walk(rootDir); + files.sort((a, b) => a.relPath.localeCompare(b.relPath)); + return files; +} + +export async function runRelink(engine: BrainEngine, args: string[]) { + const dirIdx = args.indexOf('--dir'); + const rootDir = dirIdx >= 0 ? 
args[dirIdx + 1] : '.'; + const dryRun = args.includes('--dry-run'); + + if (!existsSync(rootDir)) { + console.error(`Directory not found: ${rootDir}`); + process.exit(1); + } + + const files = collectSyncableFiles(rootDir); + if (files.length === 0) { + console.log('No syncable markdown files found.'); + return; + } + + if (dryRun) { + console.log(`Would relink ${files.length} syncable markdown file(s) from ${rootDir}.`); + return; + } + + let imported = 0; + let skipped = 0; + let errors = 0; + + for (const file of files) { + const result = await importFromFile(engine, file.fullPath, file.relPath, { noEmbed: true }); + if (result.status === 'imported') imported++; + else if (result.status === 'skipped') skipped++; + else errors++; + } + + console.log(`Relinked ${files.length} file(s) from ${rootDir}.`); + console.log(` imported_or_updated: ${imported}`); + console.log(` unchanged_or_reconciled: ${skipped}`); + console.log(` errors: ${errors}`); +} diff --git a/src/core/import-file.ts b/src/core/import-file.ts index 6f00a121..92305491 100644 --- a/src/core/import-file.ts +++ b/src/core/import-file.ts @@ -1,7 +1,7 @@ import { readFileSync, statSync, lstatSync } from 'fs'; import { createHash } from 'crypto'; import type { BrainEngine } from './engine.ts'; -import { parseMarkdown } from './markdown.ts'; +import { extractInternalMarkdownLinks, parseMarkdown } from './markdown.ts'; import { chunkText } from './chunkers/recursive.ts'; import { embedBatch } from './embedding.ts'; import { slugifyPath } from './sync.ts'; @@ -16,6 +16,29 @@ export interface ImportResult { const MAX_FILE_SIZE = 5_000_000; // 5MB +async function reconcilePageLinks(engine: BrainEngine, slug: string, desiredTargets: string[]): Promise { + const desired = new Set(desiredTargets.filter(target => target && target !== slug)); + const existingLinks = await engine.getLinks(slug); + const existing = new Set(existingLinks.map(link => link.to_slug)); + + for (const target of existing) { + if 
(!desired.has(target)) { + await engine.removeLink(slug, target); + } + } + + for (const target of desired) { + if (!existing.has(target)) { + try { + await engine.addLink(slug, target); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn(`Skipping unresolved link ${slug} -> ${target}: ${message}`); + } + } + } +} + /** * Import content from a string. Core pipeline: * parse -> hash -> embed (external) -> transaction(version + putPage + tags + chunks) @@ -48,6 +71,7 @@ export async function importFromContent( } const parsed = parseMarkdown(content, slug + '.md'); + const desiredLinkTargets = extractInternalMarkdownLinks(content, slug).map(link => link.target_slug); // Hash includes ALL fields for idempotency (not just compiled_truth + timeline) const hash = createHash('sha256') @@ -63,6 +87,9 @@ export async function importFromContent( const existing = await engine.getPage(slug); if (existing?.content_hash === hash) { + await engine.transaction(async (tx) => { + await reconcilePageLinks(tx, slug, desiredLinkTargets); + }); return { slug, status: 'skipped', chunks: 0 }; } @@ -115,6 +142,8 @@ export async function importFromContent( await tx.addTag(slug, tag); } + await reconcilePageLinks(tx, slug, desiredLinkTargets); + if (chunks.length > 0) { await tx.upsertChunks(slug, chunks); } else { diff --git a/src/core/markdown.ts b/src/core/markdown.ts index 239fe054..5bf783e2 100644 --- a/src/core/markdown.ts +++ b/src/core/markdown.ts @@ -1,3 +1,4 @@ +import { posix as pathPosix } from 'path'; import matter from 'gray-matter'; import type { PageType } from './types.ts'; import { slugifyPath } from './sync.ts'; @@ -12,6 +13,12 @@ export interface ParsedMarkdown { tags: string[]; } +export interface InternalMarkdownLink { + text: string; + href: string; + target_slug: string; +} + /** * Parse a markdown file with YAML frontmatter into its components. 
* @@ -84,7 +91,7 @@ export function splitBody(body: string): { compiled_truth: string; timeline: str } if (splitIndex === -1) { - return { compiled_truth: body, timeline: '' }; + return splitBodyByHeading(body); } const compiled_truth = lines.slice(0, splitIndex).join('\n'); @@ -92,6 +99,17 @@ export function splitBody(body: string): { compiled_truth: string; timeline: str return { compiled_truth, timeline }; } +function splitBodyByHeading(body: string): { compiled_truth: string; timeline: string } { + const timelineMatch = body.match(/^##\s+Timeline\s*$/m); + if (!timelineMatch || timelineMatch.index === undefined) { + return { compiled_truth: body, timeline: '' }; + } + + const compiled_truth = body.slice(0, timelineMatch.index).trimEnd(); + const timeline = body.slice(timelineMatch.index + timelineMatch[0].length).trimStart(); + return { compiled_truth, timeline }; +} + /** * Serialize a page back to markdown format. * Produces: frontmatter + compiled_truth + --- + timeline @@ -122,6 +140,48 @@ export function serializeMarkdown( return yamlContent + '\n\n' + body + '\n'; } +/** + * Extract internal markdown links and resolve them to page slugs. + * External URLs, anchors, mailto links, and non-markdown assets are ignored. 
+ */ +export function extractInternalMarkdownLinks(content: string, sourceSlug: string): InternalMarkdownLink[] { + const linkPattern = /\[([^\]]+)\]\(([^)]+)\)/g; + const links: InternalMarkdownLink[] = []; + const seen = new Set(); + + let match: RegExpExecArray | null; + while ((match = linkPattern.exec(content)) !== null) { + const text = match[1]?.trim() || ''; + const href = match[2]?.trim() || ''; + const targetSlug = resolveInternalMarkdownLink(sourceSlug, href); + if (!targetSlug) continue; + + const dedupeKey = `${targetSlug}|${href}|${text}`; + if (seen.has(dedupeKey)) continue; + seen.add(dedupeKey); + links.push({ text, href, target_slug: targetSlug }); + } + + return links; +} + +export function resolveInternalMarkdownLink(sourceSlug: string, href: string): string | null { + const trimmed = href.trim(); + if (!trimmed) return null; + if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) return null; + if (trimmed.startsWith('mailto:') || trimmed.startsWith('#')) return null; + + const withoutFragment = trimmed.split('#')[0] || ''; + const withoutQuery = withoutFragment.split('?')[0] || ''; + if (!withoutQuery.toLowerCase().endsWith('.md') && !withoutQuery.toLowerCase().endsWith('.mdx')) return null; + + const sourcePath = `${sourceSlug}.md`; + const resolvedPath = pathPosix.normalize(pathPosix.join(pathPosix.dirname(sourcePath), withoutQuery)); + if (resolvedPath.startsWith('..')) return null; + + return slugifyPath(resolvedPath); +} + function inferType(filePath?: string): PageType { if (!filePath) return 'concept'; diff --git a/test/import-file.test.ts b/test/import-file.test.ts index 60be770a..8a48e628 100644 --- a/test/import-file.test.ts +++ b/test/import-file.test.ts @@ -20,6 +20,7 @@ function mockEngine(overrides: Partial> = {}): BrainEngine { if (prop === '_calls') return calls; if (prop === 'getTags') return overrides.getTags || (() => Promise.resolve([])); if (prop === 'getPage') return overrides.getPage || (() => 
Promise.resolve(null)); + if (prop === 'getLinks') return overrides.getLinks || (() => Promise.resolve([])); // transaction: just call the fn with the same engine (no real DB transaction in tests) if (prop === 'transaction') return async (fn: (tx: BrainEngine) => Promise) => fn(engine); return track(prop); @@ -243,6 +244,75 @@ Content here. expect(addCalls.length).toBe(2); }); + test('reconciles outgoing links on import', async () => { + const filePath = join(TMP, 'linked.md'); + writeFileSync(filePath, `--- +type: person +title: Linked +--- + +Works at [ExoHelix](../companies/exohelix.md) with [Joanna](joanna-oberhofer.md). +`); + + const engine = mockEngine({ + getLinks: () => Promise.resolve([{ from_slug: 'people/linked', to_slug: 'people/old-link', link_type: '', context: '' }]), + }); + + await importFile(engine, filePath, 'people/linked.md', { noEmbed: true }); + + const calls = (engine as any)._calls; + const addCalls = calls.filter((c: any) => c.method === 'addLink'); + const removeCalls = calls.filter((c: any) => c.method === 'removeLink'); + + expect(removeCalls).toHaveLength(1); + expect(removeCalls[0].args).toEqual(['people/linked', 'people/old-link']); + expect(addCalls.map((c: any) => c.args)).toEqual([ + ['people/linked', 'companies/exohelix'], + ['people/linked', 'people/joanna-oberhofer'], + ]); + }); + + test('reconciles outgoing links even when content hash is unchanged', async () => { + const filePath = join(TMP, 'unchanged-links.md'); + const content = `--- +type: person +title: Unchanged Links +--- + +Works at [ExoHelix](../companies/exohelix.md). 
+`; + writeFileSync(filePath, content); + + const { createHash } = await import('crypto'); + const { parseMarkdown } = await import('../src/core/markdown.ts'); + const parsed = parseMarkdown(content, 'people/unchanged-links.md'); + const hash = createHash('sha256') + .update(JSON.stringify({ + title: parsed.title, + type: parsed.type, + compiled_truth: parsed.compiled_truth, + timeline: parsed.timeline, + frontmatter: parsed.frontmatter, + tags: parsed.tags.sort(), + })) + .digest('hex'); + + const engine = mockEngine({ + getPage: () => Promise.resolve({ content_hash: hash }), + getLinks: () => Promise.resolve([]), + }); + + const result = await importFile(engine, filePath, 'people/unchanged-links.md', { noEmbed: true }); + expect(result.status).toBe('skipped'); + + const calls = (engine as any)._calls; + expect(calls.find((c: any) => c.method === 'putPage')).toBeUndefined(); + const addCalls = calls.filter((c: any) => c.method === 'addLink'); + expect(addCalls.map((c: any) => c.args)).toEqual([ + ['people/unchanged-links', 'companies/exohelix'], + ]); + }); + test('chunks compiled_truth and timeline separately', async () => { const filePath = join(TMP, 'chunked.md'); writeFileSync(filePath, `--- diff --git a/test/markdown.test.ts b/test/markdown.test.ts index aa214024..59915196 100644 --- a/test/markdown.test.ts +++ b/test/markdown.test.ts @@ -1,5 +1,5 @@ import { describe, test, expect } from 'bun:test'; -import { parseMarkdown, serializeMarkdown, splitBody } from '../src/core/markdown.ts'; +import { extractInternalMarkdownLinks, parseMarkdown, resolveInternalMarkdownLink, serializeMarkdown, splitBody } from '../src/core/markdown.ts'; describe('Markdown Parser', () => { test('parses frontmatter + compiled_truth + timeline', () => { @@ -110,7 +110,15 @@ describe('splitBody', () => { expect(compiled_truth).toContain('Content here'); expect(timeline.trim()).toBe(''); }); -}); + + test('falls back to heading-based timeline sections', () => { + const body = '# Eric 
Oberhofer\n\n## Compiled Truth\n\nCurrent state.\n\n## Timeline\n\n### 2026-04-13 — Created\nPage created.'; + const { compiled_truth, timeline } = splitBody(body); + expect(compiled_truth).toContain('## Compiled Truth'); + expect(timeline).toContain('2026-04-13'); + expect(timeline).not.toContain('## Timeline'); + }); + }); describe('serializeMarkdown', () => { test('round-trips through parse and serialize', () => { @@ -200,3 +208,22 @@ Some content.`; expect(parseMarkdown('', 'companies/acme.md').type).toBe('company'); }); }); + +describe('internal markdown links', () => { + test('resolves relative links to page slugs', () => { + expect(resolveInternalMarkdownLink('people/eric-oberhofer', '../companies/exohelix.md')).toBe('companies/exohelix'); + expect(resolveInternalMarkdownLink('people/eric-oberhofer', 'joanna-oberhofer.md')).toBe('people/joanna-oberhofer'); + }); + + test('ignores external and non-markdown links', () => { + expect(resolveInternalMarkdownLink('people/eric-oberhofer', 'https://example.com')).toBeNull(); + expect(resolveInternalMarkdownLink('people/eric-oberhofer', '#timeline')).toBeNull(); + expect(resolveInternalMarkdownLink('people/eric-oberhofer', '../files/photo.png')).toBeNull(); + }); + + test('extracts unique internal markdown links from content', () => { + const content = '[ExoHelix](../companies/exohelix.md) and [Joanna](joanna-oberhofer.md) and [ExoHelix](../companies/exohelix.md)'; + const links = extractInternalMarkdownLinks(content, 'people/eric-oberhofer'); + expect(links.map(link => link.target_slug)).toEqual(['companies/exohelix', 'people/joanna-oberhofer']); + }); +}); From 8f74ad210bae2f9f36c67e689df3dc324d4139ec Mon Sep 17 00:00:00 2001 From: Eric Oberhofer <8151512+EagleEyez1@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:14:15 -0400 Subject: [PATCH 2/2] fix(relink): honor --help/-h; print usage instead of executing Previously `gbrain relink --help` would try to relink because argument parsing didn't recognize --help as 
a help request and fell through to the normal execution path. Add an explicit short-circuit at the top of runRelink and document the command surface. --- src/commands/relink.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/commands/relink.ts b/src/commands/relink.ts index 7dba6acf..7d07ec6b 100644 --- a/src/commands/relink.ts +++ b/src/commands/relink.ts @@ -31,6 +31,23 @@ function collectSyncableFiles(rootDir: string): { fullPath: string; relPath: str } export async function runRelink(engine: BrainEngine, args: string[]) { + if (args.includes('--help') || args.includes('-h')) { + console.log(`gbrain relink — rebuild DB link graph from markdown links + +USAGE + gbrain relink [--dir <dir>] [--dry-run] + +OPTIONS + --dir <dir> Brain directory to scan (default: current directory) + --dry-run Report how many files would be relinked without writing + +Relink re-imports every syncable markdown file under <dir> with embeddings +skipped, which triggers link reconciliation in import-file. Use after bulk +markdown edits, schema migrations, or when the DB link graph has drifted +from the markdown source of truth.`); + return; + } + const dirIdx = args.indexOf('--dir'); const rootDir = dirIdx >= 0 ? args[dirIdx + 1] : '.'; const dryRun = args.includes('--dry-run');