Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ for (const op of operations) {
}

// CLI-only commands that bypass the operation layer
const CLI_ONLY = new Set(['init', 'upgrade', 'post-upgrade', 'check-update', 'integrations', 'publish', 'check-backlinks', 'lint', 'report', 'import', 'export', 'files', 'embed', 'serve', 'call', 'config', 'doctor', 'migrate', 'eval', 'sync', 'extract', 'features', 'autopilot', 'graph-query', 'jobs', 'apply-migrations', 'skillpack-check']);
const CLI_ONLY = new Set(['init', 'upgrade', 'post-upgrade', 'check-update', 'integrations', 'publish', 'check-backlinks', 'lint', 'report', 'import', 'export', 'files', 'embed', 'serve', 'call', 'config', 'doctor', 'migrate', 'eval', 'sync', 'extract', 'features', 'autopilot', 'graph-query', 'jobs', 'apply-migrations', 'skillpack-check', 'orphans']);

async function main() {
const args = process.argv.slice(2);
Expand Down Expand Up @@ -412,6 +412,11 @@ async function handleCliOnly(command: string, args: string[]) {
await runGraphQuery(engine, args);
break;
}
case 'orphans': {
const { runOrphans } = await import('./commands/orphans.ts');
await runOrphans(engine, args);
break;
}
}
} finally {
if (command !== 'serve') await engine.disconnect();
Expand Down Expand Up @@ -520,6 +525,7 @@ TOOLS
publish <page.md> [--password] Shareable HTML (strips private data, optional AES-256)
check-backlinks <check|fix> [dir] Find/fix missing back-links across brain
lint <dir|file> [--fix] Catch LLM artifacts, placeholder dates, bad frontmatter
orphans [--json] [--count] Find pages with no inbound wikilinks
report --type <name> --content ... Save timestamped report to brain/reports/

JOBS (Minions)
Expand Down
104 changes: 87 additions & 17 deletions src/commands/extract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,33 +69,95 @@ export function walkMarkdownFiles(dir: string): { path: string; relPath: string

// --- Link extraction ---

/** Extract markdown links to .md files (relative paths only) */
/** Extract markdown links to .md files (relative paths only).
*
* Handles two syntaxes:
* 1. Standard markdown: [text](relative/path.md)
* 2. Wikilinks: [[relative/path]] or [[relative/path|Display Text]]
*
* Both are resolved relative to the file that contains them, so the caller
* receives a relTarget that can be joined with dirname(relPath) to get the
* absolute slug. External URLs (containing ://) are always skipped.
*/
export function extractMarkdownLinks(content: string): { name: string; relTarget: string }[] {
const results: { name: string; relTarget: string }[] = [];
const pattern = /\[([^\]]+)\]\(([^)]+\.md)\)/g;

// Standard markdown links: [text](relative/path.md)
const mdPattern = /\[([^\]]+)\]\(([^)]+\.md)\)/g;
let match;
while ((match = pattern.exec(content)) !== null) {
while ((match = mdPattern.exec(content)) !== null) {
const target = match[2];
if (target.includes('://')) continue; // skip external URLs
results.push({ name: match[1], relTarget: target });
}

// Wikilinks: [[path/to/page]] or [[path/to/page|Display Text]]
// Path may or may not carry a .md suffix; normalise to include it.
// Skip external URLs like [[https://example.com|Title]].
// Strip section anchors: [[page#section|Title]] → page
const wikiPattern = /\[\[([^|\]]+?)(?:\|[^\]]*?)?\]\]/g;
while ((match = wikiPattern.exec(content)) !== null) {
const rawPath = match[1].trim();
if (rawPath.includes('://')) continue; // skip [[https://...]]
// Strip section anchors (#heading) — they're intra-page refs, not page slugs
const hashIdx = rawPath.indexOf('#');
const pagePath = hashIdx >= 0 ? rawPath.slice(0, hashIdx) : rawPath;
if (!pagePath) continue; // bare [[#anchor]] — same-page ref, skip
const relTarget = pagePath.endsWith('.md') ? pagePath : pagePath + '.md';
// Use the display text portion if present, otherwise the raw path
const pipeIdx = match[0].indexOf('|');
const displayName = pipeIdx >= 0
? match[0].slice(pipeIdx + 1, -2).trim()
: rawPath;
results.push({ name: displayName, relTarget });
}

return results;
}

/** Infer link type from directory structure */
function inferLinkType(fromDir: string, toDir: string, frontmatter?: Record<string, unknown>): string {
const from = fromDir.split('/')[0];
const to = toDir.split('/')[0];
if (from === 'people' && to === 'companies') {
if (Array.isArray(frontmatter?.founded)) return 'founded';
return 'works_at';
/**
* Resolve a wikilink target (relative path from extractMarkdownLinks) to a
* canonical slug, given the directory of the containing page and the set of
* all known slugs in the brain.
*
* Wiki KBs often use inconsistent relative depths:
* - Same-directory bare name: [[foo-bar]] from tech/wiki/analysis/ → tech/wiki/analysis/foo-bar ✓
* - Cross-type shorthand: [[analysis/foo]] from {domain}/wiki/guides/ → {domain}/wiki/analysis/foo
* (author omits the leading ../ because they think in "wiki-root-relative" terms)
* - Cross-domain with one-too-few ../: [[../../finance/wiki/...]] from {domain}/wiki/analysis/
* resolves to {domain}/finance/wiki/... instead of finance/wiki/... because depth-3 dirs
* need 3 × ../ to reach KB root, but authors only write 2 ×
*
* Resolution order (first match wins):
* 1. Standard join(fileDir, relTarget) — exact relative path as written
* 2. Progressively strip leading path components from fileDir (ancestor search):
* tries parent dir, grandparent dir, … up to KB root.
* Handles both cross-type and cross-domain under-specified paths.
*
* Returns null when no matching slug is found (dangling link).
*/
export function resolveSlug(fileDir: string, relTarget: string, allSlugs: Set<string>): string | null {
const targetNoExt = relTarget.endsWith('.md') ? relTarget.slice(0, -3) : relTarget;

// Strategy 1: standard relative resolution
const s1 = join(fileDir, targetNoExt);
if (allSlugs.has(s1)) return s1;

// Strategy 2: ancestor search — try each parent directory in turn.
// This resolves links whose authors omitted one or more leading ../
// (common when targeting sibling subdirectories or cross-domain pages).
const parts = fileDir.split('/').filter(Boolean);
for (let strip = 1; strip <= parts.length; strip++) {
const ancestor = parts.slice(0, parts.length - strip).join('/');
const candidate = ancestor ? join(ancestor, targetNoExt) : targetNoExt;
if (allSlugs.has(candidate)) return candidate;
}
if (from === 'people' && to === 'deals') return 'involved_in';
if (from === 'deals' && to === 'companies') return 'deal_for';
if (from === 'meetings' && to === 'people') return 'attendee';
return 'mention';

return null;
}

// inferLinkType is now imported from ../core/link-extraction.ts (v0.12.0 canonical extractor)

/** Extract links from frontmatter fields */
function extractFrontmatterLinks(slug: string, fm: Record<string, unknown>): ExtractedLink[] {
const links: ExtractedLink[] = [];
Expand Down Expand Up @@ -139,8 +201,8 @@ export function extractLinksFromFile(
const fm = parseFrontmatterFromContent(content, relPath);

for (const { name, relTarget } of extractMarkdownLinks(content)) {
const resolved = join(fileDir, relTarget).replace('.md', '');
if (allSlugs.has(resolved)) {
const resolved = resolveSlug(fileDir, relTarget, allSlugs);
if (resolved !== null) {
links.push({
from_slug: slug, to_slug: resolved,
link_type: inferLinkType(fileDir, dirname(resolved), fm),
Expand Down Expand Up @@ -231,7 +293,15 @@ export async function runExtractCore(engine: BrainEngine, opts: ExtractOpts): Pr
export async function runExtract(engine: BrainEngine, args: string[]) {
const subcommand = args[0];
const dirIdx = args.indexOf('--dir');
const brainDir = (dirIdx >= 0 && dirIdx + 1 < args.length) ? args[dirIdx + 1] : '.';
// Support --dir <path> flag, positional [dir] argument, or default to '.'
let brainDir: string;
if (dirIdx >= 0 && dirIdx + 1 < args.length) {
brainDir = args[dirIdx + 1];
} else if (args[1] && !args[1].startsWith('--')) {
brainDir = args[1];
} else {
brainDir = '.';
}
const sourceIdx = args.indexOf('--source');
const source = (sourceIdx >= 0 && sourceIdx + 1 < args.length) ? args[sourceIdx + 1] : 'fs';
const typeIdx = args.indexOf('--type');
Expand Down
227 changes: 227 additions & 0 deletions src/commands/orphans.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/**
* gbrain orphans — Surface pages with no inbound wikilinks.
*
* Deterministic: zero LLM calls. Queries the links table for pages with
* no entries where to_page_id = pages.id. By default filters out
* auto-generated pages and pseudo-pages where no inbound links is expected.
*
* Usage:
* gbrain orphans # list orphans grouped by domain
* gbrain orphans --json # JSON output for agent consumption
* gbrain orphans --count # just the number
* gbrain orphans --include-pseudo # include auto-generated/pseudo pages
*/

import type { BrainEngine } from '../core/engine.ts';
import * as db from '../core/db.ts';

// --- Types ---

// One orphan page as reported to the user / JSON consumer.
export interface OrphanPage {
  slug: string;   // canonical page slug (path without .md)
  title: string;  // page title, falling back to slug when untitled (see queryOrphanPages)
  domain: string; // frontmatter 'domain' when present, else first slug segment
}

// Aggregate result of an orphan scan, including summary totals.
export interface OrphanResult {
  orphans: OrphanPage[];     // orphan pages remaining after filtering
  total_orphans: number;     // orphans.length
  total_linkable: number;    // pages counted toward the orphan ratio (linked + non-excluded orphans)
  total_pages: number;       // every page in the DB
  excluded: number;          // orphans filtered out as pseudo/auto-generated
}

// --- Filter constants ---

// Auto-generated root files always carry one of these slug suffixes.
const AUTO_SUFFIX_PATTERNS = ['/_index', '/log'];

// Pseudo-pages by convention (compared against the full slug).
const PSEUDO_SLUGS = new Set(['_atlas', '_index', '_stats', '_orphans', '_scratch', 'claude']);

// Raw source material lives under a /raw/ path segment.
const RAW_SEGMENT = '/raw/';

// Slug prefixes of page trees where inbound links are never expected.
const DENY_PREFIXES = [
  'output/',
  'dashboards/',
  'scripts/',
  'templates/',
  'openclaw/config/',
];

// Top-level directories whose pages are expected to be unlinked.
const FIRST_SEGMENT_EXCLUSIONS = new Set(['scratch', 'thoughts', 'catalog', 'entities']);

// --- Filter logic ---

/**
 * Returns true if a slug should be excluded from orphan reporting by default.
 * These are pages where having no inbound links is expected / not a content problem.
 *
 * Checks, in order: exact pseudo-slug match, auto-generated suffixes, the
 * /raw/ segment, deny-listed prefixes, and excluded top-level directories.
 */
export function shouldExclude(slug: string): boolean {
  return (
    PSEUDO_SLUGS.has(slug) ||
    AUTO_SUFFIX_PATTERNS.some((suffix) => slug.endsWith(suffix)) ||
    slug.includes(RAW_SEGMENT) ||
    DENY_PREFIXES.some((prefix) => slug.startsWith(prefix)) ||
    FIRST_SEGMENT_EXCLUSIONS.has(slug.split('/')[0])
  );
}

/**
 * Derive domain from frontmatter or first slug segment.
 *
 * A non-empty, trimmed frontmatter string wins; otherwise the slug's first
 * path segment is used, with 'root' as the last-resort fallback for an
 * empty slug.
 */
export function deriveDomain(frontmatterDomain: string | null | undefined, slug: string): string {
  const fromFrontmatter = typeof frontmatterDomain === 'string' ? frontmatterDomain.trim() : '';
  if (fromFrontmatter) return fromFrontmatter;
  return slug.split('/')[0] || 'root';
}

// --- Core query ---

/**
 * Find pages with no inbound links.
 * Returns raw rows from the DB (all pages regardless of filter) — callers
 * apply shouldExclude() themselves.
 *
 * NOTE(review): assumes db.getConnection() returns a tagged-template SQL
 * client (postgres.js-style) — confirm against ../core/db.ts.
 */
export async function queryOrphanPages(): Promise<{ slug: string; title: string; domain: string | null }[]> {
  const sql = db.getConnection();
  // Anti-join: a page is an orphan when no links row has to_page_id = p.id.
  // COALESCE guarantees a non-null title; domain comes from JSONB frontmatter
  // and may be null (deriveDomain handles that).
  const rows = await sql`
    SELECT
      p.slug,
      COALESCE(p.title, p.slug) AS title,
      p.frontmatter->>'domain' AS domain
    FROM pages p
    WHERE NOT EXISTS (
      SELECT 1 FROM links l WHERE l.to_page_id = p.id
    )
    ORDER BY p.slug
  `;
  return rows as { slug: string; title: string; domain: string | null }[];
}

/**
 * Find orphan pages, with optional pseudo-page filtering.
 * Returns structured OrphanResult with totals.
 *
 * @param includePseudo when true, skip the shouldExclude() filter and report
 *        auto-generated/pseudo pages too (default false)
 * @returns OrphanResult — orphans plus summary totals for the report line
 */
export async function findOrphans(includePseudo: boolean = false): Promise<OrphanResult> {
  const allOrphans = await queryOrphanPages();

  // Count total pages in DB for the summary line
  const sql = db.getConnection();
  const [{ count: totalPagesCount }] = await sql`SELECT count(*)::int AS count FROM pages`;
  const total = Number(totalPagesCount);

  const filtered = includePseudo
    ? allOrphans
    : allOrphans.filter(row => !shouldExclude(row.slug));

  const orphans: OrphanPage[] = filtered.map(row => ({
    slug: row.slug,
    title: row.title,
    domain: deriveDomain(row.domain, row.slug),
  }));

  return {
    orphans,
    total_orphans: orphans.length,
    // linkable = pages that DO have inbound links (total - allOrphans.length)
    // plus the orphans that survived filtering
    total_linkable: filtered.length + (total - allOrphans.length),
    total_pages: total,
    excluded: allOrphans.length - filtered.length,
  };
}

// --- Output formatters ---

/**
 * Render an OrphanResult as human-readable text: one summary line, then
 * orphan slugs grouped under [domain] headers — domains alphabetical,
 * pages sorted by slug within each group.
 */
export function formatOrphansText(result: OrphanResult): string {
  const { orphans, total_orphans, total_linkable, total_pages, excluded } = result;

  const out: string[] = [
    `${total_orphans} orphans out of ${total_linkable} linkable pages (${total_pages} total; ${excluded} excluded)\n`,
  ];

  if (orphans.length === 0) {
    out.push('No orphan pages found.');
    return out.join('\n');
  }

  // Bucket pages by domain.
  const byDomain = new Map<string, OrphanPage[]>();
  for (const page of orphans) {
    const bucket = byDomain.get(page.domain);
    if (bucket) {
      bucket.push(page);
    } else {
      byDomain.set(page.domain, [page]);
    }
  }

  // Emit each domain group in alphabetical order.
  for (const domain of [...byDomain.keys()].sort()) {
    out.push(`[${domain}]`);
    const pages = byDomain.get(domain)!;
    pages.sort((a, b) => a.slug.localeCompare(b.slug));
    for (const page of pages) {
      out.push(`  ${page.slug} ${page.title}`);
    }
    out.push('');
  }

  return out.join('\n').trimEnd();
}

// --- CLI entry point ---

/**
 * CLI handler for `gbrain orphans`. Parses flags, runs the orphan scan, and
 * prints in the requested format (text by default, --json, or --count).
 * The engine parameter is unused: this command talks to the DB directly.
 */
export async function runOrphans(_engine: BrainEngine, args: string[]) {
  if (args.includes('--help') || args.includes('-h')) {
    console.log(`Usage: gbrain orphans [options]

Find pages with no inbound wikilinks.

Options:
  --json            Output as JSON (for agent consumption)
  --count           Output just the number of orphans
  --include-pseudo  Include auto-generated and pseudo pages in results
  --help, -h        Show this help

Output (default): grouped by domain, sorted alphabetically within each group
Summary line: N orphans out of M linkable pages (K total; K-M excluded)
`);
    return;
  }

  const wantJson = args.includes('--json');
  const wantCount = args.includes('--count');
  const result = await findOrphans(args.includes('--include-pseudo'));

  if (wantCount) {
    console.log(String(result.total_orphans));
    return;
  }
  if (wantJson) {
    console.log(JSON.stringify(result, null, 2));
    return;
  }
  console.log(formatOrphansText(result));
}
Loading