From 6abf63c4c117f468ca029b3d1decd157242488b4 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Thu, 2 Apr 2026 17:04:27 -0700 Subject: [PATCH 01/18] feat(core-ingestion): add TOML file support (.toml) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parses TOML files line-by-line, extracting [table] headers and key=value pairs as config_entry entities with CONTAINS relationships. Mirrors the YAML parser pattern — no external dependencies needed. Co-Authored-By: Claude Sonnet 4.6 --- .../src/__tests__/queries.toml.test.ts | 190 ++++++++++++++++++ core-ingestion/src/index.ts | 112 +++++++++++ core-ingestion/src/languages.ts | 2 + core-ingestion/src/queries.ts | 1 + ix-cli/src/cli/commands/ingest.ts | 1 + 5 files changed, 306 insertions(+) create mode 100644 core-ingestion/src/__tests__/queries.toml.test.ts diff --git a/core-ingestion/src/__tests__/queries.toml.test.ts b/core-ingestion/src/__tests__/queries.toml.test.ts new file mode 100644 index 0000000..5206f5a --- /dev/null +++ b/core-ingestion/src/__tests__/queries.toml.test.ts @@ -0,0 +1,190 @@ +import { describe, expect, it } from 'vitest'; + +import { parseFile } from '../index.js'; +import { languageFromPath, SupportedLanguages } from '../languages.js'; + +describe('TOML parsing', () => { + it('recognizes .toml as TOML', () => { + expect(languageFromPath('/repo/Cargo.toml')).toBe(SupportedLanguages.TOML); + expect(languageFromPath('/repo/pyproject.toml')).toBe(SupportedLanguages.TOML); + }); + + it('parses top-level key = value pairs', () => { + const result = parseFile( + '/repo/config.toml', + [ + 'name = "my-app"', + 'version = "1.0.0"', + 'debug = false', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.language).toBe(SupportedLanguages.TOML); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'name', + kind: 'config_entry', + language: SupportedLanguages.TOML, + container: undefined, + })); + 
expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'version', + kind: 'config_entry', + container: undefined, + })); + expect(result!.relationships).toContainEqual({ + srcName: 'config.toml', + dstName: 'name', + predicate: 'CONTAINS', + }); + }); + + it('parses [table] headers and keys within them', () => { + const result = parseFile( + '/repo/config.toml', + [ + '[database]', + 'host = "localhost"', + 'port = 5432', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'database', + kind: 'config_entry', + container: undefined, + })); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'host', + kind: 'config_entry', + container: 'database', + })); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'port', + kind: 'config_entry', + container: 'database', + })); + expect(result!.relationships).toContainEqual({ + srcName: 'config.toml', + dstName: 'database', + predicate: 'CONTAINS', + }); + expect(result!.relationships).toContainEqual({ + srcName: 'database', + dstName: 'host', + predicate: 'CONTAINS', + }); + }); + + it('parses nested [section.subsection] tables', () => { + const result = parseFile( + '/repo/config.toml', + [ + '[server.tls]', + 'cert = "/etc/ssl/cert.pem"', + 'key = "/etc/ssl/key.pem"', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'tls', + kind: 'config_entry', + container: 'server', + })); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'cert', + kind: 'config_entry', + container: 'server.tls', + })); + expect(result!.relationships).toContainEqual({ + srcName: 'server', + dstName: 'tls', + predicate: 'CONTAINS', + }); + expect(result!.relationships).toContainEqual({ + srcName: 'server.tls', + dstName: 'cert', + predicate: 'CONTAINS', + }); + }); + + it('parses [[array-of-tables]] 
headers', () => { + const result = parseFile( + '/repo/config.toml', + [ + '[[servers]]', + 'name = "alpha"', + '', + '[[servers]]', + 'name = "beta"', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + // Both [[servers]] headers produce a 'servers' entity + const serverEntities = result!.entities.filter(e => e.name === 'servers'); + expect(serverEntities.length).toBe(2); + expect(result!.entities.filter(e => e.name === 'name').length).toBe(2); + }); + + it('ignores comment lines and blank lines', () => { + const result = parseFile( + '/repo/config.toml', + [ + '# This is a comment', + '', + 'key = "value"', + '# another comment', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + const nonFileEntities = result!.entities.filter(e => e.kind !== 'file'); + expect(nonFileEntities).toHaveLength(1); + expect(nonFileEntities[0].name).toBe('key'); + }); + + it('produces a file_body chunk for empty or comment-only files', () => { + const result = parseFile('/repo/config.toml', '# just a comment\n'); + + expect(result).not.toBeNull(); + expect(result!.chunks).toHaveLength(1); + expect(result!.chunks[0].chunkKind).toBe('file_body'); + }); + + it('produces config_key chunks with content hashes', () => { + const result = parseFile('/repo/Cargo.toml', 'name = "my-crate"\n'); + + expect(result).not.toBeNull(); + expect(result!.chunks).toContainEqual(expect.objectContaining({ + name: 'name', + chunkKind: 'config_key', + contentHash: expect.stringMatching(/^[0-9a-f]{64}$/), + })); + }); + + it('parses a realistic Cargo.toml', () => { + const result = parseFile( + '/repo/Cargo.toml', + [ + '[package]', + 'name = "my-crate"', + 'version = "0.1.0"', + 'edition = "2021"', + '', + '[dependencies]', + 'serde = { version = "1", features = ["derive"] }', + 'tokio = "1"', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ name: 'package' })); + 
expect(result!.entities).toContainEqual(expect.objectContaining({ name: 'name', container: 'package' })); + expect(result!.entities).toContainEqual(expect.objectContaining({ name: 'dependencies' })); + expect(result!.entities).toContainEqual(expect.objectContaining({ name: 'serde', container: 'dependencies' })); + expect(result!.entities).toContainEqual(expect.objectContaining({ name: 'tokio', container: 'dependencies' })); + }); +}); diff --git a/core-ingestion/src/index.ts b/core-ingestion/src/index.ts index 51cce19..bc689c8 100644 --- a/core-ingestion/src/index.ts +++ b/core-ingestion/src/index.ts @@ -388,6 +388,117 @@ function parseYamlFile(filePath: string, source: string): FileParseResult { }; } +function parseTomlFile(filePath: string, source: string): FileParseResult { + const language = SupportedLanguages.TOML; + const fileName = nodePath.basename(filePath); + const sourceLineCount = countSourceLines(source); + const fileRole = classifyFileRole(filePath); + const entities: ParsedEntity[] = [ + { name: fileName, kind: 'file', lineStart: 1, lineEnd: sourceLineCount, language }, + ]; + const chunks: ParsedChunk[] = []; + const relationships: ParsedRelationship[] = []; + const lineStarts = computeLineStarts(source); + const lines = source.split(/\r?\n/); + + // [table] or [[array-of-tables]] headers set the current section context. + const tableHeaderPattern = /^\s*\[{1,2}([^\[\]]+)\]{1,2}\s*(?:#.*)?$/; + // key = value lines (bare keys, quoted keys, dotted keys). 
+ const keyPattern = /^\s*([A-Za-z0-9_"'-][A-Za-z0-9_"' .-]*?)\s*=/; + + let currentTable: string | null = null; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const trimmed = line.trimStart(); + if (!trimmed || trimmed.startsWith('#')) continue; + + const tableMatch = tableHeaderPattern.exec(line); + if (tableMatch) { + const tablePath = tableMatch[1].trim(); + currentTable = tablePath; + const parts = tablePath.split('.'); + const key = parts[parts.length - 1]; + const parent = parts.length > 1 ? parts.slice(0, -1).join('.') : null; + const lineNumber = i + 1; + const startByte = lineStarts[i] ?? 0; + + entities.push({ + name: key, + kind: 'config_entry', + lineStart: lineNumber, + lineEnd: lineNumber, + language, + container: parent ?? undefined, + }); + chunks.push({ + name: key, + chunkKind: 'config_key', + lineStart: lineNumber, + lineEnd: lineNumber, + startByte, + endByte: startByte + line.length, + contentHash: crypto.createHash('sha256').update(line).digest('hex'), + language, + container: parent ?? undefined, + }); + relationships.push({ + srcName: parent ?? fileName, + dstName: key, + predicate: 'CONTAINS', + }); + continue; + } + + const keyMatch = keyPattern.exec(line); + if (keyMatch) { + const key = keyMatch[1].trim().replace(/^["']|["']$/g, ''); + const lineNumber = i + 1; + const startByte = lineStarts[i] ?? 0; + + entities.push({ + name: key, + kind: 'config_entry', + lineStart: lineNumber, + lineEnd: lineNumber, + language, + container: currentTable ?? undefined, + }); + chunks.push({ + name: key, + chunkKind: 'config_key', + lineStart: lineNumber, + lineEnd: lineNumber, + startByte, + endByte: startByte + line.length, + contentHash: crypto.createHash('sha256').update(line).digest('hex'), + language, + container: currentTable ?? undefined, + }); + relationships.push({ + srcName: currentTable ?? 
fileName, + dstName: key, + predicate: 'CONTAINS', + }); + } + } + + if (chunks.length === 0) { + chunks.push({ + name: null, + chunkKind: 'file_body', + lineStart: 1, + lineEnd: Math.max(sourceLineCount, 1), + startByte: 0, + endByte: source.length, + contentHash: crypto.createHash('sha256').update(source).digest('hex'), + language, + }); + } + + return { filePath, language, entities, chunks, relationships, fileRole }; +} + function parseDockerfileFile(filePath: string, source: string): FileParseResult { const language = SupportedLanguages.Dockerfile; const fileName = nodePath.basename(filePath); @@ -841,6 +952,7 @@ export function parseFile(filePath: string, source: string): FileParseResult | n if (language === SupportedLanguages.Dockerfile) return parseDockerfileFile(filePath, source); if (language === SupportedLanguages.SQL) return parseSqlFile(filePath, source); if (language === SupportedLanguages.JSON) return parseJsonFile(filePath, source); + if (language === SupportedLanguages.TOML) return parseTomlFile(filePath, source); // TypeScript TSX uses a separate grammar const isTsx = filePath.endsWith('.tsx'); diff --git a/core-ingestion/src/languages.ts b/core-ingestion/src/languages.ts index 8546401..d41545b 100644 --- a/core-ingestion/src/languages.ts +++ b/core-ingestion/src/languages.ts @@ -17,6 +17,7 @@ export enum SupportedLanguages { Dockerfile = 'dockerfile', SQL = 'sql', JSON = 'json', + TOML = 'toml', } const EXT_MAP: Record = { @@ -49,6 +50,7 @@ const EXT_MAP: Record = { '.dockerfile': SupportedLanguages.Dockerfile, '.sql': SupportedLanguages.SQL, '.json': SupportedLanguages.JSON, + '.toml': SupportedLanguages.TOML, }; export function languageFromPath(filePath: string): SupportedLanguages | null { diff --git a/core-ingestion/src/queries.ts b/core-ingestion/src/queries.ts index c6f7089..d3b2f88 100644 --- a/core-ingestion/src/queries.ts +++ b/core-ingestion/src/queries.ts @@ -1183,5 +1183,6 @@ export const LANGUAGE_QUERIES: Record = { 
[SupportedLanguages.Dockerfile]: '', [SupportedLanguages.SQL]: '', [SupportedLanguages.JSON]: '', + [SupportedLanguages.TOML]: '', }; diff --git a/ix-cli/src/cli/commands/ingest.ts b/ix-cli/src/cli/commands/ingest.ts index bab3173..b9af785 100644 --- a/ix-cli/src/cli/commands/ingest.ts +++ b/ix-cli/src/cli/commands/ingest.ts @@ -37,6 +37,7 @@ const SUPPORTED_EXTENSIONS = new Set([ '.dockerfile', '.sql', '.json', + '.toml', ]); export function isSupportedSourceFile(filePath: string): boolean { From 672a77de599e8d83b374f8433ad433783d842704 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Thu, 2 Apr 2026 17:25:17 -0700 Subject: [PATCH 02/18] fix: include TOML in isGrammarSupported early-return guard TOML has a custom parser (parseTomlFile) like YAML/JSON/SQL/Dockerfile, but was missing from the isGrammarSupported check. This caused all .toml files to be silently dropped before reaching parseFile. Co-Authored-By: Claude Sonnet 4.6 --- core-ingestion/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core-ingestion/src/index.ts b/core-ingestion/src/index.ts index bc689c8..c71881c 100644 --- a/core-ingestion/src/index.ts +++ b/core-ingestion/src/index.ts @@ -213,7 +213,7 @@ function builtinsForLanguage(lang: SupportedLanguages): Set { export function isGrammarSupported(filePath: string): boolean { const language = languageFromPath(filePath); if (!language) return false; - if (language === SupportedLanguages.YAML || language === SupportedLanguages.Dockerfile || language === SupportedLanguages.SQL || language === SupportedLanguages.JSON) return true; + if (language === SupportedLanguages.YAML || language === SupportedLanguages.Dockerfile || language === SupportedLanguages.SQL || language === SupportedLanguages.JSON || language === SupportedLanguages.TOML) return true; if (filePath.endsWith('.tsx')) return true; // TSX uses TypeScript.tsx, always available return GRAMMAR_MAP[language] !== undefined; } From 
9bab0f9645edef2e279e6ff16811d8b3e8be7e78 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Thu, 2 Apr 2026 17:54:56 -0700 Subject: [PATCH 03/18] fix(toml): emit intermediate nodes for dotted table headers [profile.release] now materialises a `profile` node in the graph, not just `release`. A per-file seen-set deduplicates shared prefixes so [profile.release] + [profile.dev] produce exactly one `profile` entity. Co-Authored-By: Claude Sonnet 4.6 --- .../src/__tests__/queries.toml.test.ts | 73 +++++++++++++++++++ core-ingestion/src/index.ts | 30 +++++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/core-ingestion/src/__tests__/queries.toml.test.ts b/core-ingestion/src/__tests__/queries.toml.test.ts index 5206f5a..c47f5ec 100644 --- a/core-ingestion/src/__tests__/queries.toml.test.ts +++ b/core-ingestion/src/__tests__/queries.toml.test.ts @@ -88,6 +88,12 @@ describe('TOML parsing', () => { ); expect(result).not.toBeNull(); + // Intermediate node + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'server', + kind: 'config_entry', + container: undefined, + })); expect(result!.entities).toContainEqual(expect.objectContaining({ name: 'tls', kind: 'config_entry', @@ -98,6 +104,11 @@ describe('TOML parsing', () => { kind: 'config_entry', container: 'server.tls', })); + expect(result!.relationships).toContainEqual({ + srcName: 'config.toml', + dstName: 'server', + predicate: 'CONTAINS', + }); expect(result!.relationships).toContainEqual({ srcName: 'server', dstName: 'tls', @@ -110,6 +121,68 @@ describe('TOML parsing', () => { }); }); + it('emits intermediate nodes for dotted table paths', () => { + const result = parseFile( + '/repo/Cargo.toml', + [ + '[profile.release]', + 'lto = true', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + // Intermediate node + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'profile', + kind: 'config_entry', + container: undefined, + })); + // Leaf node + 
expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'release', + kind: 'config_entry', + container: 'profile', + })); + // Key within leaf + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'lto', + kind: 'config_entry', + container: 'profile.release', + })); + // Chain of CONTAINS relationships + expect(result!.relationships).toContainEqual({ + srcName: 'Cargo.toml', + dstName: 'profile', + predicate: 'CONTAINS', + }); + expect(result!.relationships).toContainEqual({ + srcName: 'profile', + dstName: 'release', + predicate: 'CONTAINS', + }); + expect(result!.relationships).toContainEqual({ + srcName: 'profile.release', + dstName: 'lto', + predicate: 'CONTAINS', + }); + }); + + it('does not duplicate intermediate nodes across sibling table headers', () => { + const result = parseFile( + '/repo/Cargo.toml', + [ + '[profile.release]', + 'lto = true', + '[profile.dev]', + 'opt-level = 0', + ].join('\n'), + ); + + expect(result).not.toBeNull(); + const profileNodes = result!.entities.filter(e => e.name === 'profile'); + expect(profileNodes).toHaveLength(1); + }); + it('parses [[array-of-tables]] headers', () => { const result = parseFile( '/repo/config.toml', diff --git a/core-ingestion/src/index.ts b/core-ingestion/src/index.ts index c71881c..4192724 100644 --- a/core-ingestion/src/index.ts +++ b/core-ingestion/src/index.ts @@ -407,6 +407,7 @@ function parseTomlFile(filePath: string, source: string): FileParseResult { const keyPattern = /^\s*([A-Za-z0-9_"'-][A-Za-z0-9_"' .-]*?)\s*=/; let currentTable: string | null = null; + const seenTablePaths = new Set(); for (let i = 0; i < lines.length; i++) { const line = lines[i]; @@ -418,11 +419,36 @@ function parseTomlFile(filePath: string, source: string): FileParseResult { const tablePath = tableMatch[1].trim(); currentTable = tablePath; const parts = tablePath.split('.'); - const key = parts[parts.length - 1]; - const parent = parts.length > 1 ? 
parts.slice(0, -1).join('.') : null; const lineNumber = i + 1; const startByte = lineStarts[i] ?? 0; + // Emit intermediate nodes for each prefix segment (e.g. [a.b.c] → emit a, a.b) + for (let p = 1; p < parts.length; p++) { + const prefixPath = parts.slice(0, p).join('.'); + if (!seenTablePaths.has(prefixPath)) { + seenTablePaths.add(prefixPath); + const prefixKey = parts[p - 1]; + const prefixParent = p > 1 ? parts.slice(0, p - 1).join('.') : null; + entities.push({ + name: prefixKey, + kind: 'config_entry', + lineStart: lineNumber, + lineEnd: lineNumber, + language, + container: prefixParent ?? undefined, + }); + relationships.push({ + srcName: prefixParent ?? fileName, + dstName: prefixKey, + predicate: 'CONTAINS', + }); + } + } + + const key = parts[parts.length - 1]; + const parent = parts.length > 1 ? parts.slice(0, -1).join('.') : null; + + seenTablePaths.add(tablePath); entities.push({ name: key, kind: 'config_entry', From 6d340eb8085afc611f45382dd8b666aeb53305e1 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Thu, 2 Apr 2026 18:36:24 -0700 Subject: [PATCH 04/18] feat(cli): add --path filter to ix contains command Allows disambiguating by file path when multiple entities share the same name across workspaces or files. 
Co-Authored-By: Claude Sonnet 4.6 --- ix-cli/src/cli/commands/contains.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ix-cli/src/cli/commands/contains.ts b/ix-cli/src/cli/commands/contains.ts index 8b9ae68..fb130f9 100644 --- a/ix-cli/src/cli/commands/contains.ts +++ b/ix-cli/src/cli/commands/contains.ts @@ -9,14 +9,15 @@ export function registerContainsCommand(program: Command): void { .command("contains ") .description("Show members contained by the given entity (class, module, file)") .option("--kind ", "Filter target entity by kind") + .option("--path ", "Filter target entity by file path (substring match)") .option("--pick ", "Pick Nth candidate from ambiguous results (1-based)") .option("--limit ", "Max results to show", "50") .option("--format ", "Output format (text|json)", "text") - .addHelpText("after", "\nExamples:\n ix contains IngestionService\n ix contains auth.py --kind file --format json\n ix contains MyClass --limit 20") - .action(async (symbol: string, opts: { kind?: string; pick?: string; limit: string; format: string }) => { + .addHelpText("after", "\nExamples:\n ix contains IngestionService\n ix contains auth.py --kind file --format json\n ix contains MyClass --limit 20\n ix contains package --path crates/regex/Cargo.toml") + .action(async (symbol: string, opts: { kind?: string; path?: string; pick?: string; limit: string; format: string }) => { const client = new IxClient(getEndpoint()); const limit = parseInt(opts.limit, 10); - const resolveOpts = { kind: opts.kind, pick: opts.pick ? parseInt(opts.pick, 10) : undefined }; + const resolveOpts = { kind: opts.kind, path: opts.path, pick: opts.pick ? 
parseInt(opts.pick, 10) : undefined }; const target = await resolveFileOrEntity(client, symbol, resolveOpts); if (!target) return; if (opts.format !== "json") printResolved(target); From 01bc0defe876b3bbf85e4b59c7fd7e46f9d3b42c Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Fri, 3 Apr 2026 19:21:32 -0700 Subject: [PATCH 05/18] feat(core-ingestion): add Markdown file support (.md, .markdown) Parses ATX headings as heading entities with hierarchical CONTAINS relationships, YAML frontmatter as a frontmatter entity/chunk, and section chunks spanning each heading's content. Skips headings inside fenced code blocks. Falls back to file_body for files with no headings. Co-Authored-By: Claude Sonnet 4.6 --- .../src/__tests__/queries.markdown.test.ts | 172 ++++++++++++++++++ core-ingestion/src/index.ts | 123 ++++++++++++- core-ingestion/src/languages.ts | 3 + 3 files changed, 297 insertions(+), 1 deletion(-) create mode 100644 core-ingestion/src/__tests__/queries.markdown.test.ts diff --git a/core-ingestion/src/__tests__/queries.markdown.test.ts b/core-ingestion/src/__tests__/queries.markdown.test.ts new file mode 100644 index 0000000..b106b0b --- /dev/null +++ b/core-ingestion/src/__tests__/queries.markdown.test.ts @@ -0,0 +1,172 @@ +import { describe, expect, it } from 'vitest'; + +import { parseFile } from '../index.js'; +import { languageFromPath, SupportedLanguages } from '../languages.js'; + +describe('Markdown parsing', () => { + it('recognizes .md and .markdown extensions', () => { + expect(languageFromPath('/repo/README.md')).toBe(SupportedLanguages.Markdown); + expect(languageFromPath('/repo/guide.markdown')).toBe(SupportedLanguages.Markdown); + }); + + it('parses a single top-level heading', () => { + const result = parseFile('/repo/README.md', '# Getting Started\n\nSome text here.'); + + expect(result).not.toBeNull(); + expect(result!.language).toBe(SupportedLanguages.Markdown); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 
'Getting Started', + kind: 'heading', + language: SupportedLanguages.Markdown, + container: undefined, + })); + expect(result!.relationships).toContainEqual({ + srcName: 'README.md', + dstName: 'Getting Started', + predicate: 'CONTAINS', + }); + }); + + it('nests h2 headings under the nearest h1', () => { + const result = parseFile( + '/repo/README.md', + ['# Title', '## Installation', '## Usage'].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Installation', + kind: 'heading', + container: 'Title', + })); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Usage', + kind: 'heading', + container: 'Title', + })); + expect(result!.relationships).toContainEqual({ + srcName: 'Title', + dstName: 'Installation', + predicate: 'CONTAINS', + }); + }); + + it('nests h3 under h2, not h1', () => { + const result = parseFile( + '/repo/docs.md', + ['# Guide', '## Setup', '### Prerequisites'].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Prerequisites', + kind: 'heading', + container: 'Setup', + })); + expect(result!.relationships).toContainEqual({ + srcName: 'Setup', + dstName: 'Prerequisites', + predicate: 'CONTAINS', + }); + }); + + it('resets heading scope when a higher-level heading appears', () => { + const result = parseFile( + '/repo/docs.md', + ['# Part One', '## Chapter A', '# Part Two', '## Chapter B'].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Chapter A', + container: 'Part One', + })); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Chapter B', + container: 'Part Two', + })); + }); + + it('emits section chunks for each heading', () => { + const result = parseFile( + '/repo/README.md', + ['# Title', 'intro text', '## Install', 'install steps'].join('\n'), + ); + + 
expect(result).not.toBeNull(); + expect(result!.chunks).toContainEqual(expect.objectContaining({ + name: 'Title', + chunkKind: 'section', + contentHash: expect.stringMatching(/^[0-9a-f]{64}$/), + })); + expect(result!.chunks).toContainEqual(expect.objectContaining({ + name: 'Install', + chunkKind: 'section', + })); + }); + + it('parses YAML frontmatter', () => { + const result = parseFile( + '/repo/post.md', + ['---', 'title: Hello', 'date: 2024-01-01', '---', '# Content'].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'frontmatter', + kind: 'frontmatter', + })); + expect(result!.chunks).toContainEqual(expect.objectContaining({ + name: 'frontmatter', + chunkKind: 'frontmatter', + })); + expect(result!.relationships).toContainEqual({ + srcName: 'post.md', + dstName: 'frontmatter', + predicate: 'CONTAINS', + }); + // Heading after frontmatter still parsed + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Content', + kind: 'heading', + })); + }); + + it('skips headings inside fenced code blocks', () => { + const result = parseFile( + '/repo/README.md', + ['# Real Heading', '```', '# Not A Heading', '```'].join('\n'), + ); + + expect(result).not.toBeNull(); + const headings = result!.entities.filter(e => e.kind === 'heading'); + expect(headings).toHaveLength(1); + expect(headings[0].name).toBe('Real Heading'); + }); + + it('produces file_body chunk for files with no headings', () => { + const result = parseFile('/repo/notes.md', 'Just some plain text.\nNo headings here.'); + + expect(result).not.toBeNull(); + expect(result!.chunks).toHaveLength(1); + expect(result!.chunks[0].chunkKind).toBe('file_body'); + }); + + it('produces file_body chunk for empty file', () => { + const result = parseFile('/repo/empty.md', ''); + + expect(result).not.toBeNull(); + expect(result!.chunks).toHaveLength(1); + expect(result!.chunks[0].chunkKind).toBe('file_body'); + }); + + 
it('handles ATX headings with closing hashes', () => { + const result = parseFile('/repo/README.md', '## Section ##\nContent.'); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Section', + kind: 'heading', + })); + }); +}); diff --git a/core-ingestion/src/index.ts b/core-ingestion/src/index.ts index 4192724..3fe2beb 100644 --- a/core-ingestion/src/index.ts +++ b/core-ingestion/src/index.ts @@ -213,7 +213,7 @@ function builtinsForLanguage(lang: SupportedLanguages): Set { export function isGrammarSupported(filePath: string): boolean { const language = languageFromPath(filePath); if (!language) return false; - if (language === SupportedLanguages.YAML || language === SupportedLanguages.Dockerfile || language === SupportedLanguages.SQL || language === SupportedLanguages.JSON || language === SupportedLanguages.TOML) return true; + if (language === SupportedLanguages.YAML || language === SupportedLanguages.Dockerfile || language === SupportedLanguages.SQL || language === SupportedLanguages.JSON || language === SupportedLanguages.TOML || language === SupportedLanguages.Markdown) return true; if (filePath.endsWith('.tsx')) return true; // TSX uses TypeScript.tsx, always available return GRAMMAR_MAP[language] !== undefined; } @@ -525,6 +525,126 @@ function parseTomlFile(filePath: string, source: string): FileParseResult { return { filePath, language, entities, chunks, relationships, fileRole }; } +function parseMarkdownFile(filePath: string, source: string): FileParseResult { + const language = SupportedLanguages.Markdown; + const fileName = nodePath.basename(filePath); + const sourceLineCount = countSourceLines(source); + const fileRole = classifyFileRole(filePath); + const entities: ParsedEntity[] = [ + { name: fileName, kind: 'file', lineStart: 1, lineEnd: sourceLineCount, language }, + ]; + const chunks: ParsedChunk[] = []; + const relationships: ParsedRelationship[] = []; + const lineStarts = 
computeLineStarts(source); + const lines = source.split(/\r?\n/); + + let startLine = 0; + + // Parse YAML frontmatter delimited by --- ... --- + if (lines[0]?.trim() === '---') { + let fmEnd = -1; + for (let j = 1; j < lines.length; j++) { + if (lines[j].trim() === '---' || lines[j].trim() === '...') { + fmEnd = j; + break; + } + } + if (fmEnd > 0) { + startLine = fmEnd + 1; + const endByte = (lineStarts[fmEnd] ?? 0) + lines[fmEnd].length; + const fmContent = lines.slice(0, fmEnd + 1).join('\n'); + entities.push({ name: 'frontmatter', kind: 'frontmatter', lineStart: 1, lineEnd: fmEnd + 1, language }); + chunks.push({ + name: 'frontmatter', + chunkKind: 'frontmatter', + lineStart: 1, + lineEnd: fmEnd + 1, + startByte: 0, + endByte, + contentHash: crypto.createHash('sha256').update(fmContent).digest('hex'), + language, + }); + relationships.push({ srcName: fileName, dstName: 'frontmatter', predicate: 'CONTAINS' }); + } + } + + // headingStack[level] = heading name currently active at that depth (1–6) + const headingStack: (string | null)[] = [null, null, null, null, null, null, null]; + const headingPattern = /^(#{1,6})\s+(.+?)(?:\s+#+)?\s*$/; + const headingLines: { level: number; name: string; lineNum: number; container: string | null }[] = []; + + let inFence = false; + for (let i = startLine; i < lines.length; i++) { + const line = lines[i]; + + // Toggle fenced code block on opening/closing fence (``` or ~~~) + if (/^(`{3,}|~{3,})/.test(line.trimStart())) { + inFence = !inFence; + continue; + } + if (inFence) continue; + + const headingMatch = headingPattern.exec(line); + if (!headingMatch) continue; + + const level = headingMatch[1].length; + const name = headingMatch[2].trim(); + const lineNum = i + 1; + + // Find nearest ancestor at a shallower level + let container: string | null = null; + for (let l = level - 1; l >= 1; l--) { + if (headingStack[l] !== null) { + container = headingStack[l]; + break; + } + } + + // Reset all deeper levels when a heading 
resets scope + for (let l = level; l <= 6; l++) headingStack[l] = null; + headingStack[level] = name; + + entities.push({ name, kind: 'heading', lineStart: lineNum, lineEnd: lineNum, language, container: container ?? undefined }); + relationships.push({ srcName: container ?? fileName, dstName: name, predicate: 'CONTAINS' }); + headingLines.push({ level, name, lineNum, container }); + } + + // Build one section chunk per heading spanning to the line before the next heading + for (let h = 0; h < headingLines.length; h++) { + const { name, lineNum, container } = headingLines[h]; + const nextLineNum = h + 1 < headingLines.length ? headingLines[h + 1].lineNum - 1 : sourceLineCount; + const startByte = lineStarts[lineNum - 1] ?? 0; + const endByte = nextLineNum < lines.length ? (lineStarts[nextLineNum] ?? source.length) : source.length; + const sectionContent = lines.slice(lineNum - 1, nextLineNum).join('\n'); + chunks.push({ + name, + chunkKind: 'section', + lineStart: lineNum, + lineEnd: nextLineNum, + startByte, + endByte, + contentHash: crypto.createHash('sha256').update(sectionContent).digest('hex'), + language, + container: container ?? 
undefined, + }); + } + + if (chunks.length === 0) { + chunks.push({ + name: null, + chunkKind: 'file_body', + lineStart: 1, + lineEnd: Math.max(sourceLineCount, 1), + startByte: 0, + endByte: source.length, + contentHash: crypto.createHash('sha256').update(source).digest('hex'), + language, + }); + } + + return { filePath, language, entities, chunks, relationships, fileRole }; +} + function parseDockerfileFile(filePath: string, source: string): FileParseResult { const language = SupportedLanguages.Dockerfile; const fileName = nodePath.basename(filePath); @@ -979,6 +1099,7 @@ export function parseFile(filePath: string, source: string): FileParseResult | n if (language === SupportedLanguages.SQL) return parseSqlFile(filePath, source); if (language === SupportedLanguages.JSON) return parseJsonFile(filePath, source); if (language === SupportedLanguages.TOML) return parseTomlFile(filePath, source); + if (language === SupportedLanguages.Markdown) return parseMarkdownFile(filePath, source); // TypeScript TSX uses a separate grammar const isTsx = filePath.endsWith('.tsx'); diff --git a/core-ingestion/src/languages.ts b/core-ingestion/src/languages.ts index d41545b..321f3ed 100644 --- a/core-ingestion/src/languages.ts +++ b/core-ingestion/src/languages.ts @@ -18,6 +18,7 @@ export enum SupportedLanguages { SQL = 'sql', JSON = 'json', TOML = 'toml', + Markdown = 'markdown', } const EXT_MAP: Record = { @@ -51,6 +52,8 @@ const EXT_MAP: Record = { '.sql': SupportedLanguages.SQL, '.json': SupportedLanguages.JSON, '.toml': SupportedLanguages.TOML, + '.md': SupportedLanguages.Markdown, + '.markdown': SupportedLanguages.Markdown, }; export function languageFromPath(filePath: string): SupportedLanguages | null { From fd37878771d0ac1d4e9a4084499138e876ebdc1b Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Fri, 3 Apr 2026 19:33:52 -0700 Subject: [PATCH 06/18] fix(ingest): add .md and .markdown to SUPPORTED_EXTENSIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit File discovery in ingest.ts maintains a local extension set that mirrors core-ingestion's EXT_MAP but was missing .md and .markdown. This caused markdown files to be silently excluded before reaching the parser, so ix map never produced heading, frontmatter, or file nodes for .md files — even though parseMarkdownFile was fully implemented. Co-Authored-By: Claude Sonnet 4.6 --- ix-cli/src/cli/commands/ingest.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/ix-cli/src/cli/commands/ingest.ts b/ix-cli/src/cli/commands/ingest.ts index b9af785..e4b97a3 100644 --- a/ix-cli/src/cli/commands/ingest.ts +++ b/ix-cli/src/cli/commands/ingest.ts @@ -38,6 +38,7 @@ const SUPPORTED_EXTENSIONS = new Set([ '.sql', '.json', '.toml', + '.md', '.markdown', ]); export function isSupportedSourceFile(filePath: string): boolean { From 23977bdafe77ffad8b981d8183fdf9af4a2e57e9 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Fri, 3 Apr 2026 20:07:09 -0700 Subject: [PATCH 07/18] fix(cli): wire up ix config set workspace so it actually takes effect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs from the markdown-parsing test run: 1. LANGUAGE_QUERIES was missing a Markdown entry, causing a TS2741 compile error that prevented core-ingestion from building. Added [SupportedLanguages.Markdown]: '' (markdown uses its own hand-written parser, not tree-sitter queries). 2. ix config set workspace wrote a top-level workspace: key to config.yaml but resolveWorkspaceRoot never read it — all commands continued routing to the workspace with default: true. Added workspace?: string to IxConfig and a lookup step in resolveWorkspaceRoot that checks cfg.workspace by name before falling back to the default workspace. 
Co-Authored-By: Claude Sonnet 4.6 --- core-ingestion/src/queries.ts | 1 + ix-cli/src/cli/config.ts | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/core-ingestion/src/queries.ts b/core-ingestion/src/queries.ts index d3b2f88..979651f 100644 --- a/core-ingestion/src/queries.ts +++ b/core-ingestion/src/queries.ts @@ -1184,5 +1184,6 @@ export const LANGUAGE_QUERIES: Record = { [SupportedLanguages.SQL]: '', [SupportedLanguages.JSON]: '', [SupportedLanguages.TOML]: '', + [SupportedLanguages.Markdown]: '', }; diff --git a/ix-cli/src/cli/config.ts b/ix-cli/src/cli/config.ts index ccd4c79..828be2e 100644 --- a/ix-cli/src/cli/config.ts +++ b/ix-cli/src/cli/config.ts @@ -14,6 +14,7 @@ export interface WorkspaceConfig { export interface IxConfig { endpoint: string; format: string; + workspace?: string; workspaces?: WorkspaceConfig[]; } @@ -66,13 +67,19 @@ export function resolveWorkspaceRoot(explicitRoot?: string): string { const cwd = process.cwd(); const nearest = findWorkspaceForCwd(cwd); if (nearest) return nearest.root_path; - // 3. Configured default workspace + // 3. Named workspace from `ix config set workspace ` + const cfg = loadConfig(); + if (cfg.workspace) { + const named = loadWorkspaces().find(w => w.workspace_name === cfg.workspace); + if (named) return named.root_path; + } + // 4. Configured default workspace const defaultWs = getDefaultWorkspace(); if (defaultWs) return defaultWs.root_path; - // 4. Git root + // 5. Git root try { return execSync("git rev-parse --show-toplevel", { encoding: "utf-8" }).trim(); } catch {} - // 5. cwd fallback + // 6. cwd fallback return cwd; } From b20f6d58f8ad77b065adc8403e19225fecc12648 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Fri, 3 Apr 2026 20:36:36 -0700 Subject: [PATCH 08/18] fix(core-ingestion): store section chunks with kind 'section', bump extractor to 1.21 Section chunks were stored as kind 'chunk', making ix search --kind section return no results. 
Giving them a first-class 'section' kind (consistent with heading/frontmatter) makes them discoverable via --kind filtering. Co-Authored-By: Claude Sonnet 4.6 --- core-ingestion/src/patch-builder.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core-ingestion/src/patch-builder.ts b/core-ingestion/src/patch-builder.ts index 8b2ac80..890e388 100644 --- a/core-ingestion/src/patch-builder.ts +++ b/core-ingestion/src/patch-builder.ts @@ -51,11 +51,11 @@ function sourceType(filePath: string): string { } export function extractorName(): string { - return `tree-sitter/1.20`; + return `tree-sitter/1.21`; } /** Previous extractor versions — their patches are superseded when re-ingesting. */ -export const PREVIOUS_EXTRACTORS = ['tree-sitter/1.19', 'tree-sitter/1.18', 'tree-sitter/1.17', 'tree-sitter/1.16', 'tree-sitter/1.15', 'tree-sitter/1.14', 'tree-sitter/1.13', 'tree-sitter/1.12', 'tree-sitter/1.11', 'tree-sitter/1.10', 'tree-sitter/1.9', 'tree-sitter/1.8', 'tree-sitter/1.7', 'tree-sitter/1.6', 'tree-sitter/1.5', 'tree-sitter/1.4', 'tree-sitter/1.3', 'tree-sitter/1.2', 'tree-sitter/1.1']; +export const PREVIOUS_EXTRACTORS = ['tree-sitter/1.20', 'tree-sitter/1.19', 'tree-sitter/1.18', 'tree-sitter/1.17', 'tree-sitter/1.16', 'tree-sitter/1.15', 'tree-sitter/1.14', 'tree-sitter/1.13', 'tree-sitter/1.12', 'tree-sitter/1.11', 'tree-sitter/1.10', 'tree-sitter/1.9', 'tree-sitter/1.8', 'tree-sitter/1.7', 'tree-sitter/1.6', 'tree-sitter/1.5', 'tree-sitter/1.4', 'tree-sitter/1.3', 'tree-sitter/1.2', 'tree-sitter/1.1']; /** Compute a patchId for a (filePath, sourceHash, extractorVersion) triple. */ function computePatchId(filePath: string, sourceHash: string, extractor: string): string { @@ -146,10 +146,11 @@ export function buildPatch( for (const chunk of chunks) { const cid = chunkId(filePath, chunk.chunkKind, chunk.name, chunk.lineStart); const chunkName = chunk.name ?? 
`file_body:${chunk.lineStart}`; + const chunkNodeKind = chunk.chunkKind === 'section' ? 'section' : 'chunk'; ops.push({ type: 'UpsertNode', id: cid, - kind: 'chunk', + kind: chunkNodeKind, name: chunkName, attrs: { file_uri: filePath, @@ -340,10 +341,11 @@ export function buildPatchWithResolution( for (const chunk of chunks) { const cid = chunkId(filePath, chunk.chunkKind, chunk.name, chunk.lineStart); const chunkName = chunk.name ?? `file_body:${chunk.lineStart}`; + const chunkNodeKind2 = chunk.chunkKind === 'section' ? 'section' : 'chunk'; ops.push({ type: 'UpsertNode', id: cid, - kind: 'chunk', + kind: chunkNodeKind2, name: chunkName, attrs: { file_uri: filePath, From a7c351de835f8789fabdbda70ecea7f0bec5da8f Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Fri, 3 Apr 2026 21:11:57 -0700 Subject: [PATCH 09/18] Support HTML headings in markdown docs --- .../src/__tests__/queries.markdown.test.ts | 29 +++++++++++++++++++ core-ingestion/src/index.ts | 10 +++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/core-ingestion/src/__tests__/queries.markdown.test.ts b/core-ingestion/src/__tests__/queries.markdown.test.ts index b106b0b..001e419 100644 --- a/core-ingestion/src/__tests__/queries.markdown.test.ts +++ b/core-ingestion/src/__tests__/queries.markdown.test.ts @@ -169,4 +169,33 @@ describe('Markdown parsing', () => { kind: 'heading', })); }); + + it('parses single-line HTML headings commonly used in docs', () => { + const result = parseFile( + '/repo/docs.md', + ['

<h1>Fastify</h1>

', '', '## Routes'].join('\n'), + ); + + expect(result).not.toBeNull(); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Fastify', + kind: 'heading', + container: undefined, + })); + expect(result!.entities).toContainEqual(expect.objectContaining({ + name: 'Routes', + kind: 'heading', + container: 'Fastify', + })); + expect(result!.relationships).toContainEqual({ + srcName: 'docs.md', + dstName: 'Fastify', + predicate: 'CONTAINS', + }); + expect(result!.relationships).toContainEqual({ + srcName: 'Fastify', + dstName: 'Routes', + predicate: 'CONTAINS', + }); + }); }); diff --git a/core-ingestion/src/index.ts b/core-ingestion/src/index.ts index 3fe2beb..fe3c806 100644 --- a/core-ingestion/src/index.ts +++ b/core-ingestion/src/index.ts @@ -571,6 +571,7 @@ function parseMarkdownFile(filePath: string, source: string): FileParseResult { // headingStack[level] = heading name currently active at that depth (1–6) const headingStack: (string | null)[] = [null, null, null, null, null, null, null]; const headingPattern = /^(#{1,6})\s+(.+?)(?:\s+#+)?\s*$/; + const htmlHeadingPattern = /^<h([1-6])[^>]*>(.*?)<\/h\1>\s*$/i; const headingLines: { level: number; name: string; lineNum: number; container: string | null }[] = []; let inFence = false; @@ -585,10 +586,13 @@ function parseMarkdownFile(filePath: string, source: string): FileParseResult { if (inFence) continue; const headingMatch = headingPattern.exec(line); - if (!headingMatch) continue; + const htmlHeadingMatch = headingMatch ? null : htmlHeadingPattern.exec(line.trim()); + if (!headingMatch && !htmlHeadingMatch) continue; - const level = headingMatch[1].length; - const name = headingMatch[2].trim(); + const level = headingMatch ? headingMatch[1].length : Number(htmlHeadingMatch![1]); + const rawName = headingMatch ? 
headingMatch[2] : htmlHeadingMatch![2]; + const name = rawName.replace(/<[^>]+>/g, '').trim(); + if (!name) continue; const lineNum = i + 1; // Find nearest ancestor at a shallower level From 7c8bf74e951db92b6ebddd8943b6be7c3b73fa10 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Fri, 3 Apr 2026 21:44:43 -0700 Subject: [PATCH 10/18] Scope CLI search to active workspace --- ix-cli/src/cli/commands/explain.ts | 2 ++ ix-cli/src/cli/commands/search.ts | 19 +++++++++++++++---- ix-cli/src/cli/config.ts | 14 ++++++++++++++ ix-cli/src/cli/format.ts | 5 +++++ ix-cli/src/cli/resolve.ts | 21 ++++++++++++--------- 5 files changed, 48 insertions(+), 13 deletions(-) diff --git a/ix-cli/src/cli/commands/explain.ts b/ix-cli/src/cli/commands/explain.ts index c448991..3a79cf8 100644 --- a/ix-cli/src/cli/commands/explain.ts +++ b/ix-cli/src/cli/commands/explain.ts @@ -121,6 +121,7 @@ async function rawExplain( // Extract snippet fields from attrs const signature = node.attrs?.signature || node.attrs?.summary; const docstring = node.attrs?.docstring || node.attrs?.description; + const chunkKind = node.attrs?.chunk_kind || node.attrs?.chunkKind || (node.kind === "section" ? "section" : undefined); // Get actual callee details from outgoing CALLS edges const calleeEdges = callEdges.filter((e: any) => e.src === target.id); @@ -181,6 +182,7 @@ async function rawExplain( name: node.name || node.attrs?.name || target.name, id: target.id, file: relativePath(sourceUri), + chunkKind, container: container ? { kind: container.kind, name: container.name || container.attrs?.name || "(unknown)" } : undefined, introducedRev: node.createdRev ?? 
node.created_rev, calledBy: callEdges.filter((e: any) => e.dst === target.id).length, diff --git a/ix-cli/src/cli/commands/search.ts b/ix-cli/src/cli/commands/search.ts index 14c9bce..4760d04 100644 --- a/ix-cli/src/cli/commands/search.ts +++ b/ix-cli/src/cli/commands/search.ts @@ -1,7 +1,7 @@ import type { Command } from "commander"; import chalk from "chalk"; import { IxClient } from "../../client/api.js"; -import { getEndpoint } from "../config.js"; +import { getActiveWorkspaceRoot, getEndpoint } from "../config.js"; import { formatNodes, relativePath } from "../format.js"; import { scoreCandidate } from "../resolve.js"; import { applyRoleFilter, roleHint } from "../role-filter.js"; @@ -80,6 +80,10 @@ function searchSort( return aName.localeCompare(bName); } +function normalizePath(value: string | undefined): string { + return (value ?? "").toLowerCase().replace(/\\/g, "/"); +} + export function registerSearchCommand(program: Command): void { program .command("search ") @@ -112,20 +116,27 @@ Examples: }) => { const client = new IxClient(getEndpoint()); const limit = parseInt(opts.limit, 10); + const effectivePathFilter = opts.path ?? getActiveWorkspaceRoot(); // Fetch more results than requested so we can re-rank and trim const fetchLimit = Math.min(limit * 3, 60); - const nodes = await client.search(term, { + const rawNodes = await client.search(term, { limit: fetchLimit, kind: opts.kind, language: opts.language, asOfRev: opts.asOf ? parseInt(opts.asOf, 10) : undefined, }); + const nodes = effectivePathFilter + ? rawNodes.filter((node: any) => { + const sourceUri = normalizePath(node.provenance?.sourceUri ?? node.provenance?.source_uri ?? 
""); + return sourceUri.includes(normalizePath(effectivePathFilter)); + }) + : rawNodes; // Re-rank client-side using shared scoring + backend weight const scored = nodes.map(n => ({ node: n, - rank: rankScore(n, term, opts.kind, opts.path), + rank: rankScore(n, term, opts.kind, effectivePathFilter), })); scored.sort(searchSort); @@ -167,7 +178,7 @@ Examples: })), summary: { count: ranked.length, - totalCandidates: nodes.length, + totalCandidates: rawNodes.length, }, diagnostics, }, null, 2)); diff --git a/ix-cli/src/cli/config.ts b/ix-cli/src/cli/config.ts index 828be2e..dc85155 100644 --- a/ix-cli/src/cli/config.ts +++ b/ix-cli/src/cli/config.ts @@ -60,6 +60,20 @@ export function getDefaultWorkspace(): WorkspaceConfig | undefined { return loadWorkspaces().find(w => w.default); } +export function getActiveWorkspaceRoot(): string | undefined { + const cwd = process.cwd(); + const nearest = findWorkspaceForCwd(cwd); + if (nearest) return nearest.root_path; + + const cfg = loadConfig(); + if (cfg.workspace) { + const named = loadWorkspaces().find(w => w.workspace_name === cfg.workspace); + if (named) return named.root_path; + } + + return getDefaultWorkspace()?.root_path; +} + export function resolveWorkspaceRoot(explicitRoot?: string): string { // 1. Explicit --root if (explicitRoot) return explicitRoot; diff --git a/ix-cli/src/cli/format.ts b/ix-cli/src/cli/format.ts index 6dbcab7..e579c35 100644 --- a/ix-cli/src/cli/format.ts +++ b/ix-cli/src/cli/format.ts @@ -520,6 +520,7 @@ export interface ExplainResult { name: string; id: string; file?: string; + chunkKind?: string; container?: { kind: string; name: string }; introducedRev: number; calledBy: number; @@ -541,6 +542,7 @@ export function formatExplain(result: ExplainResult, format: string): void { name: result.name, id: result.id, file: relativePath(result.file), + chunkKind: result.chunkKind ?? 
null, container: result.container, introducedRev: result.introducedRev, calledBy: result.calledBy, @@ -573,6 +575,9 @@ export function formatExplain(result: ExplainResult, format: string): void { if (result.file) { console.log(` ${chalk.dim("file")} ${result.file}`); } + if (result.chunkKind) { + console.log(` ${chalk.dim("chunk kind")} ${result.chunkKind}`); + } console.log(` ${chalk.dim("introduced rev")} ${result.introducedRev}`); if (result.calledBy > 0) console.log(` ${chalk.dim("called by")} ${result.calledBy} methods`); if (result.callList && result.callList.length > 0) { diff --git a/ix-cli/src/cli/resolve.ts b/ix-cli/src/cli/resolve.ts index 62678d8..8e80507 100644 --- a/ix-cli/src/cli/resolve.ts +++ b/ix-cli/src/cli/resolve.ts @@ -1,6 +1,7 @@ import * as path from "node:path"; import chalk from "chalk"; import type { IxClient } from "../client/api.js"; +import { getActiveWorkspaceRoot } from "./config.js"; import { stderr } from "./stderr.js"; import { applyRoleFilter } from "./role-filter.js"; @@ -147,9 +148,10 @@ export async function resolveEntityFull( preferredKinds: string[], opts?: { kind?: string; path?: string; pick?: number; includeTests?: boolean; testsOnly?: boolean; searchLimit?: number } ): Promise { + const effectivePath = opts?.path ?? getActiveWorkspaceRoot(); const kindFilter = opts?.kind; const nodes = await client.search(symbol, { - limit: opts?.searchLimit ?? (opts?.path ? 200 : looksTypeLikeSymbol(symbol) ? 50 : 20), + limit: opts?.searchLimit ?? (effectivePath ? 200 : looksTypeLikeSymbol(symbol) ? 50 : 20), kind: kindFilter, nameOnly: true, }); @@ -165,15 +167,15 @@ export async function resolveEntityFull( // Hard path filter: when --path is provided, exclude candidates whose sourceUri does not // contain the filter string. If no candidates survive, return "not found" rather than // falling back to cross-repo results. - const filteredNodes = opts?.path + const filteredNodes = effectivePath ? 
roleFiltered.filter((n: any) => { const uri = normalizeForPathMatch(n.provenance?.sourceUri ?? n.provenance?.source_uri ?? ""); - return uri.includes(normalizeForPathMatch(opts.path)); + return uri.includes(normalizeForPathMatch(effectivePath)); }) : roleFiltered; - if (opts?.path && filteredNodes.length === 0) { - stderr(`No entity named "${symbol}" found in paths matching "${opts.path}".`); + if (effectivePath && filteredNodes.length === 0) { + stderr(`No entity named "${symbol}" found in paths matching "${effectivePath}".`); return { resolved: false, ambiguous: false, hiddenTestCount }; } @@ -195,7 +197,7 @@ export async function resolveEntityFull( // Score exact-name candidates if (exactName.length > 0) { - const winner = pickBest(exactName, symbol, preferredKinds, opts); + const winner = pickBest(exactName, symbol, preferredKinds, { ...opts, path: effectivePath }); if (winner) { const picked = applyPick(winner, opts); if (picked) return { ...picked, hiddenTestCount } as ResolveResult; @@ -204,7 +206,7 @@ export async function resolveEntityFull( } // ── Phase 2: Fall back to all candidates ──────────────────────────── - const winner = pickBest(filteredNodes, symbol, preferredKinds, opts); + const winner = pickBest(filteredNodes, symbol, preferredKinds, { ...opts, path: effectivePath }); if (winner) { const picked = applyPick(winner, opts); if (picked) return { ...picked, hiddenTestCount } as ResolveResult; @@ -577,10 +579,11 @@ async function tryFileGraphMatch( ): Promise { const basename = path.basename(target); const targetHasPath = target.includes("/") || target.includes("\\"); + const effectivePath = opts?.path ?? getActiveWorkspaceRoot(); // Search for file entities matching the basename const nodes = await client.search(basename, { - limit: opts?.path ? 200 : 20, + limit: effectivePath ? 
200 : 20, kind: "file", nameOnly: true, }); @@ -589,7 +592,7 @@ async function tryFileGraphMatch( const targetLower = normalizeForPathMatch(target); const basenameLower = basename.toLowerCase(); const basenameNoExt = basename.replace(/\.[^.]+$/, "").toLowerCase(); - const normalizedPathHint = normalizeForPathMatch(opts?.path); + const normalizedPathHint = normalizeForPathMatch(effectivePath); const matches: Array<{ node: any; quality: number }> = []; for (const n of nodes as any[]) { From e912e622b450d665979cf6f135c1138735f3bc26 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Sat, 4 Apr 2026 10:34:08 -0700 Subject: [PATCH 11/18] fix(cli): resolve absolute paths, short IDs, and section kind display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ix contains: absolute paths now match against graph's relative URIs by checking targetLower.endsWith(uri); URI-length tiebreaker picks the most specific match when multiple quality-0 candidates exist - ix contains/explain: 8–31 char hex inputs (short IDs from CLI output) now attempt resolvePrefix before falling back to symbol resolution - ix explain: context section always shows Kind so section (and all other) entity types are visible in the detail view Co-Authored-By: Claude Sonnet 4.6 --- ix-cli/src/cli/explain/render.ts | 2 ++ ix-cli/src/cli/resolve.ts | 36 +++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/ix-cli/src/cli/explain/render.ts b/ix-cli/src/cli/explain/render.ts index 52f6fd0..ff59058 100644 --- a/ix-cli/src/cli/explain/render.ts +++ b/ix-cli/src/cli/explain/render.ts @@ -272,6 +272,8 @@ export function renderExplanation( // ── Context ─────────────────────────────────────────────────────────── const contextLines: string[] = []; + contextLines.push(`Kind: ${facts.kind}`); + if (facts.path) { contextLines.push(`Defined in: ${facts.path}`); } diff --git a/ix-cli/src/cli/resolve.ts b/ix-cli/src/cli/resolve.ts index 
8e80507..71615c3 100644 --- a/ix-cli/src/cli/resolve.ts +++ b/ix-cli/src/cli/resolve.ts @@ -508,6 +508,23 @@ export async function resolveFileOrEntity( } } + // 1.5 Short ID prefix (8–31 hex chars, e.g. "aacc3359" from CLI output) + if (/^[0-9a-f]{8,31}$/i.test(target)) { + try { + const fullId = await client.resolvePrefix(target); + const details = await client.entity(fullId); + const n = details.node as any; + return { + id: fullId, + kind: n.kind || "unknown", + name: n.name || target, + resolutionMode: "exact", + }; + } catch { + // Not a valid entity prefix — fall through to normal resolution + } + } + // 2. File-like input → try graph file search if (looksFileLike(target)) { const fileEntity = await tryFileGraphMatch(client, target, opts); @@ -599,8 +616,10 @@ async function tryFileGraphMatch( const name = (n.name || "").toLowerCase(); const uri = normalizeForPathMatch(n.provenance?.sourceUri ?? n.provenance?.source_uri ?? ""); - // Exact path match (best) - if (targetHasPath && (uri.endsWith(targetLower) || uri === targetLower)) { + // Exact path match (best): covers both absolute URIs matching absolute target, + // and relative URIs that are a suffix of an absolute target path. + if (targetHasPath && (uri.endsWith(targetLower) || uri === targetLower + || (uri.includes("/") && targetLower.endsWith(uri)))) { matches.push({ node: n, quality: 0 }); } // Filename match in user-requested path @@ -619,8 +638,13 @@ async function tryFileGraphMatch( if (matches.length === 0) return null; - // Sort by quality then pick best - matches.sort((a, b) => a.quality - b.quality); + // Sort by quality then by URI length descending (longer = more specific path wins) + matches.sort((a, b) => { + if (a.quality !== b.quality) return a.quality - b.quality; + const uriA = normalizeForPathMatch(a.node.provenance?.sourceUri ?? a.node.provenance?.source_uri ?? ""); + const uriB = normalizeForPathMatch(b.node.provenance?.sourceUri ?? b.node.provenance?.source_uri ?? 
""); + return uriB.length - uriA.length; + }); // If multiple matches at same quality, prefer path-matching target const best = matches[0]; @@ -628,7 +652,9 @@ async function tryFileGraphMatch( // Disambiguate by path when user provided a path const pathMatch = matches.find(m => { const uri = normalizeForPathMatch(m.node.provenance?.sourceUri ?? m.node.provenance?.source_uri ?? ""); - return uri.endsWith(targetLower) || (!!normalizedPathHint && uri.includes(normalizedPathHint)); + return uri.endsWith(targetLower) || uri === targetLower + || (uri.includes("/") && targetLower.endsWith(uri)) + || (!!normalizedPathHint && uri.includes(normalizedPathHint)); }); if (pathMatch) { return nodeToResolved(pathMatch.node, pathMatch.node.name, "exact"); From 9705f62c64cffe3d9f2b84bfdb8e2669d901e1c4 Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Sat, 4 Apr 2026 11:11:36 -0700 Subject: [PATCH 12/18] fix(md-parse): hierarchical section scoping and root-file disambiguation - Section chunks now span to the next heading at the same or shallower level, so parent sections include their full nested subtree content - Bare filename tie-breaking now prefers shorter URIs (root-level files) over deeply-nested ones, fixing `ix contains README.md` resolving to a fixture file instead of the root README - Add test asserting parent section lineEnd covers nested child sections Co-Authored-By: Claude Sonnet 4.6 --- .../src/__tests__/queries.markdown.test.ts | 24 +++++++++++++++++++ core-ingestion/src/index.ts | 13 +++++++--- ix-cli/src/cli/resolve.ts | 4 ++-- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/core-ingestion/src/__tests__/queries.markdown.test.ts b/core-ingestion/src/__tests__/queries.markdown.test.ts index 001e419..e0b7e0b 100644 --- a/core-ingestion/src/__tests__/queries.markdown.test.ts +++ b/core-ingestion/src/__tests__/queries.markdown.test.ts @@ -105,6 +105,30 @@ describe('Markdown parsing', () => { })); }); + it('parent section spans full subtree including 
nested headings', () => { + // H1 section should extend to EOF (no sibling H1), spanning all 4 lines. + // H2 section should extend to EOF as well (no next H2 or H1 after it). + const source = ['# Title', 'intro text', '## Install', 'install steps'].join('\n'); + const result = parseFile('/repo/README.md', source); + + expect(result).not.toBeNull(); + const titleSection = result!.chunks.find(c => c.name === 'Title' && c.chunkKind === 'section'); + const installSection = result!.chunks.find(c => c.name === 'Install' && c.chunkKind === 'section'); + + expect(titleSection).toBeDefined(); + expect(installSection).toBeDefined(); + + // Title (H1) section must span to end of file — it has no sibling H1 after it + expect(titleSection!.lineEnd).toBe(4); + // Install (H2) section spans to end of file + expect(installSection!.lineEnd).toBe(4); + + // Title section must start before Install section + expect(titleSection!.lineStart).toBeLessThan(installSection!.lineStart); + // Title section must end no earlier than Install section + expect(titleSection!.lineEnd).toBeGreaterThanOrEqual(installSection!.lineEnd); + }); + it('parses YAML frontmatter', () => { const result = parseFile( '/repo/post.md', diff --git a/core-ingestion/src/index.ts b/core-ingestion/src/index.ts index fe3c806..468f7c3 100644 --- a/core-ingestion/src/index.ts +++ b/core-ingestion/src/index.ts @@ -613,10 +613,17 @@ function parseMarkdownFile(filePath: string, source: string): FileParseResult { headingLines.push({ level, name, lineNum, container }); } - // Build one section chunk per heading spanning to the line before the next heading + // Build one section chunk per heading spanning to the line before the next heading at the same or + // shallower level. This ensures parent sections contain their nested sub-sections' content. for (let h = 0; h < headingLines.length; h++) { - const { name, lineNum, container } = headingLines[h]; - const nextLineNum = h + 1 < headingLines.length ? 
headingLines[h + 1].lineNum - 1 : sourceLineCount; + const { level: currentLevel, name, lineNum, container } = headingLines[h]; + let nextLineNum = sourceLineCount; + for (let k = h + 1; k < headingLines.length; k++) { + if (headingLines[k].level <= currentLevel) { + nextLineNum = headingLines[k].lineNum - 1; + break; + } + } const startByte = lineStarts[lineNum - 1] ?? 0; const endByte = nextLineNum < lines.length ? (lineStarts[nextLineNum] ?? source.length) : source.length; const sectionContent = lines.slice(lineNum - 1, nextLineNum).join('\n'); diff --git a/ix-cli/src/cli/resolve.ts b/ix-cli/src/cli/resolve.ts index 71615c3..e54fb26 100644 --- a/ix-cli/src/cli/resolve.ts +++ b/ix-cli/src/cli/resolve.ts @@ -638,12 +638,12 @@ async function tryFileGraphMatch( if (matches.length === 0) return null; - // Sort by quality then by URI length descending (longer = more specific path wins) + // Sort by quality then by URI length ascending (shorter = closer to root = more prominent) matches.sort((a, b) => { if (a.quality !== b.quality) return a.quality - b.quality; const uriA = normalizeForPathMatch(a.node.provenance?.sourceUri ?? a.node.provenance?.source_uri ?? ""); const uriB = normalizeForPathMatch(b.node.provenance?.sourceUri ?? b.node.provenance?.source_uri ?? ""); - return uriB.length - uriA.length; + return uriA.length - uriB.length; }); // If multiple matches at same quality, prefer path-matching target From 1f68979d667abae115b88c108ac7f87a65d8e1af Mon Sep 17 00:00:00 2001 From: Joseph Mikhail Date: Sat, 4 Apr 2026 15:51:21 -0700 Subject: [PATCH 13/18] fix(md-parse): clean VitePress heading syntax from entity names Replaces single-line HTML strip with a full pipeline that handles anchor IDs ({#...}), backtick-wrapped component names, backslash-escaped angle brackets, stability markers (\*\*), inline HTML badges, and double-space normalization. Adds 6 regression tests for vuejs/docs cases. 
Co-Authored-By: Claude Sonnet 4.6 --- .../src/__tests__/queries.markdown.test.ts | 36 +++++++++++++++++++ core-ingestion/src/index.ts | 23 +++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/core-ingestion/src/__tests__/queries.markdown.test.ts b/core-ingestion/src/__tests__/queries.markdown.test.ts index e0b7e0b..d8b7930 100644 --- a/core-ingestion/src/__tests__/queries.markdown.test.ts +++ b/core-ingestion/src/__tests__/queries.markdown.test.ts @@ -194,6 +194,42 @@ describe('Markdown parsing', () => { })); }); + it('strips VitePress anchor ID suffix {#...} from heading names', () => { + const result = parseFile('/repo/docs.md', '## What is Vue? {#what-is-vue}'); + expect(result!.entities).toContainEqual(expect.objectContaining({ name: 'What is Vue?', kind: 'heading' })); + }); + + it('preserves component names inside backticks when heading contains angle brackets', () => { + const result = parseFile('/repo/docs.md', '## `<Transition>` {#transition}'); + expect(result!.entities).toContainEqual(expect.objectContaining({ name: '<Transition>', kind: 'heading' })); + }); + + it('handles backslash-escaped angle brackets in heading names', () => { + const result = parseFile('/repo/docs.md', '# \\