From 0485830d35f203261e1eec3984ea468a37bbcc0f Mon Sep 17 00:00:00 2001 From: Philip Nee Date: Sat, 25 Apr 2026 21:26:55 -0700 Subject: [PATCH] [codex] add semantic context tools --- src/cli/start.ts | 7 +- src/server/router.ts | 399 +++++++++++++++++++++++++++++++++++++++++-- tests/router.test.ts | 106 ++++++++++++ tests/server.test.ts | 86 ++++++++++ 4 files changed, 582 insertions(+), 16 deletions(-) diff --git a/src/cli/start.ts b/src/cli/start.ts index 04e5368..cea8f0d 100644 --- a/src/cli/start.ts +++ b/src/cli/start.ts @@ -91,7 +91,12 @@ export async function start(options: StartOptions = {}): Promise { const audit = interactiveMode ? new InteractiveAuditLogger(createAuditLogger()) : createAuditLogger(); - const router = new ToolRouter(loaded.map((entry) => ({ connector: entry.connector, sourceId: entry.sourceId })), audit, plugins); + const router = new ToolRouter( + loaded.map((entry) => ({ connector: entry.connector, sourceId: entry.sourceId })), + audit, + plugins, + { semanticTools: config.semanticTools }, + ); await router.initialize(); // Cleanup tasks run on SIGINT/SIGTERM and on startup failure. diff --git a/src/server/router.ts b/src/server/router.ts index 7fb4e7b..ecfcd3d 100644 --- a/src/server/router.ts +++ b/src/server/router.ts @@ -1,11 +1,26 @@ import { CallToolResult, Connector, ToolDefinition } from '../connectors/types.js'; import { isLikelyWriteTool, isMemPalaceWriteTool } from '../connectors/write-policy.js'; -import { PermissionConfig } from '../config/schema.js'; +import { PermissionConfig, SemanticToolsConfig } from '../config/schema.js'; import { PatternRedactorAuditEvent, ToolResultPlugin } from '../plugins/types.js'; import { AuditLogger, summarizeArgs } from '../utils/audit.js'; import { ClientIdentity } from './client-identity.js'; type PermissionAction = PermissionConfig['actions'][number]; +type ToolKind = 'raw' | 'semantic'; +type SemanticToolName = 'search_personal_context' | 'read_context_item'; + +interface ToolEntry { + connector: Connector; + originalName: string; + sourceId: string; + requiredAction: PermissionAction; + namespacedName: string; + toolKind: ToolKind; +} + +export interface ToolRouterOptions { + semanticTools?: SemanticToolsConfig; +} export interface RouterConnector { connector: Connector; @@ -18,30 +33,31 @@ export interface NamespacedTool { connectorId: string; sourceId: string; requiredAction: PermissionAction; + toolKind: ToolKind; description: string; inputSchema: Record; } export class ToolRouter { - private readonly toolMap = new Map(); + private readonly toolMap = new Map(); + private readonly toolsBySource = new Map(); private readonly allTools: NamespacedTool[] = []; + private readonly semanticToolDefinitions: NamespacedTool[] = []; private readonly connectors: RouterConnector[]; + private readonly semanticTools: SemanticToolsConfig; constructor( connectors: Array, private readonly audit?: AuditLogger, private readonly plugins: ToolResultPlugin[] = [], + options: ToolRouterOptions = {}, ) { this.connectors = connectors.map((entry) => ( 'connector' in entry ? entry : { connector: entry, sourceId: entry.id } )); + this.semanticTools = options.semanticTools ?? {}; } async initialize(): Promise { @@ -57,12 +73,19 @@ export class ToolRouter { throw new Error(`Duplicate tool name after namespacing: ${namespacedName}`); } - this.toolMap.set(namespacedName, { + const entry: ToolEntry = { connector, originalName: tool.name, sourceId, requiredAction, - }); + namespacedName, + toolKind: 'raw', + }; + + this.toolMap.set(namespacedName, entry); + const sourceTools = this.toolsBySource.get(sourceId) ?? []; + sourceTools.push(entry); + this.toolsBySource.set(sourceId, sourceTools); this.allTools.push({ namespacedName, @@ -70,15 +93,23 @@ export class ToolRouter { connectorId: connector.id, sourceId, requiredAction, + toolKind: 'raw', description: `[${connector.displayName}] ${tool.description}`, inputSchema: normalizeInputSchema(tool), }); } } + + this.semanticToolDefinitions.push(...buildSemanticToolDefinitions()); } getAllTools(identity?: ClientIdentity): NamespacedTool[] { - return this.allTools.filter((tool) => isToolAllowed(tool, identity)).map((tool) => ({ ...tool })); + return [ + ...this.allTools.filter((tool) => isToolAllowed(tool, identity)).map((tool) => ({ ...tool })), + ...this.semanticToolDefinitions + .filter((tool) => this.isSemanticToolVisible(tool.namespacedName as SemanticToolName, identity)) + .map((tool) => ({ ...tool })), + ]; } async callTool( @@ -86,6 +117,10 @@ export class ToolRouter { args: Record, identity?: ClientIdentity, ): Promise { + if (isSemanticToolName(namespacedName)) { + return this.callSemanticTool(namespacedName, args, identity); + } + const entry = this.toolMap.get(namespacedName); if (!entry) { throw new Error(`Unknown tool: ${namespacedName}`); @@ -127,6 +162,158 @@ export class ToolRouter { } } + private async callSemanticTool( + name: SemanticToolName, + args: Record, + identity?: ClientIdentity, + ): Promise { + const start = Date.now(); + const redactions: NonNullable = []; + let result: CallToolResult | undefined; + let threw = false; + + try { + result = name === 'search_personal_context' + ? await this.searchPersonalContext(args, identity) + : await this.readContextItem(args, identity); + + for (const plugin of this.plugins) { + const output = await plugin.process({ + connectorId: 'mvmt', + toolName: name, + originalName: name, + args, + result, + }); + result = output.result; + redactions.push(...flattenRedactionEvents(output.auditEvents ?? [])); + } + return result; + } catch (err) { + threw = true; + result = { + content: [ + { + type: 'text', + text: `Error: ${err instanceof Error ? err.message : 'Unknown error'}`, + }, + ], + isError: true, + }; + return result; + } finally { + this.recordAudit( + { connector: { id: 'mvmt', displayName: 'mvmt' } as Connector }, + name, + args, + start, + result, + threw, + redactions, + identity, + result?.isError ? semanticErrorText(result) : undefined, + ); + } + } + + private async searchPersonalContext( + args: Record, + identity?: ClientIdentity, + ): Promise { + const query = requireString(args.query, 'query'); + const requestedSourceIds = optionalStringArray(args.source_ids); + const limit = normalizeLimit(args.limit); + const sourceIds = this.allowedSemanticSources('search_personal_context', identity, requestedSourceIds); + const results: PersonalContextSearchResult[] = []; + const warnings: string[] = []; + + for (const sourceId of sourceIds) { + if (results.length >= limit) break; + const adapter = this.findSemanticAdapter(sourceId, 'search'); + if (!adapter) { + warnings.push(`source ${sourceId} has no supported search adapter`); + continue; + } + + const raw = await adapter.connector.callTool(adapter.originalName, buildSearchArgs(adapter.originalName, query, limit)); + if (raw.isError) { + warnings.push(`source ${sourceId} search failed: ${extractToolText(raw).slice(0, 160)}`); + continue; + } + + for (const item of normalizeSearchResults(sourceId, sourceTypeFor(adapter), query, raw)) { + results.push(item); + if (results.length >= limit) break; + } + } + + return jsonResult({ + query, + ranking: 'per_source_keyword_union', + results, + ...(warnings.length > 0 ? { warnings } : {}), + }); + } + + private async readContextItem(args: Record, identity?: ClientIdentity): Promise { + const sourceId = requireString(args.source_id, 'source_id'); + const itemId = requireString(args.item_id, 'item_id'); + const allowedSources = this.allowedSemanticSources('read_context_item', identity, [sourceId]); + if (!allowedSources.includes(sourceId)) { + return accessDeniedResult(`missing_permission source=${sourceId} action=read`); + } + + const adapter = this.findSemanticAdapter(sourceId, 'read'); + if (!adapter) return accessDeniedResult(`source ${sourceId} has no supported read adapter`); + + const raw = await adapter.connector.callTool(adapter.originalName, buildReadArgs(adapter.originalName, itemId)); + if (raw.isError) return raw; + const parsed = parseToolJson(raw); + const content = typeof parsed?.content === 'string' ? parsed.content : extractToolText(raw); + const title = typeof parsed?.path === 'string' ? titleFromLocator(parsed.path) : titleFromLocator(itemId); + + return jsonResult({ + source_id: sourceId, + item_id: itemId, + mime_type: sourceTypeFor(adapter) === 'obsidian' ? 'text/markdown' : 'text/plain', + title, + content, + metadata: objectMetadata(parsed, ['path', 'tags']), + }); + } + + private isSemanticToolVisible(name: SemanticToolName, identity?: ClientIdentity): boolean { + return this.allowedSemanticSources(name, identity).length > 0; + } + + private allowedSemanticSources( + name: SemanticToolName, + identity?: ClientIdentity, + requestedSourceIds?: string[], + ): string[] { + const config = name === 'search_personal_context' + ? this.semanticTools.searchPersonalContext + : this.semanticTools.readContextItem; + if (!config || config.enabled === false) return []; + const action: PermissionAction = name === 'search_personal_context' ? 'search' : 'read'; + const requested = requestedSourceIds ? new Set(requestedSourceIds) : undefined; + return config.sourceIds.filter((sourceId) => ( + (!requested || requested.has(sourceId)) && + semanticSourceAllowed(sourceId, action, identity) && + Boolean(this.findSemanticAdapter(sourceId, action)) + )); + } + + private findSemanticAdapter(sourceId: string, action: PermissionAction): ToolEntry | undefined { + const tools = this.toolsBySource.get(sourceId) ?? []; + const preferred = action === 'search' + ? ['search_personal_context', 'search_notes', 'search_files', 'mempalace_search', 'mempalace_kg_search'] + : ['read_context_item', 'read_note', 'read_file', 'read_text_file']; + return preferred + .map((name) => tools.find((tool) => tool.originalName === name)) + .find((tool): tool is ToolEntry => Boolean(tool)); + } + private recordAudit( entry: { connector: Connector }, namespacedName: string, @@ -160,15 +347,13 @@ function isToolAllowed(tool: NamespacedTool, identity?: ClientIdentity): boolean } function toolDeniedReason( - tool: { sourceId: string; requiredAction: PermissionAction }, + tool: { sourceId: string; requiredAction: PermissionAction; toolKind?: ToolKind }, identity?: ClientIdentity, ): string | undefined { if (!identity || identity.isLegacyDefault) return undefined; + if (tool.toolKind === 'semantic') return undefined; if (!identity.rawToolsEnabled) return 'raw_tools_disabled'; - const allowedActions = identity.permissions - .filter((permission) => permission.sourceId === tool.sourceId) - .flatMap((permission) => permission.actions); - if (allowedActions.includes(tool.requiredAction)) return undefined; + if (semanticSourceAllowed(tool.sourceId, tool.requiredAction, identity)) return undefined; return `missing_permission source=${tool.sourceId} action=${tool.requiredAction}`; } @@ -180,6 +365,190 @@ function inferRequiredAction(toolName: string): PermissionAction { return 'read'; } +interface PersonalContextSearchResult { + item_id: string; + source_id: string; + source_type: 'filesystem' | 'obsidian' | 'mempalace' | 'generic'; + title: string; + snippet: string; + locator: string; + actions: ['read_context_item']; +} + +function buildSemanticToolDefinitions(): NamespacedTool[] { + return [ + { + namespacedName: 'search_personal_context', + originalName: 'search_personal_context', + connectorId: 'mvmt', + sourceId: 'mvmt', + requiredAction: 'search', + toolKind: 'semantic', + description: 'Search configured personal context sources and return source-attributed keyword results.', + inputSchema: { + type: 'object', + properties: { + query: { type: 'string', description: 'Keyword or phrase to search for' }, + source_ids: { type: 'array', items: { type: 'string' }, description: 'Optional source IDs to search' }, + limit: { type: 'number', description: 'Maximum total results. Default 8, max 20' }, + }, + required: ['query'], + }, + }, + { + namespacedName: 'read_context_item', + originalName: 'read_context_item', + connectorId: 'mvmt', + sourceId: 'mvmt', + requiredAction: 'read', + toolKind: 'semantic', + description: 'Read a specific item returned by search_personal_context.', + inputSchema: { + type: 'object', + properties: { + source_id: { type: 'string', description: 'Source ID returned by search_personal_context' }, + item_id: { type: 'string', description: 'Item ID returned by search_personal_context' }, + }, + required: ['source_id', 'item_id'], + }, + }, + ]; +} + +function isSemanticToolName(value: string): value is SemanticToolName { + return value === 'search_personal_context' || value === 'read_context_item'; +} + +function semanticSourceAllowed(sourceId: string, action: PermissionAction, identity?: ClientIdentity): boolean { + if (!identity || identity.isLegacyDefault) return true; + return identity.permissions.some((permission) => ( + permission.sourceId === sourceId && permission.actions.includes(action) + )); +} + +function requireString(value: unknown, field: string): string { + if (typeof value !== 'string' || value.trim().length === 0) { + throw new Error(`${field} is required`); + } + return value.trim(); +} + +function optionalStringArray(value: unknown): string[] | undefined { + if (!Array.isArray(value)) return undefined; + return value.filter((entry): entry is string => typeof entry === 'string' && entry.trim().length > 0); +} + +function normalizeLimit(value: unknown): number { + if (typeof value !== 'number' || !Number.isFinite(value)) return 8; + return Math.max(1, Math.min(20, Math.floor(value))); +} + +function buildSearchArgs(toolName: string, query: string, limit: number): Record { + if (toolName === 'search_notes') return { query, maxResults: limit }; + if (toolName === 'search_files') return { path: '.', pattern: query }; + return { query, limit }; +} + +function buildReadArgs(toolName: string, itemId: string): Record { + if (toolName === 'read_note') return { notePath: itemId }; + if (toolName === 'read_file' || toolName === 'read_text_file') return { path: itemId }; + return { item_id: itemId }; +} + +function normalizeSearchResults( + sourceId: string, + sourceType: PersonalContextSearchResult['source_type'], + query: string, + raw: CallToolResult, +): PersonalContextSearchResult[] { + const parsed = parseToolJson(raw); + const rawResults = Array.isArray(parsed?.results) ? parsed.results : []; + const results: PersonalContextSearchResult[] = []; + + for (const item of rawResults) { + if (!item || typeof item !== 'object') continue; + const record = item as Record; + const locator = stringValue(record.path) ?? stringValue(record.file) ?? stringValue(record.id) ?? stringValue(record.title); + if (!locator) continue; + const snippet = stringValue(record.snippet) ?? stringValue(record.text) ?? stringValue(record.content) ?? query; + results.push({ + item_id: locator, + source_id: sourceId, + source_type: sourceType, + title: stringValue(record.title) ?? titleFromLocator(locator), + snippet: snippet.slice(0, 500), + locator, + actions: ['read_context_item'], + }); + } + + return results; +} + +function parseToolJson(raw: CallToolResult): Record | undefined { + const text = extractToolText(raw); + try { + const parsed = JSON.parse(text) as unknown; + return parsed && typeof parsed === 'object' && !Array.isArray(parsed) + ? parsed as Record + : undefined; + } catch { + return undefined; + } +} + +function extractToolText(raw: CallToolResult): string { + return raw.content + .filter((item): item is { type: 'text'; text: string } => item.type === 'text') + .map((item) => item.text) + .join('\n'); +} + +function jsonResult(value: unknown): CallToolResult { + return { content: [{ type: 'text', text: JSON.stringify(value, null, 2) }] }; +} + +function accessDeniedResult(reason: string): CallToolResult { + return { + content: [{ type: 'text', text: `Error: access denied (${reason}).` }], + isError: true, + }; +} + +function sourceTypeFor(entry: ToolEntry): PersonalContextSearchResult['source_type'] { + const fingerprint = `${entry.sourceId} ${entry.connector.id} ${entry.connector.displayName} ${entry.originalName}`.toLowerCase(); + if (fingerprint.includes('obsidian')) return 'obsidian'; + if (fingerprint.includes('mempalace')) return 'mempalace'; + if (fingerprint.includes('file')) return 'filesystem'; + return 'generic'; +} + +function stringValue(value: unknown): string | undefined { + return typeof value === 'string' && value.length > 0 ? value : undefined; +} + +function titleFromLocator(locator: string): string { + const normalized = locator.replace(/\\/g, '/'); + const leaf = normalized.split('/').filter(Boolean).at(-1) ?? normalized; + return leaf.replace(/\.md$/i, '') || locator; +} + +function objectMetadata(value: Record | undefined, keys: string[]): Record { + const metadata: Record = {}; + if (!value) return metadata; + for (const key of keys) { + const entry = value[key]; + if (typeof entry === 'string') metadata[key] = entry; + if (Array.isArray(entry) && entry.every((item) => typeof item === 'string')) metadata[key] = entry; + } + return metadata; +} + +function semanticErrorText(result: CallToolResult): string | undefined { + const text = extractToolText(result); + return text.startsWith('Error: ') ? text.slice('Error: '.length, 180) : undefined; +} + function flattenRedactionEvents( events: PatternRedactorAuditEvent[], ): NonNullable { diff --git a/tests/router.test.ts b/tests/router.test.ts index 2868e5b..bd05e6e 100644 --- a/tests/router.test.ts +++ b/tests/router.test.ts @@ -33,6 +33,7 @@ describe('ToolRouter', () => { connectorId: 'proxy_github', sourceId: 'proxy_github', requiredAction: 'write', + toolKind: 'raw', description: '[github] Create an issue', inputSchema: { type: 'object', properties: { title: { type: 'string' } } }, }, @@ -267,6 +268,76 @@ describe('ToolRouter', () => { expect(audit.record).toHaveBeenCalledWith(expect.objectContaining({ clientId: 'codex', isError: false })); }); + + it('lists semantic tools even when raw tools are disabled', async () => { + const connector = obsidianFixtureConnector(); + const router = new ToolRouter([connector], undefined, [], { + semanticTools: { + searchPersonalContext: { enabled: true, sourceIds: ['obsidian'] }, + readContextItem: { enabled: true, sourceIds: ['obsidian'] }, + }, + }); + await router.initialize(); + + expect(router.getAllTools(client('chatgpt', false, [ + { sourceId: 'obsidian', actions: ['search', 'read'] }, + ])).map((tool) => tool.namespacedName)).toEqual([ + 'search_personal_context', + 'read_context_item', + ]); + }); + + it('searches allowed semantic sources and returns source-attributed results', async () => { + const connector = obsidianFixtureConnector(); + const router = new ToolRouter([connector], undefined, [], { + semanticTools: { + searchPersonalContext: { enabled: true, sourceIds: ['obsidian'] }, + }, + }); + await router.initialize(); + + const result = await router.callTool('search_personal_context', { query: 'launch' }, client('chatgpt', false, [ + { sourceId: 'obsidian', actions: ['search'] }, + ])); + const parsed = JSON.parse(result.content[0].type === 'text' ? result.content[0].text : '{}'); + + expect(parsed).toMatchObject({ + query: 'launch', + ranking: 'per_source_keyword_union', + results: [ + { + item_id: 'projects/launch.md', + source_id: 'obsidian', + source_type: 'obsidian', + actions: ['read_context_item'], + }, + ], + }); + }); + + it('reads allowed semantic context items without enabling raw tools', async () => { + const connector = obsidianFixtureConnector(); + const router = new ToolRouter([connector], undefined, [], { + semanticTools: { + readContextItem: { enabled: true, sourceIds: ['obsidian'] }, + }, + }); + await router.initialize(); + + const result = await router.callTool( + 'read_context_item', + { source_id: 'obsidian', item_id: 'projects/launch.md' }, + client('chatgpt', false, [{ sourceId: 'obsidian', actions: ['read'] }]), + ); + const parsed = JSON.parse(result.content[0].type === 'text' ? result.content[0].text : '{}'); + + expect(parsed).toMatchObject({ + source_id: 'obsidian', + item_id: 'projects/launch.md', + mime_type: 'text/markdown', + content: '# Launch\nShip it.', + }); + }); }); function client( @@ -282,3 +353,38 @@ function client( permissions, }; } + +function obsidianFixtureConnector(): Connector { + return { + id: 'obsidian', + displayName: 'obsidian', + initialize: vi.fn(), + shutdown: vi.fn(), + listTools: vi.fn(async () => [ + { name: 'search_notes', description: 'Search notes', inputSchema: { type: 'object', properties: {} } }, + { name: 'read_note', description: 'Read a note', inputSchema: { type: 'object', properties: {} } }, + ]), + callTool: vi.fn(async (name: string) => { + if (name === 'search_notes') { + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify({ + results: [{ path: 'projects/launch.md', snippet: 'launch plan' }], + }), + }, + ], + }; + } + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify({ path: 'projects/launch.md', content: '# Launch\nShip it.' }), + }, + ], + }; + }), + }; +} diff --git a/tests/server.test.ts b/tests/server.test.ts index f5f870f..cada1cc 100644 --- a/tests/server.test.ts +++ b/tests/server.test.ts @@ -1321,6 +1321,53 @@ describe('startHttpServer lifecycle', () => { } }); + it('serves semantic tools over MCP for clients without raw tool access', async () => { + const connector = new SemanticConnector(); + const router = new ToolRouter([{ connector, sourceId: 'obsidian' }], undefined, [], { + semanticTools: { + searchPersonalContext: { enabled: true, sourceIds: ['obsidian'] }, + readContextItem: { enabled: true, sourceIds: ['obsidian'] }, + }, + }); + await router.initialize(); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'mvmt-server-test-')); + const tokenPath = path.join(tmp, '.mvmt', '.session-token'); + const server = await startHttpServer(router, { + port: 0, + tokenPath, + clients: [ + { + id: 'chatgpt', + name: 'ChatGPT', + auth: { type: 'token', tokenHash: sha256Hex('chatgpt-token') }, + rawToolsEnabled: false, + permissions: [{ sourceId: 'obsidian', actions: ['search', 'read'] }], + }, + ], + }); + + try { + const sessionId = await initializeMcpSession(server.port, 'chatgpt-token'); + const listTools = await mcpJsonRequest(server.port, 'chatgpt-token', sessionId, 2, 'tools/list', {}); + expect(listTools.result.tools.map((tool: { name: string }) => tool.name)).toEqual([ + 'search_personal_context', + 'read_context_item', + ]); + + const search = await mcpJsonRequest(server.port, 'chatgpt-token', sessionId, 3, 'tools/call', { + name: 'search_personal_context', + arguments: { query: 'launch' }, + }); + expect(JSON.parse(search.result.content[0].text).results).toEqual([ + expect.objectContaining({ item_id: 'projects/launch.md', source_id: 'obsidian' }), + ]); + expect(connector.calls.map((call) => call.name)).toEqual(['search_notes']); + } finally { + await server.close(); + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + it('revokes outstanding OAuth access tokens the moment the signing key file is rewritten', async () => { const router = new ToolRouter([new EmptyConnector()]); await router.initialize(); @@ -1475,6 +1522,45 @@ class PolicyConnector implements Connector { async shutdown(): Promise {} } +class SemanticConnector implements Connector { + readonly id = 'obsidian'; + readonly displayName = 'obsidian'; + readonly calls: Array<{ name: string; args: Record }> = []; + + async initialize(): Promise {} + + async listTools() { + return [ + { name: 'search_notes', description: 'Search notes', inputSchema: { type: 'object', properties: {} } }, + { name: 'read_note', description: 'Read a note', inputSchema: { type: 'object', properties: {} } }, + ]; + } + + async callTool(name: string, args: Record) { + this.calls.push({ name, args }); + if (name === 'search_notes') { + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify({ results: [{ path: 'projects/launch.md', snippet: 'launch plan' }] }), + }, + ], + }; + } + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify({ path: 'projects/launch.md', content: '# Launch\nShip it.' }), + }, + ], + }; + } + + async shutdown(): Promise {} +} + function canListenOn(port: number): Promise { return new Promise((resolve) => { const server = createServer();