From d619c7183a1ef7ac3f3221dcddf73f874f025154 Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Fri, 13 Mar 2026 17:58:38 +0000 Subject: [PATCH 1/2] Extract removeVectorsAndRelated() in VectorStore Consolidate ~80 lines of duplicated cleanup logic from cleanupExpired() and evictOldest() into a shared private method that handles chunk deletion, empty cluster removal, orphaned index entry cleanup, and memory state sync. --- src/storage/vector-store.ts | 132 ++++++++++++------------------------ 1 file changed, 43 insertions(+), 89 deletions(-) diff --git a/src/storage/vector-store.ts b/src/storage/vector-store.ts index 6ced4dc..6323a0b 100644 --- a/src/storage/vector-store.ts +++ b/src/storage/vector-store.ts @@ -575,42 +575,23 @@ export class VectorStore { } /** - * Clean up expired vectors and their corresponding chunks. - * Removes vectors that haven't been accessed within the TTL period. - * Also deletes the corresponding chunks (FK cascades handle cluster assignments and edges). + * Remove vectors and all related data (chunks, clusters, index entries) by ID. + * Handles DB deletions with FK cascades, orphan cleanup, and in-memory removal. * - * @param ttlDays - Number of days after which unaccessed vectors expire - * @returns Number of vectors deleted + * @param ids - Vector/chunk IDs to remove + * @returns Number of vectors deleted from the DB */ - async cleanupExpired(ttlDays: number): Promise { - await this.load(); - + private removeVectorsAndRelated(ids: string[]): number { const db = getDb(); - - // Find expired vectors (any vector not accessed within TTL) - const expiredRows = db - .prepare( - ` - SELECT id FROM ${this.tableName} - WHERE last_accessed < datetime('now', '-' || ? || ' days') - `, - ) - .all(ttlDays) as { id: string }[]; - - if (expiredRows.length === 0) { - return 0; - } - - const expiredIds = expiredRows.map((r) => r.id); - const placeholders = sqlPlaceholders(expiredIds.length); + const placeholders = sqlPlaceholders(ids.length); // Delete chunks first (FK cascades handle chunk_clusters and edges) - db.prepare(`DELETE FROM chunks WHERE id IN (${placeholders})`).run(...expiredIds); + db.prepare(`DELETE FROM chunks WHERE id IN (${placeholders})`).run(...ids); // Delete vectors const result = db .prepare(`DELETE FROM ${this.tableName} WHERE id IN (${placeholders})`) - .run(...expiredIds); + .run(...ids); // Remove empty clusters (no remaining members after chunk deletion) db.prepare( @@ -623,11 +604,10 @@ export class VectorStore { // Clean up index entries that referenced the deleted chunks try { - const placeholdersForCleanup = sqlPlaceholders(expiredIds.length); - // Remove reverse-lookup rows + const placeholdersForCleanup = sqlPlaceholders(ids.length); db.prepare( `DELETE FROM index_entry_chunks WHERE chunk_id IN (${placeholdersForCleanup})`, - ).run(...expiredIds); + ).run(...ids); // Delete orphaned index entries (no remaining chunk references) const orphaned = db @@ -657,7 +637,7 @@ export class VectorStore { } // Remove from memory - for (const id of expiredIds) { + for (const id of ids) { this.vectors.delete(id); this.chunkProjectIndex.delete(id); } @@ -665,6 +645,37 @@ export class VectorStore { return result.changes; } + /** + * Clean up expired vectors and their corresponding chunks. + * Removes vectors that haven't been accessed within the TTL period. + * Also deletes the corresponding chunks (FK cascades handle cluster assignments and edges). + * + * @param ttlDays - Number of days after which unaccessed vectors expire + * @returns Number of vectors deleted + */ + async cleanupExpired(ttlDays: number): Promise { + await this.load(); + + const db = getDb(); + + // Find expired vectors (any vector not accessed within TTL) + const expiredRows = db + .prepare( + ` + SELECT id FROM ${this.tableName} + WHERE last_accessed < datetime('now', '-' || ? || ' days') + `, + ) + .all(ttlDays) as { id: string }[]; + + if (expiredRows.length === 0) { + return 0; + } + + const expiredIds = expiredRows.map((r) => r.id); + return this.removeVectorsAndRelated(expiredIds); + } + /** * Evict the oldest vectors when collection exceeds maxCount. * Deletes vectors (and their chunks via FK cascade) by ascending last_accessed. @@ -697,64 +708,7 @@ export class VectorStore { if (toEvict.length === 0) return 0; const evictIds = toEvict.map((r) => r.id); - const placeholders = sqlPlaceholders(evictIds.length); - - // Delete chunks first (FK cascades handle chunk_clusters and edges) - db.prepare(`DELETE FROM chunks WHERE id IN (${placeholders})`).run(...evictIds); - - // Delete vectors - const result = db - .prepare(`DELETE FROM ${this.tableName} WHERE id IN (${placeholders})`) - .run(...evictIds); - - // Remove empty clusters - db.prepare( - ` - DELETE FROM clusters WHERE id NOT IN ( - SELECT DISTINCT cluster_id FROM chunk_clusters - ) - `, - ).run(); - - // Clean up index entries that referenced the deleted chunks - try { - const placeholdersForCleanup = sqlPlaceholders(evictIds.length); - db.prepare( - `DELETE FROM index_entry_chunks WHERE chunk_id IN (${placeholdersForCleanup})`, - ).run(...evictIds); - - const orphaned = db - .prepare( - `SELECT id FROM index_entries WHERE id NOT IN ( - SELECT DISTINCT index_entry_id FROM index_entry_chunks - )`, - ) - .all() as Array<{ id: string }>; - - if (orphaned.length > 0) { - const orphanIds = orphaned.map((r) => r.id); - const orphanPlaceholders = sqlPlaceholders(orphanIds.length); - db.prepare(`DELETE FROM index_entries WHERE id IN (${orphanPlaceholders})`).run( - ...orphanIds, - ); - - if (this.tableName === 'vectors') { - db.prepare(`DELETE FROM index_vectors WHERE id IN (${orphanPlaceholders})`).run( - ...orphanIds, - ); - } - } - } catch { - // index_entry_chunks table may not exist yet - } - - // Remove from memory - for (const id of evictIds) { - this.vectors.delete(id); - this.chunkProjectIndex.delete(id); - } - - return result.changes; + return this.removeVectorsAndRelated(evictIds); } } From d2e30c8df72deb0e1f96cfa5cd0e0e4d46eafc57 Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Fri, 13 Mar 2026 17:58:38 +0000 Subject: [PATCH 2/2] Extract recordRetrievalSafe() and extractRetrievalArgs() in tools.ts - recordRetrievalSafe: replaces 3 identical try/catch blocks around recordRetrieval() in search, recall, and predict handlers - extractRetrievalArgs: replaces repetitive arg extraction (query, project, agent, maxTokens) across the same 3 handlers --- src/mcp/tools.ts | 95 ++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 016bbe4..ddccc57 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -76,6 +76,44 @@ function formatSearchResponse(response: SearchResponse): string { return header + response.text; } +/** + * Record retrieval feedback, suppressing errors since it's non-critical. + */ +function recordRetrievalSafe( + chunks: Array<{ id: string }>, + query: string, + type: 'search' | 'recall' | 'predict', +): void { + if (chunks.length === 0) return; + try { + recordRetrieval( + chunks.map((c) => c.id), + query, + type, + ); + } catch { + // Non-critical — don't fail the tool response + } +} + +/** + * Extract common retrieval arguments from tool args. + */ +function extractRetrievalArgs(args: Record): { + query: string; + project: string | undefined; + agent: string | undefined; + maxTokens: number; +} { + const config = getConfig(); + return { + query: (args.query ?? args.context) as string, + project: args.project as string | undefined, + agent: args.agent as string | undefined, + maxTokens: (args.max_tokens as number | undefined) ?? config.mcpMaxResponseTokens, + }; +} + /** * Search tool: semantic discovery across memory. */ @@ -107,11 +145,7 @@ export const searchTool: ToolDefinition = { required: ['query'], }, handler: async (args) => { - const query = args.query as string; - const project = args.project as string | undefined; - const agent = args.agent as string | undefined; - const config = getConfig(); - const maxTokens = (args.max_tokens as number | undefined) ?? config.mcpMaxResponseTokens; + const { query, project, agent, maxTokens } = extractRetrievalArgs(args); const response = await searchContext({ query, @@ -121,19 +155,7 @@ export const searchTool: ToolDefinition = { }); const result = formatSearchResponse(response); - - // Fire-and-forget feedback recording - if (response.chunks.length > 0) { - try { - recordRetrieval( - response.chunks.map((c) => c.id), - query, - 'search', - ); - } catch { - // Non-critical — don't fail the response - } - } + recordRetrievalSafe(response.chunks, query, 'search'); return result; }, @@ -170,11 +192,7 @@ export const recallTool: ToolDefinition = { required: ['query'], }, handler: async (args) => { - const query = args.query as string; - const project = args.project as string | undefined; - const agent = args.agent as string | undefined; - const config = getConfig(); - const maxTokens = (args.max_tokens as number | undefined) ?? config.mcpMaxResponseTokens; + const { query, project, agent, maxTokens } = extractRetrievalArgs(args); // Search session summaries for supplementary context let summarySection = ''; @@ -201,18 +219,7 @@ export const recallTool: ToolDefinition = { }); const result = formatResponse(response); - - if (response.chunks.length > 0) { - try { - recordRetrieval( - response.chunks.map((c) => c.id), - query, - 'recall', - ); - } catch { - // Non-critical - } - } + recordRetrievalSafe(response.chunks, query, 'recall'); return summarySection + result; }, @@ -249,11 +256,7 @@ export const predictTool: ToolDefinition = { required: ['context'], }, handler: async (args) => { - const context = args.context as string; - const project = args.project as string | undefined; - const agent = args.agent as string | undefined; - const config = getConfig(); - const maxTokens = (args.max_tokens as number | undefined) ?? config.mcpMaxResponseTokens; + const { query: context, project, agent, maxTokens } = extractRetrievalArgs(args); const response = await predict(context, { maxTokens, @@ -274,17 +277,7 @@ export const predictTool: ToolDefinition = { result += `\n\n[Chain walk: fell back to search — ${d.fallbackReason}. Search found ${d.searchResultCount} chunks, ${d.seedCount} seeds, ${d.chainsAttempted} chain(s) attempted, lengths: ${lengths}]`; } - if (response.chunks.length > 0) { - try { - recordRetrieval( - response.chunks.map((c) => c.id), - context, - 'predict', - ); - } catch { - // Non-critical - } - } + recordRetrievalSafe(response.chunks, context, 'predict'); return result; },