/**
 * Breadth-first traversal from seed entity names through graph relations.
 *
 * Relations are stored as directed edges, but discovery follows them in
 * both directions. Seeds themselves are never reported; only entities that
 * are reachable from a seed AND present in the entity index are returned.
 *
 * Used by orama-store's searchObservations() to find related entities whose
 * observations are then fused into the text/vector results via RRF.
 *
 * @param seedNames - Starting entity names (matched case-insensitively)
 * @param maxHops - Maximum traversal depth (default: 2); values below 1 discover nothing
 * @returns Discovered entities in BFS order with their hop distance from the nearest seed
 */
async graphSearch(
  seedNames: string[],
  maxHops = 2,
): Promise<{ entityName: string; hopDistance: number }[]> {
  await this.init();

  const discovered: { entityName: string; hopDistance: number }[] = [];

  // Lower-cased names already reached; seeds count as visited so they are never reported.
  const visited = new Set<string>();

  // Frontier starts with the distinct seeds that actually exist in the graph.
  let frontier: string[] = [];
  for (const seed of seedNames) {
    const key = seed.toLowerCase();
    if (visited.has(key) || !this.entityIndex.has(key)) continue;
    visited.add(key);
    frontier.push(key);
  }

  // Nothing to expand: skip building the adjacency map altogether.
  if (frontier.length === 0 || !(maxHops >= 1)) return discovered;

  // Undirected adjacency keyed by lower-cased entity name.
  const adjacency = new Map<string, Set<string>>();
  const link = (a: string, b: string): void => {
    const neighbors = adjacency.get(a);
    if (neighbors) {
      neighbors.add(b);
    } else {
      adjacency.set(a, new Set<string>([b]));
    }
  };
  for (const rel of this.relations) {
    const fromLower = rel.from.toLowerCase();
    const toLower = rel.to.toLowerCase();
    link(fromLower, toLower);
    link(toLower, fromLower);
  }

  // Expand one hop at a time so hopDistance is the shortest distance to any seed.
  for (let hop = 1; hop <= maxHops && frontier.length > 0; hop++) {
    const nextFrontier: string[] = [];

    for (const current of frontier) {
      const neighbors = adjacency.get(current);
      if (!neighbors) continue;

      for (const neighbor of neighbors) {
        if (visited.has(neighbor)) continue;
        visited.add(neighbor);

        // Relation endpoints without an entity record are neither reported nor expanded.
        const entity = this.entityIndex.get(neighbor);
        if (entity) {
          discovered.push({ entityName: entity.name, hopDistance: hop });
          nextFrontier.push(neighbor);
        }
      }
    }

    frontier = nextFrontier;
  }

  return discovered;
}

/**
 * Resolve entity names to their Entity records (case-insensitive lookup).
 *
 * Names with no matching entity are skipped; the result keeps the order of
 * `names`. This does NOT return observation IDs: Entity.observations[] holds
 * free-text strings, so observation lookup by entity name has to go through
 * the observations module or an Orama search.
 *
 * @param names - Entity names to look up
 * @returns The entities found, in input order
 */
getEntitiesForNames(names: string[]): Entity[] {
  const result: Entity[] = [];
  for (const name of names) {
    const entity = this.entityIndex.get(name.toLowerCase());
    if (entity) result.push(entity);
  }
  return result;
}
+ * RRF ignores raw scores — only rank positions matter, which makes + * it safe to fuse results from incomparable scoring systems (BM25, + * cosine similarity, graph hop distance, etc.). + */ + +import type { IndexEntry } from '../types.js'; + +// ─── Public Types ──────────────────────────────────────────────────────────── + +/** One ranked result list from a single search strategy */ +export interface RrfSource { + /** Ordered search results, best first */ + results: IndexEntry[]; + /** + * Weight for this source. + * Higher weight → stronger influence on the final ranking. + * Default: 1. Typical values: 1 (equal), 0.5 (half weight), 2 (double weight). + */ + weight?: number; + /** + * Optional label for trace output (e.g. 'bm25', 'vector', 'graph'). + * Not used in scoring — purely for debugging via buildRrfTrace(). + */ + label?: string; +} + +/** Options for the RRF merger */ +export interface RrfOptions { + /** + * Rank smoothing constant from Cormack et al. 2009. + * k=60 is the paper's standard; higher k reduces the rank-position + * advantage of top-ranked documents relative to lower-ranked ones. + * Default: 60. + */ + k?: number; + /** Trim merged list to this many results. Omit or 0 for no limit. 
/** Per-document contribution from one source (for trace output) */
export interface RrfContribution {
  /** Source label (from RrfSource.label, or 'source-{index}' if unlabeled) */
  label: string;
  /** 1-based rank within this source, or null if the document was absent */
  rank: number | null;
  /** Weighted RRF contribution: weight / (k + rank), or 0 if absent */
  contribution: number;
}

/** Full debug trace for one merged result document */
export interface RrfTrace {
  /** Document identifier: `${projectId}::${observationId}` */
  key: string;
  /** Total accumulated RRF score across all sources */
  totalScore: number;
  /** Per-source contribution breakdown */
  contributions: RrfContribution[];
}

// ─── Helpers ─────────────────────────────────────────────────────────────────

/**
 * Unique document key: `${projectId}::${id}`.
 * A missing projectId is rendered as an empty string.
 */
function makeKey(entry: IndexEntry): string {
  return `${entry.projectId ?? ''}::${entry.id}`;
}

/**
 * Reject smoothing constants that make `weight / (k + rank)` meaningless
 * (negative k can hit a zero or negative denominator; NaN/Infinity poison every score).
 */
function assertValidK(k: number): void {
  if (!Number.isFinite(k) || k < 0) {
    throw new RangeError(`RRF k must be a finite, non-negative number (received ${k})`);
  }
}

/**
 * Map each document key in one result list to its FIRST (best) 1-based rank.
 * Later duplicates of the same key are ignored, so a document has exactly one
 * rank per source. Map iteration order follows the list order.
 */
function rankFirstOccurrences(
  results: IndexEntry[],
): Map<string, { rank: number; entry: IndexEntry }> {
  const ranks = new Map<string, { rank: number; entry: IndexEntry }>();
  results.forEach((entry, index) => {
    const key = makeKey(entry);
    if (!ranks.has(key)) ranks.set(key, { rank: index + 1, entry });
  });
  return ranks;
}

// ─── Core RRF ────────────────────────────────────────────────────────────────

/**
 * Merge multiple ranked result lists into a single re-ranked list
 * using Reciprocal Rank Fusion: score(d) = Σ_i weight_i / (k + rank_i(d)).
 *
 * Properties:
 * - Deduplicates by projectId + id, across sources AND within a source
 *   (a key repeated inside one list only counts at its best rank)
 * - Keeps the IndexEntry data from the source that ranked the document best
 *   (the earlier source wins when ranks are equal)
 * - Returns copies whose `score` is the RRF score (input entries are not mutated)
 * - Equal scores are ordered by best rank, then by first appearance
 *
 * @param sources - Ranked result lists with optional per-source weights (default weight 1)
 * @param options - `k` smoothing constant (default 60) and optional `limit` (0/omitted = no limit)
 * @returns Merged, deduplicated, re-ranked IndexEntry[]
 * @throws RangeError if `options.k` is negative or not finite
 */
export function mergeWithRRF(
  sources: RrfSource[],
  options: RrfOptions = {},
): IndexEntry[] {
  const k = options.k ?? 60;
  assertValidK(k);

  // One accumulator per document key, in first-seen order.
  const fused = new Map<string, { entry: IndexEntry; bestRank: number; score: number }>();

  for (const source of sources) {
    const weight = source.weight ?? 1;

    for (const [key, { rank, entry }] of rankFirstOccurrences(source.results)) {
      const contribution = weight / (k + rank);
      const doc = fused.get(key);

      if (doc === undefined) {
        fused.set(key, { entry, bestRank: rank, score: contribution });
        continue;
      }

      doc.score += contribution;
      // Strictly better rank only, so the earlier source keeps its entry on ties.
      if (rank < doc.bestRank) {
        doc.bestRank = rank;
        doc.entry = entry;
      }
    }
  }

  // Score descending; tiebreak by best rank ascending (sort is stable beyond that).
  const ranked = [...fused.values()].sort((a, b) => {
    const scoreDiff = b.score - a.score;
    return scoreDiff !== 0 ? scoreDiff : a.bestRank - b.bestRank;
  });

  const limited =
    options.limit && options.limit > 0 ? ranked.slice(0, options.limit) : ranked;

  return limited.map(({ entry, score }) => ({ ...entry, score }));
}

// ─── Debug Trace ─────────────────────────────────────────────────────────────

/**
 * Build a per-document breakdown showing how much each source contributed.
 *
 * Uses the same keys, ranks and ordering rules as mergeWithRRF, so for the
 * same `sources` and `k` each `totalScore` equals the merged entry's `score`
 * and the trace order matches the (unlimited) merged order.
 *
 * @param sources - Same sources passed to mergeWithRRF
 * @param k - Same k constant used in mergeWithRRF (default 60)
 * @returns Array of RrfTrace, sorted by totalScore descending
 * @throws RangeError if `k` is negative or not finite
 *
 * @example
 * const trace = buildRrfTrace([
 *   { results: bm25Results, label: 'bm25' },
 *   { results: vectorResults, label: 'vector', weight: 0.5 },
 * ]);
 * console.table(trace.map(t => ({ key: t.key, score: t.totalScore })));
 */
export function buildRrfTrace(sources: RrfSource[], k = 60): RrfTrace[] {
  assertValidK(k);

  // Per-source lookup: key → first (best) 1-based rank.
  const rankMaps = sources.map((source) => rankFirstOccurrences(source.results));

  // Union of keys in first-seen order, with the best rank any source gave each key.
  const bestRankByKey = new Map<string, number>();
  for (const ranks of rankMaps) {
    for (const [key, { rank }] of ranks) {
      const best = bestRankByKey.get(key);
      if (best === undefined || rank < best) bestRankByKey.set(key, rank);
    }
  }

  const traces: RrfTrace[] = [];

  for (const key of bestRankByKey.keys()) {
    let totalScore = 0;

    const contributions: RrfContribution[] = sources.map((source, i) => {
      const rank = rankMaps[i]?.get(key)?.rank ?? null;
      const contribution = rank === null ? 0 : (source.weight ?? 1) / (k + rank);
      totalScore += contribution;
      return { label: source.label ?? `source-${i}`, rank, contribution };
    });

    traces.push({ key, totalScore, contributions });
  }

  // Same ordering rule as mergeWithRRF so both outputs line up index by index.
  traces.sort((a, b) => {
    const scoreDiff = b.totalScore - a.totalScore;
    if (scoreDiff !== 0) return scoreDiff;
    return (bestRankByKey.get(a.key) ?? Infinity) - (bestRankByKey.get(b.key) ?? Infinity);
  });

  return traces;
}
AffinityContext, type MemoryContent } from './project-affinity.js'; import { detectQueryIntent, applyIntentBoost } from '../search/intent-detector.js'; import { maybeExpandSearchQuery } from '../search/query-expansion.js'; +import type { KnowledgeGraphManager } from '../memory/graph.js'; +import { mergeWithRRF, type RrfSource } from '../search/rrf.js'; let db: AnyOrama | null = null; let embeddingEnabled = false; @@ -22,6 +24,28 @@ let embeddingDimensions: number | null = null; const NON_CJK_HYBRID_SIMILARITY = 0.45; const lastSearchModeByProject = new Map(); const SEARCH_MODE_DEFAULT_KEY = '__global__'; + +// ── Graph Search Fusion ──────────────────────────────────────────── +// Module-level graph manager for RRF-based graph search fusion. +// Set via setGraphManager() from server.ts at startup and on project switch. +let graphManagerInstance: KnowledgeGraphManager | null = null; + +/** + * Inject the KnowledgeGraphManager for graph-based search fusion. + * Must be called after KnowledgeGraphManager is initialized. + * Called from server.ts at startup and on project switch. + */ +export function setGraphManager(gm: KnowledgeGraphManager | null): void { + graphManagerInstance = gm; +} + +/** Default RRF source weights for the three retrieval channels. */ +const RRF_WEIGHTS = { + /** BM25 + Orama hybrid (fulltext/vector) */ + orama: 1.0, + /** Graph traversal — lower weight since it's less precise */ + graph: 0.4, +}; export function getLastSearchMode(projectId?: string): string { return lastSearchModeByProject.get(projectId ?? SEARCH_MODE_DEFAULT_KEY) ?? 'fulltext'; } @@ -223,7 +247,6 @@ export async function hydrateIndex(observations: any[]): Promise { let inserted = 0; for (const obs of observations) { if (!obs || !obs.id || !obs.projectId) continue; - if ((obs.status ?? 
'active') !== 'active') continue; try { const doc: MemorixDocument = { id: makeOramaObservationId(obs.projectId, obs.id), @@ -403,7 +426,7 @@ export async function searchObservations(options: SearchOptions): Promise>; try { results = await search(database, searchParams); } catch (error) { @@ -483,6 +506,117 @@ export async function searchObservations(options: SearchOptions): Promise 0) { + try { + const oramaEntityNames = new Set( + results.hits + .filter((hit) => { + if (!projectIds) return true; + const doc = hit.document as unknown as MemorixDocument; + return projectIds.includes(doc.projectId); + }) + .filter((hit) => { + if (statusFilter === 'all') return true; + const doc = hit.document as unknown as MemorixDocument; + return (doc.status || 'active') === statusFilter; + }) + .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)) + .map((hit) => (hit.document as unknown as MemorixDocument).entityName) + .filter((name): name is string => !!name) + .slice(0, 2), + ); + + if (oramaEntityNames.size > 0) { + // BFS traversal: discover entities 1-2 hops from result entities + const graphDiscovered = await graphManagerInstance.graphSearch( + [...oramaEntityNames], + 2, // maxHops + ); + mark('graphTraversal'); + + if (graphDiscovered.length > 0) { + // Look up observations for graph-discovered entities via Orama + const discoveredNames = graphDiscovered.map(d => d.entityName); + const graphSearchParams: Record = { + term: '', + limit: Math.min(discoveredNames.length * 5, 50), + where: { + entityName: discoveredNames, + ...(projectIds && projectIds.length === 1 ? { projectId: projectIds[0] } : {}), + ...(statusFilter !== 'all' ? 
{ status: statusFilter } : {}), + }, + }; + let graphResults: Awaited> | null; + try { + graphResults = await search(database, graphSearchParams); + } catch { + graphResults = null; + } + mark('graphOramaLookup'); + + if (graphResults && graphResults.count > 0) { + // Build graph-sourced IndexEntry[] ranked by hop distance + const hopMap = new Map(graphDiscovered.map(d => [d.entityName.toLowerCase(), d.hopDistance])); + const graphEntries: IndexEntry[] = graphResults.hits + .filter(hit => { + if (!projectIds) return true; + const doc = hit.document as unknown as MemorixDocument; + return projectIds.includes(doc.projectId); + }) + .map(hit => { + const doc = hit.document as unknown as MemorixDocument; + const obsType = doc.type as ObservationType; + const hopDist = hopMap.get(doc.entityName.toLowerCase()) ?? 2; + return { + id: doc.observationId, + time: formatTime(doc.createdAt), + rawTime: doc.createdAt, + type: obsType, + icon: OBSERVATION_ICONS[obsType] ?? '❓', + title: doc.title, + tokens: doc.tokens, + // Score inversely proportional to hop distance for graph ranking + score: 1 / hopDist, + projectId: doc.projectId, + source: (doc.source || 'agent') as 'agent' | 'git' | 'manual', + _isCommandLog: isCommandLogEntry(doc.title), + } as any; + }) + // Sort by hop distance ascending (closer = better) + .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)); + + // Merge with RRF: orama results + graph results + const rrfSources: RrfSource[] = [ + { results: intermediate as IndexEntry[], weight: RRF_WEIGHTS.orama, label: 'orama' }, + { results: graphEntries, weight: RRF_WEIGHTS.graph, label: 'graph' }, + ]; + const merged = mergeWithRRF(rrfSources, { limit: requestLimit }); + intermediate = merged.map(entry => ({ + ...entry, + score: entry.score ?? 0, + projectId: entry.projectId ?? options.projectId ?? '', + source: entry.source ?? 'agent', + rawTime: (entry as any).rawTime ?? '', + _isCommandLog: (entry as any)._isCommandLog ?? 
false, + })); + lastSearchModeByProject.set( + modeKey, + (lastSearchModeByProject.get(modeKey) ?? 'fulltext') + ' + graph-rrf', + ); + mark('rrfMerge'); + } + } + } + } catch (error) { + // Graph search is best-effort — fall back to Orama-only results + console.error('[memorix] Graph search fusion failed, using Orama results only', error); + } + } + // ── Intent-Aware Type Boosting ─────────────────────────────── // Boost scores for observation types that match the query intent if (intentResult && intentResult.confidence > 0.3) { diff --git a/tests/search/graph-search-fusion.test.ts b/tests/search/graph-search-fusion.test.ts new file mode 100644 index 0000000..b55722f --- /dev/null +++ b/tests/search/graph-search-fusion.test.ts @@ -0,0 +1,365 @@ +/** + * Graph Search Fusion Tests + * + * Tests the integration of knowledge graph BFS traversal with + * Orama search results via Reciprocal Rank Fusion (RRF). + * + * Covers: + * - graphSearch() BFS traversal in KnowledgeGraphManager + * - setGraphManager() injection into orama-store + * - RRF merge of Orama + graph results in searchObservations() + * - Fast-tier queries skip graph search entirely + * - Graceful degradation when no graph manager is set + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; + +// Mock embedding provider BEFORE any imports that might use it +vi.mock('../../src/embedding/provider.js', () => ({ + getEmbeddingProvider: async () => null, + isVectorSearchAvailable: async () => false, + isEmbeddingExplicitlyDisabled: () => true, + resetProvider: () => {}, +})); + +import { promises as fs } from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import { KnowledgeGraphManager } from '../../src/memory/graph.js'; +import { initObservations, storeObservation } from '../../src/memory/observations.js'; +import { + resetDb, + setGraphManager, + searchObservations, + getLastSearchMode, +} from '../../src/store/orama-store.js'; +import type { Entity, Relation } from 
'../../src/types.js'; + +let testDir: string; +let graphManager: KnowledgeGraphManager; +const PROJECT_ID = 'test/graph-search-fusion'; + +beforeEach(async () => { + testDir = await fs.mkdtemp(path.join(os.tmpdir(), 'memorix-graph-fusion-')); + await resetDb(); + graphManager = new KnowledgeGraphManager(testDir); + await graphManager.init(); + setGraphManager(graphManager); + await initObservations(testDir); +}); + +// ─── graphSearch() BFS Traversal ────────────────────────────────────────────── + +describe('KnowledgeGraphManager.graphSearch()', () => { + it('returns empty array when seed names are empty', async () => { + const result = await graphManager.graphSearch([]); + expect(result).toEqual([]); + }); + + it('returns empty array when seeds have no relations', async () => { + await graphManager.createEntities([ + { name: 'isolated-node', entityType: 'component', observations: [] }, + ]); + const result = await graphManager.graphSearch(['isolated-node']); + expect(result).toEqual([]); + }); + + it('discovers 1-hop neighbors through relations', async () => { + await graphManager.createEntities([ + { name: 'auth-module', entityType: 'component', observations: [] }, + { name: 'jwt-library', entityType: 'library', observations: [] }, + { name: 'user-service', entityType: 'service', observations: [] }, + ]); + await graphManager.createRelations([ + { from: 'auth-module', to: 'jwt-library', relationType: 'depends_on' }, + { from: 'auth-module', to: 'user-service', relationType: 'implements' }, + ]); + + const result = await graphManager.graphSearch(['auth-module']); + const names = result.map(r => r.entityName); + + expect(names).toContain('jwt-library'); + expect(names).toContain('user-service'); + expect(names).not.toContain('auth-module'); // seed excluded from discovered + expect(result.every(r => r.hopDistance === 1)).toBe(true); + }); + + it('discovers 2-hop neighbors', async () => { + await graphManager.createEntities([ + { name: 'A', entityType: 'component', 
observations: [] }, + { name: 'B', entityType: 'component', observations: [] }, + { name: 'C', entityType: 'component', observations: [] }, + ]); + await graphManager.createRelations([ + { from: 'A', to: 'B', relationType: 'depends_on' }, + { from: 'B', to: 'C', relationType: 'depends_on' }, + ]); + + const result = await graphManager.graphSearch(['A'], 2); + + const bResult = result.find(r => r.entityName === 'B'); + const cResult = result.find(r => r.entityName === 'C'); + + expect(bResult).toBeDefined(); + expect(bResult!.hopDistance).toBe(1); + expect(cResult).toBeDefined(); + expect(cResult!.hopDistance).toBe(2); + }); + + it('respects maxHops=1 and does not discover 2-hop neighbors', async () => { + await graphManager.createEntities([ + { name: 'A', entityType: 'component', observations: [] }, + { name: 'B', entityType: 'component', observations: [] }, + { name: 'C', entityType: 'component', observations: [] }, + ]); + await graphManager.createRelations([ + { from: 'A', to: 'B', relationType: 'depends_on' }, + { from: 'B', to: 'C', relationType: 'depends_on' }, + ]); + + const result = await graphManager.graphSearch(['A'], 1); + const names = result.map(r => r.entityName); + + expect(names).toContain('B'); + expect(names).not.toContain('C'); + }); + + it('traverses relations bidirectionally', async () => { + await graphManager.createEntities([ + { name: 'X', entityType: 'component', observations: [] }, + { name: 'Y', entityType: 'component', observations: [] }, + ]); + // Relation goes X → Y, but searching from Y should find X + await graphManager.createRelations([ + { from: 'X', to: 'Y', relationType: 'causes' }, + ]); + + const result = await graphManager.graphSearch(['Y']); + expect(result.map(r => r.entityName)).toContain('X'); + }); + + it('does not revisit already-visited nodes (cycle safety)', async () => { + await graphManager.createEntities([ + { name: 'A', entityType: 'component', observations: [] }, + { name: 'B', entityType: 'component', 
observations: [] }, + { name: 'C', entityType: 'component', observations: [] }, + ]); + // Create a cycle: A → B → C → A + await graphManager.createRelations([ + { from: 'A', to: 'B', relationType: 'depends_on' }, + { from: 'B', to: 'C', relationType: 'depends_on' }, + { from: 'C', to: 'A', relationType: 'depends_on' }, + ]); + + const result = await graphManager.graphSearch(['A'], 10); + // Should discover B and C but not re-add A + const names = result.map(r => r.entityName); + expect(names).toContain('B'); + expect(names).toContain('C'); + expect(names).not.toContain('A'); + // No duplicates + expect(new Set(names).size).toBe(names.length); + }); + + it('handles case-insensitive seed names', async () => { + await graphManager.createEntities([ + { name: 'AuthModule', entityType: 'component', observations: [] }, + { name: 'JwtLib', entityType: 'library', observations: [] }, + ]); + await graphManager.createRelations([ + { from: 'AuthModule', to: 'JwtLib', relationType: 'depends_on' }, + ]); + + const result = await graphManager.graphSearch(['authmodule']); // lowercase + expect(result.map(r => r.entityName)).toContain('JwtLib'); + }); + + it('handles seeds that do not exist in the graph', async () => { + await graphManager.createEntities([ + { name: 'real-entity', entityType: 'component', observations: [] }, + ]); + + const result = await graphManager.graphSearch(['nonexistent-entity']); + expect(result).toEqual([]); + }); + + it('handles multiple seeds', async () => { + await graphManager.createEntities([ + { name: 'A', entityType: 'component', observations: [] }, + { name: 'B', entityType: 'component', observations: [] }, + { name: 'C', entityType: 'component', observations: [] }, + { name: 'D', entityType: 'component', observations: [] }, + ]); + await graphManager.createRelations([ + { from: 'A', to: 'C', relationType: 'depends_on' }, + { from: 'B', to: 'D', relationType: 'depends_on' }, + ]); + + const result = await graphManager.graphSearch(['A', 'B'], 1); + 
const names = result.map(r => r.entityName); + expect(names).toContain('C'); + expect(names).toContain('D'); + }); +}); + +// ─── getEntitiesForNames() ──────────────────────────────────────────────────── + +describe('KnowledgeGraphManager.getEntitiesForNames()', () => { + it('returns entities matching given names (case-insensitive)', async () => { + await graphManager.createEntities([ + { name: 'Auth', entityType: 'component', observations: ['obs1'] }, + { name: 'Database', entityType: 'service', observations: [] }, + ]); + + const entities = graphManager.getEntitiesForNames(['auth', 'Database']); + expect(entities.length).toBe(2); + expect(entities.map(e => e.name).sort()).toEqual(['Auth', 'Database']); + }); + + it('skips names not found in graph', async () => { + await graphManager.createEntities([ + { name: 'Existing', entityType: 'component', observations: [] }, + ]); + + const entities = graphManager.getEntitiesForNames(['Existing', 'missing']); + expect(entities.length).toBe(1); + expect(entities[0].name).toBe('Existing'); + }); +}); + +// ─── setGraphManager() injection ────────────────────────────────────────────── + +describe('setGraphManager()', () => { + it('accepts null without errors', () => { + expect(() => setGraphManager(null)).not.toThrow(); + }); + + it('accepts KnowledgeGraphManager instance', () => { + expect(() => setGraphManager(graphManager)).not.toThrow(); + }); +}); + +// ─── Graph Search Fusion in searchObservations() ────────────────────────────── + +describe('searchObservations() with graph fusion', () => { + async function seedGraphAndObservations() { + // Create entities with graph relations: + // auth-module → jwt-library → crypto-utils + await graphManager.createEntities([ + { name: 'auth-module', entityType: 'component', observations: [] }, + { name: 'jwt-library', entityType: 'library', observations: [] }, + { name: 'crypto-utils', entityType: 'utility', observations: [] }, + { name: 'unrelated-module', entityType: 'component', 
observations: [] }, + ]); + await graphManager.createRelations([ + { from: 'auth-module', to: 'jwt-library', relationType: 'depends_on' }, + { from: 'jwt-library', to: 'crypto-utils', relationType: 'depends_on' }, + ]); + + // Store observations — auth-module directly matches "authentication", + // jwt-library and crypto-utils only reachable via graph + await storeObservation({ + entityName: 'auth-module', + type: 'how-it-works', + title: 'Authentication flow uses OAuth tokens', + narrative: 'The auth-module handles OAuth 2.0 token validation and refresh', + projectId: PROJECT_ID, + }); + + await storeObservation({ + entityName: 'jwt-library', + type: 'decision', + title: 'JWT signing algorithm choice', + narrative: 'Selected RS256 for JWT signing to support public key verification', + projectId: PROJECT_ID, + }); + + await storeObservation({ + entityName: 'crypto-utils', + type: 'gotcha', + title: 'Crypto key rotation schedule', + narrative: 'Keys must be rotated every 90 days for compliance', + projectId: PROJECT_ID, + }); + + await storeObservation({ + entityName: 'unrelated-module', + type: 'discovery', + title: 'Database connection pooling settings', + narrative: 'The database uses a pool of 10 connections by default', + projectId: PROJECT_ID, + }); + } + + it('standard tier query triggers graph fusion and enriches results', async () => { + await seedGraphAndObservations(); + + // Query matches auth-module by text; graph should pull in jwt-library, crypto-utils + const results = await searchObservations({ + query: 'authentication OAuth tokens flow', + projectId: PROJECT_ID, + limit: 20, + }); + + // Should include auth-module hit from Orama directly + expect(results.some(r => r.title.includes('Authentication flow'))).toBe(true); + + // Graph fusion should pull in jwt-library observation (1 hop from auth-module) + const jwtHit = results.find(r => r.title.includes('JWT signing')); + expect(jwtHit).toBeDefined(); + + // Search mode should indicate graph-rrf was 
used + const mode = getLastSearchMode(PROJECT_ID); + expect(mode).toContain('graph-rrf'); + }); + + it('fast tier query skips graph search', async () => { + await seedGraphAndObservations(); + + // Single-word short query = fast tier → no graph search + const results = await searchObservations({ + query: 'auth', + projectId: PROJECT_ID, + limit: 20, + }); + + const mode = getLastSearchMode(PROJECT_ID); + expect(mode).not.toContain('graph-rrf'); + }); + + it('gracefully handles no graph manager set', async () => { + setGraphManager(null); // Remove graph manager + await seedGraphAndObservations(); + + // Should still work — just no graph fusion + const results = await searchObservations({ + query: 'authentication OAuth tokens flow', + projectId: PROJECT_ID, + limit: 20, + }); + + expect(results.length).toBeGreaterThan(0); + const mode = getLastSearchMode(PROJECT_ID); + expect(mode).not.toContain('graph-rrf'); + }); + + it('unrelated observations are not pulled in by graph traversal', async () => { + await seedGraphAndObservations(); + + const results = await searchObservations({ + query: 'authentication OAuth tokens flow', + projectId: PROJECT_ID, + limit: 20, + }); + + // "Database connection pooling" is unrelated — no graph path from auth-module + // It should only appear if BM25/vector matched it (unlikely for this query) + const dbHit = results.find(r => r.title.includes('Database connection pooling')); + // If it appears at all, it should be ranked lower than auth results + if (dbHit) { + const authIdx = results.findIndex(r => r.title.includes('Authentication flow')); + const dbIdx = results.indexOf(dbHit); + expect(dbIdx).toBeGreaterThan(authIdx); + } + }); +}); diff --git a/tests/search/rrf.test.ts b/tests/search/rrf.test.ts new file mode 100644 index 0000000..f9ed6e9 --- /dev/null +++ b/tests/search/rrf.test.ts @@ -0,0 +1,274 @@ +import { describe, it, expect } from 'vitest'; +import { mergeWithRRF, buildRrfTrace } from '../../src/search/rrf.js'; +import 
type { IndexEntry } from '../../src/types.js'; + +// ─── Test Fixtures ─────────────────────────────────────────────────────────── + +function makeEntry( + id: number, + title: string, + opts: { projectId?: string; score?: number } = {}, +): IndexEntry { + return { + id, + time: '12:00 PM', + type: 'discovery', + icon: '🟣', + title, + tokens: 10, + projectId: opts.projectId ?? 'proj-a', + score: opts.score ?? 1, + }; +} + +// ─── mergeWithRRF ──────────────────────────────────────────────────────────── + +describe('mergeWithRRF', () => { + it('returns empty array when all sources are empty', () => { + expect(mergeWithRRF([])).toEqual([]); + expect(mergeWithRRF([{ results: [] }, { results: [] }])).toEqual([]); + }); + + it('single source preserves original order', () => { + const results = [ + makeEntry(1, 'first'), + makeEntry(2, 'second'), + makeEntry(3, 'third'), + ]; + const merged = mergeWithRRF([{ results }]); + expect(merged.map((e) => e.id)).toEqual([1, 2, 3]); + }); + + it('documents in multiple sources rank above single-source documents', () => { + // doc-3 appears in both lists → should outscore doc-1 (only in list A) + // and doc-5 (only in list B) which appear only once + const listA = [makeEntry(1, 'alpha'), makeEntry(3, 'shared')]; + const listB = [makeEntry(5, 'beta'), makeEntry(3, 'shared')]; + + const merged = mergeWithRRF([{ results: listA }, { results: listB }]); + const ids = merged.map((e) => e.id); + + // shared document (3) must rank above the unique ones + expect(ids.indexOf(3)).toBeLessThan(ids.indexOf(1)); + expect(ids.indexOf(3)).toBeLessThan(ids.indexOf(5)); + }); + + it('deduplicates: same document appears at most once', () => { + const entry = makeEntry(42, 'dup'); + const merged = mergeWithRRF([ + { results: [entry, makeEntry(1, 'a')] }, + { results: [entry, makeEntry(2, 'b')] }, + ]); + const ids = merged.map((e) => e.id); + expect(ids.filter((id) => id === 42).length).toBe(1); + }); + + it('sets score to RRF value, not original 
score', () => { + const entry = makeEntry(7, 'test', { score: 999 }); + const [result] = mergeWithRRF([{ results: [entry] }]); + // RRF score for rank-1, weight=1, k=60 → 1/(60+1) ≈ 0.0164 + expect(result.score).toBeCloseTo(1 / 61, 6); + expect(result.score).not.toBe(999); + }); + + it('higher weight sources dominate lower weight sources', () => { + // doc-1 is rank-1 in low-weight list; doc-2 is rank-1 in 10x-weight list + const lowWeight = [makeEntry(1, 'low-list-rank1')]; + const highWeight = [makeEntry(2, 'high-list-rank1')]; + + const merged = mergeWithRRF([ + { results: lowWeight, weight: 1 }, + { results: highWeight, weight: 10 }, + ]); + // doc-2 from the high-weight source should come first + expect(merged[0].id).toBe(2); + expect(merged[1].id).toBe(1); + }); + + it('preserves entry data from the highest-ranked source', () => { + // doc-1 appears in both lists; list A has it at rank 1 (bestRank), list B at rank 2 + const entryFromA = makeEntry(1, 'title-from-A'); + const entryFromB = { ...makeEntry(1, 'title-from-B'), projectId: 'proj-a' }; + + const merged = mergeWithRRF([ + { results: [entryFromA] }, // rank 1 in list A → bestRank + { results: [makeEntry(99, 'x'), entryFromB] }, // rank 2 in list B + ]); + + const doc1 = merged.find((e) => e.id === 1); + expect(doc1?.title).toBe('title-from-A'); // comes from list A (rank 1) + }); + + it('tiebreaks equal scores by bestRank (lower is better)', () => { + // Two unique docs appear in one list each, same relative weight → same score + // doc-1 is at rank 1, doc-2 is at rank 1 in a different list + const merged = mergeWithRRF([ + { results: [makeEntry(1, 'one')] }, + { results: [makeEntry(2, 'two')] }, + ]); + // Both score 1/61; rank-1 in both — tiebreak is deterministic by key order + // (just assert both present and score equal, not strict ordering) + const scores = merged.map((e) => e.score!); + expect(scores[0]).toBeCloseTo(scores[1]!, 6); + }); + + it('limit option trims output', () => { + const results 
= [ + makeEntry(1, 'a'), + makeEntry(2, 'b'), + makeEntry(3, 'c'), + makeEntry(4, 'd'), + ]; + const merged = mergeWithRRF([{ results }], { limit: 2 }); + expect(merged.length).toBe(2); + }); + + it('limit: 0 applies no limit', () => { + const results = Array.from({ length: 10 }, (_, i) => makeEntry(i + 1, `doc-${i}`)); + const merged = mergeWithRRF([{ results }], { limit: 0 }); + expect(merged.length).toBe(10); + }); + + it('custom k changes scores proportionally', () => { + const entry = makeEntry(1, 'test'); + const [withK60] = mergeWithRRF([{ results: [entry] }], { k: 60 }); + const [withK1] = mergeWithRRF([{ results: [entry] }], { k: 1 }); + + // k=1 → 1/(1+1)=0.5; k=60 → 1/(60+1)≈0.016 + expect(withK1.score!).toBeGreaterThan(withK60.score!); + expect(withK1.score).toBeCloseTo(0.5, 6); + }); + + it('empty source list in the middle is ignored gracefully', () => { + const list = [makeEntry(1, 'a'), makeEntry(2, 'b')]; + const merged = mergeWithRRF([ + { results: list }, + { results: [] }, + { results: [makeEntry(3, 'c')] }, + ]); + expect(merged.map((e) => e.id)).toContain(1); + expect(merged.map((e) => e.id)).toContain(2); + expect(merged.map((e) => e.id)).toContain(3); + }); + + it('cross-project: documents with same id but different projectId are separate', () => { + const entryProjA = makeEntry(1, 'proj-a-doc', { projectId: 'proj-a' }); + const entryProjB = makeEntry(1, 'proj-b-doc', { projectId: 'proj-b' }); + + const merged = mergeWithRRF([ + { results: [entryProjA] }, + { results: [entryProjB] }, + ]); + // Both must appear as distinct entries (different project namespaces) + expect(merged.length).toBe(2); + const titles = merged.map((e) => e.title); + expect(titles).toContain('proj-a-doc'); + expect(titles).toContain('proj-b-doc'); + }); + + it('three-way merge: document in all three lists ranks highest', () => { + const shared = makeEntry(99, 'shared-doc'); + const merged = mergeWithRRF([ + { results: [makeEntry(1, 'only-a'), shared] }, + { results: 
[makeEntry(2, 'only-b'), shared] }, + { results: [makeEntry(3, 'only-c'), shared] }, + ]); + // shared appears in all 3 lists → must rank first + expect(merged[0].id).toBe(99); + }); + + it('RRF score formula: rank-1 in one list, k=60 → 1/61', () => { + const [result] = mergeWithRRF([{ results: [makeEntry(1, 'x')] }], { k: 60 }); + expect(result.score).toBeCloseTo(1 / 61, 8); + }); + + it('RRF score formula: rank-1 in two equal-weight lists, k=60 → 2/61', () => { + const entry = makeEntry(1, 'x'); + const [result] = mergeWithRRF([ + { results: [entry] }, + { results: [entry] }, + ], { k: 60 }); + expect(result.score).toBeCloseTo(2 / 61, 8); + }); +}); + +// ─── buildRrfTrace ─────────────────────────────────────────────────────────── + +describe('buildRrfTrace', () => { + it('returns empty array for empty sources', () => { + expect(buildRrfTrace([])).toEqual([]); + expect(buildRrfTrace([{ results: [] }])).toEqual([]); + }); + + it('assigns contribution 0 for sources where document is absent', () => { + const trace = buildRrfTrace([ + { results: [makeEntry(1, 'a')], label: 'bm25' }, + { results: [makeEntry(2, 'b')], label: 'vector' }, + ]); + + const doc1 = trace.find((t) => t.key.endsWith('::1'))!; + expect(doc1.contributions[0].contribution).toBeGreaterThan(0); // bm25 has it + expect(doc1.contributions[1].contribution).toBe(0); // vector does not + expect(doc1.contributions[1].rank).toBeNull(); + }); + + it('uses source labels from RrfSource.label', () => { + const trace = buildRrfTrace([ + { results: [makeEntry(1, 'a')], label: 'bm25' }, + { results: [makeEntry(1, 'a')], label: 'vector' }, + ]); + const doc1 = trace[0]; + expect(doc1.contributions.map((c) => c.label)).toEqual(['bm25', 'vector']); + }); + + it('falls back to source-{index} when label is omitted', () => { + const trace = buildRrfTrace([ + { results: [makeEntry(1, 'a')] }, + { results: [makeEntry(1, 'a')] }, + ]); + const doc1 = trace[0]; + expect(doc1.contributions[0].label).toBe('source-0'); + 
expect(doc1.contributions[1].label).toBe('source-1'); + }); + + it('totalScore matches mergeWithRRF score for the same document', () => { + const entry = makeEntry(5, 'test'); + const sources = [ + { results: [entry], label: 'bm25', weight: 1 }, + { results: [entry], label: 'vector', weight: 2 }, + ]; + const [merged] = mergeWithRRF(sources, { k: 60 }); + const [traceEntry] = buildRrfTrace(sources, 60); + + expect(traceEntry.totalScore).toBeCloseTo(merged.score!, 8); + }); + + it('sorts traces by totalScore descending', () => { + const trace = buildRrfTrace([ + { + results: [ + makeEntry(1, 'rank1'), + makeEntry(2, 'rank2'), + makeEntry(3, 'rank3'), + ], + }, + ]); + const scores = trace.map((t) => t.totalScore); + for (let i = 0; i < scores.length - 1; i++) { + expect(scores[i]).toBeGreaterThanOrEqual(scores[i + 1]!); + } + }); + + it('records 1-based rank in contributions', () => { + const trace = buildRrfTrace([ + { + results: [makeEntry(10, 'first'), makeEntry(20, 'second')], + label: 'src', + }, + ]); + const doc10 = trace.find((t) => t.key.endsWith('::10'))!; + const doc20 = trace.find((t) => t.key.endsWith('::20'))!; + expect(doc10.contributions[0].rank).toBe(1); + expect(doc20.contributions[0].rank).toBe(2); + }); +});