From 4bba4d9c75d7ab01335d956407a696db25c8b73f Mon Sep 17 00:00:00 2001 From: not-reed Date: Wed, 25 Mar 2026 16:47:40 -0700 Subject: [PATCH] Add spreading activation, temporal recall, and retrieval quality improvements Cairn-level improvements benefiting all consumers (Construct, Cortex, Loom, Deck): - Spreading activation graph traversal with edge-weight-aware decay scoring - Temporal filtering (since/before) on recallMemories - FTS5 stop word filtering to reduce common-word noise - Generation counter used in observation budget eviction priority - Skill selection threshold raised from 0.35 to 0.45 App-level fixes: - Construct: graph-expanded memories now carry activation scores, debug logging for injected skill instructions and relevant memories with match type + score - Cortex: signal memories now stored with embeddings + graph extraction, analyzer uses spreadActivation instead of flat traverseGraph - Loom: added graph expansion to recall path (was entirely missing) Cleanup: remove dead syncSkillInstructionsToGraph, add .superset to gitignore Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 1 + .../src/__tests__/spread-activation.test.ts | 347 ++++++++++++++++++ apps/construct/src/agent.ts | 132 +++++-- apps/construct/src/extensions/embeddings.ts | 9 + apps/construct/src/extensions/index.ts | 142 +++---- .../construct/src/tools/core/memory-recall.ts | 60 +-- apps/cortex/src/pipeline/analyzer.ts | 81 ++-- apps/loom/src/agent.ts | 46 ++- packages/cairn/src/context.ts | 5 +- packages/cairn/src/db/queries.ts | 141 ++++++- packages/cairn/src/graph/queries.ts | 186 ++++++++++ packages/cairn/src/index.ts | 3 + packages/cairn/src/similarity.ts | 4 +- 13 files changed, 1008 insertions(+), 149 deletions(-) create mode 100644 apps/construct/src/__tests__/spread-activation.test.ts diff --git a/.gitignore b/.gitignore index 9934626..0c2baf7 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ target/ .cairn/ .direnv/ .plans/ +.superset/ .blog/drafts/ deploy/lima/case.yaml deploy/lima/cairn.yaml diff --git a/apps/construct/src/__tests__/spread-activation.test.ts b/apps/construct/src/__tests__/spread-activation.test.ts new file mode 100644 index 0000000..3a31c81 --- /dev/null +++ b/apps/construct/src/__tests__/spread-activation.test.ts @@ -0,0 +1,347 @@ +/** + * Tests for spreading activation graph traversal and its integration + * with the memory recall pipeline. + * + * Proves four bugs: + * 1. spreadActivation correctly scores nodes with decay and edge weights + * 2. memory_recall graph memories must carry activation scores (was discarded) + * 3. Recall without graph expansion misses connected memories (Loom gap) + * 4. Memories without embeddings are invisible to semantic search (Cortex signal gap) + */ + +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import type { Kysely } from "kysely"; +import type { Database } from "../db/schema.js"; +import { + spreadActivation, + searchNodesWithScores, + getRelatedMemoryIds, + recallMemories, + storeMemory, + upsertEdge, +} from "@repo/cairn"; +import { setupDb, seedMemories, seedGraph, memoryEmbeddings, queryEmbeddings } from "./fixtures.js"; + +let db: Kysely; +let memIds: Record; +let nodeIds: Record; + +beforeEach(async () => { + db = await setupDb(); + const seeded = await seedMemories(db); + memIds = seeded.ids; + const graph = await seedGraph(db, memIds); + nodeIds = graph.nodeIds; +}); + +afterEach(async () => { + await db.destroy(); +}); + +// ── Bug 1: spreadActivation scoring ──────────────────────────────────── + +describe("spreadActivation — scoring correctness", () => { + it("returns scored nodes with depth", async () => { + const seeds = [{ nodeId: nodeIds.portland, score: 1.0 }]; + const results = await spreadActivation(db, seeds, { maxDepth: 2 }); + + expect(results.length).toBeGreaterThan(0); + for (const r of results) { + expect(r.score).toBeGreaterThan(0); + expect(r.depth).toBeGreaterThan(0); + expect(r.node).toBeDefined(); + expect(r.node.id).toBeDefined(); + } + }); + + it("scores decay with depth — depth-1 nodes score higher than depth-2", async () => { + // Portland → Alex (depth 1) → Miso (depth 2) + const seeds = [{ nodeId: nodeIds.portland, score: 1.0 }]; + const results = await spreadActivation(db, seeds, { maxDepth: 2, decay: 0.5 }); + + const alex = results.find((r) => r.node.id === nodeIds.alex); + const miso = results.find((r) => r.node.id === nodeIds.miso); + + expect(alex).toBeDefined(); + expect(miso).toBeDefined(); + expect(alex!.score).toBeGreaterThan(miso!.score); + expect(alex!.depth).toBe(1); + expect(miso!.depth).toBe(2); + }); + + it("higher edge weights produce higher activation scores", async () => { + // Bump the Alex→Miso edge weight to 5 + await upsertEdge(db, { + source_id: nodeIds.alex, + target_id: nodeIds.miso, + relation: "owns", + memory_id: memIds.miso, + }); + await upsertEdge(db, { + source_id: nodeIds.alex, + target_id: nodeIds.miso, + relation: "owns", + memory_id: memIds.miso, + }); + await upsertEdge(db, { + source_id: nodeIds.alex, + target_id: nodeIds.miso, + relation: "owns", + memory_id: memIds.miso, + }); + await upsertEdge(db, { + source_id: nodeIds.alex, + target_id: nodeIds.miso, + relation: "owns", + memory_id: memIds.miso, + }); + // weight is now 5 (1 initial + 4 upserts) + + const seeds = [{ nodeId: nodeIds.alex, score: 1.0 }]; + const results = await spreadActivation(db, seeds, { maxDepth: 1, decay: 0.5 }); + + const miso = results.find((r) => r.node.id === nodeIds.miso); + const rust = results.find((r) => r.node.id === nodeIds.rust); + + expect(miso).toBeDefined(); + expect(rust).toBeDefined(); + // Miso edge weight=5, Rust edge weight=1 + expect(miso!.score).toBeGreaterThan(rust!.score); + }); + + it("scores never exceed seed score — weights reduce decay, not amplify", async () => { + // Even with high edge weights, child scores must stay <= parent + // Bump Alex→Miso to weight 10 + for (let i = 0; i < 9; i++) { + await upsertEdge(db, { + source_id: nodeIds.alex, + target_id: nodeIds.miso, + relation: "owns", + memory_id: memIds.miso, + }); + } + + const seedScore = 0.8; + const seeds = [{ nodeId: nodeIds.alex, score: seedScore }]; + const results = await spreadActivation(db, seeds, { maxDepth: 1 }); + + for (const r of results) { + expect(r.score).toBeLessThanOrEqual(seedScore); + } + }); + + it("does not include seed nodes in results", async () => { + const seeds = [{ nodeId: nodeIds.alex, score: 1.0 }]; + const results = await spreadActivation(db, seeds); + + const seedInResults = results.find((r) => r.node.id === nodeIds.alex); + expect(seedInResults).toBeUndefined(); + }); + + it("results are sorted by score descending", async () => { + const seeds = [{ nodeId: nodeIds.alex, score: 1.0 }]; + const results = await spreadActivation(db, seeds, { maxDepth: 2 }); + + for (let i = 1; i < results.length; i++) { + expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score); + } + }); +}); + +// ── Bug 2: graph memory scores must propagate ────────────────────────── + +describe("graph recall — scores must propagate to memories", () => { + /** + * Replicate memory_recall's graph expansion logic. + * This test proves the bug: graph memories should carry activation scores. + */ + it("graph-expanded memories must have scores > 0", async () => { + const seedNodes = await searchNodesWithScores(db, "portland", 5); + expect(seedNodes.length).toBeGreaterThan(0); + + const seeds = seedNodes.map((s) => ({ nodeId: s.node.id, score: s.score })); + const activated = await spreadActivation(db, seeds, { maxDepth: 2 }); + + // Build node score map (what memory_recall does) + const nodeScoreMap = new Map(); + for (const s of seedNodes) nodeScoreMap.set(s.node.id, s.score); + for (const a of activated) nodeScoreMap.set(a.node.id, a.score); + + const allNodeIds = [...nodeScoreMap.keys()]; + const relatedMemIds = await getRelatedMemoryIds(db, allNodeIds); + expect(relatedMemIds.length).toBeGreaterThan(0); + + // For each related memory, we should be able to derive a score from the nodeScoreMap + // by looking up which scored nodes the memory's edges connect to + for (const memId of relatedMemIds) { + const edges = await db + .selectFrom("graph_edges") + .selectAll() + .where("memory_id", "=", memId) + .execute(); + + const bestScore = Math.max( + 0, + ...edges.flatMap((e) => [ + nodeScoreMap.get(e.source_id) ?? 0, + nodeScoreMap.get(e.target_id) ?? 0, + ]), + ); + + // This MUST be > 0 — if it's 0, the score didn't propagate + expect(bestScore).toBeGreaterThan(0); + } + }); + + it("getRelatedMemoriesWithScores returns scored memories", async () => { + // This function should exist in cairn — it's the fix for the nodeScoreMap bug. + // Import will fail until we add it (RED). + const { getRelatedMemoriesWithScores } = await import("@repo/cairn"); + + const nodeScoreMap = new Map(); + nodeScoreMap.set(nodeIds.alex, 0.8); + nodeScoreMap.set(nodeIds.portland, 0.5); + nodeScoreMap.set(nodeIds.miso, 0.3); + + const scored = await getRelatedMemoriesWithScores(db, nodeScoreMap); + + expect(scored.length).toBeGreaterThan(0); + for (const item of scored) { + expect(item.memoryId).toBeDefined(); + expect(item.score).toBeGreaterThan(0); + } + + // Memories linked to higher-scored nodes should have higher scores + const sorted = scored.toSorted((a, b) => b.score - a.score); + expect(sorted[0].score).toBeGreaterThanOrEqual(sorted[sorted.length - 1].score); + }); +}); + +// ── Bug 3: recall without graph misses connected memories (Loom gap) ── + +describe("recall without graph expansion — misses connected memories", () => { + it("FTS-only recall for 'portland' misses DataPipe memory", async () => { + // Direct recall for "portland" — should find portland memory via FTS + const direct = await recallMemories(db, "portland", { limit: 10 }); + const directIds = new Set(direct.map((m) => m.id)); + + // Portland memory should be in direct results + expect(directIds.has(memIds.portland)).toBe(true); + + // But DataPipe memory should NOT be in direct results + // (no text overlap between "portland" and "DataPipe" content) + expect(directIds.has(memIds.datapipe)).toBe(false); + + // WITH graph expansion, DataPipe IS reachable: + // Portland node → Alex node → DataPipe node → DataPipe memory + const seedNodes = await searchNodesWithScores(db, "portland", 5); + const seeds = seedNodes.map((s) => ({ nodeId: s.node.id, score: s.score })); + const activated = await spreadActivation(db, seeds, { maxDepth: 2 }); + const allNodeIds = [...seedNodes.map((s) => s.node.id), ...activated.map((a) => a.node.id)]; + const graphMemIds = await getRelatedMemoryIds(db, allNodeIds); + + // Graph expansion DOES find DataPipe + expect(graphMemIds).toContain(memIds.datapipe); + }); +}); + +// ── Bug 4: memories without embeddings invisible to semantic search ──── + +describe("memories without embeddings — semantic search gap", () => { + it("memory stored without embedding is not found by semantic search", async () => { + // Replicate Cortex's pattern: storeMemory with embedding: null + const noEmbedding = await storeMemory(db, { + content: "[Signal BTC short] BUY (0.85): Strong bullish momentum", + category: "signal", + source: "analyzer", + tags: '["bitcoin","buy"]', + embedding: null, + }); + + // FTS5 can find it + const ftsResults = await recallMemories(db, "BTC bullish momentum", { limit: 10 }); + const ftsIds = ftsResults.map((m) => m.id); + expect(ftsIds).toContain(noEmbedding.id); + + // But semantic search (embedding-only) CANNOT find it + const semanticResults = await recallMemories(db, "bitcoin price analysis", { + limit: 10, + queryEmbedding: queryEmbeddings.work, // using a work-domain embedding + }); + const embeddingMatches = semanticResults.filter((m) => m.matchType === "embedding"); + const embeddingIds = embeddingMatches.map((m) => m.id); + expect(embeddingIds).not.toContain(noEmbedding.id); + }); + + it("memory WITH embedding IS found by semantic search", async () => { + const withEmbedding = await storeMemory(db, { + content: "[Signal BTC short] BUY (0.85): Strong bullish momentum", + category: "signal", + source: "analyzer", + tags: '["bitcoin","buy"]', + embedding: JSON.stringify(memoryEmbeddings.datapipe), // work-domain embedding + }); + + const results = await recallMemories(db, "work projects", { + limit: 10, + queryEmbedding: queryEmbeddings.work, + }); + const ids = results.map((m) => m.id); + expect(ids).toContain(withEmbedding.id); + }); +}); + +// ── Temporal recall (should pass — already implemented) ──────────────── + +describe("recallMemories — temporal filtering", () => { + it("since filters out older memories", async () => { + // All seed memories were created "now" — store one with a forced old date + const oldMemory = await storeMemory(db, { + content: "Ancient fact from long ago", + category: "general", + source: "user", + }); + // Manually backdate it + await db + .updateTable("memories") + .set({ created_at: "2020-01-01T00:00:00Z" }) + .where("id", "=", oldMemory.id) + .execute(); + + const results = await recallMemories(db, "ancient fact", { + since: "2024-01-01", + limit: 10, + }); + const ids = results.map((m) => m.id); + expect(ids).not.toContain(oldMemory.id); + }); + + it("before filters out newer memories", async () => { + const results = await recallMemories(db, "Alex", { + before: "2020-01-01", + limit: 10, + }); + // All seed memories were created "now" (2026), so none should match + expect(results).toHaveLength(0); + }); + + it("since + before creates a date range", async () => { + // Backdate one memory to 2023 + await db + .updateTable("memories") + .set({ created_at: "2023-06-15T00:00:00Z" }) + .where("id", "=", memIds.miso) + .execute(); + + const results = await recallMemories(db, "Miso cat", { + since: "2023-01-01", + before: "2024-01-01", + limit: 10, + }); + const ids = results.map((m) => m.id); + expect(ids).toContain(memIds.miso); + + // Other memories (created "now") should not be in range + expect(ids).not.toContain(memIds.portland); + }); +}); diff --git a/apps/construct/src/agent.ts b/apps/construct/src/agent.ts index 49f16f1..09982cf 100644 --- a/apps/construct/src/agent.ts +++ b/apps/construct/src/agent.ts @@ -29,6 +29,7 @@ import { selectAndCreateDynamicTools, selectAndRetrieveSkillInstructions, } from "./extensions/index.js"; +import { upsertNode, upsertEdge } from "@repo/cairn"; // Adapt internal tool → pi-agent-core AgentTool function createPiTool(tool: InternalTool): AgentTool { @@ -133,7 +134,12 @@ export async function processMessage( // Try to find semantically relevant memories for this specific message // queryEmbedding is also reused for tool pack selection below let queryEmbedding: number[] | undefined; - let relevantMemories: Array<{ content: string; category: string; score?: number }> = []; + let relevantMemories: Array<{ + content: string; + category: string; + score?: number; + matchType?: string; + }> = []; try { queryEmbedding = await generateEmbedding(env.OPENROUTER_API_KEY, message, env.EMBEDDING_MODEL); const results = await recallMemories(db, message, { @@ -145,7 +151,12 @@ export async function processMessage( const recentIds = new Set(recentMemories.map((m) => m.id)); relevantMemories = results .filter((m) => !recentIds.has(m.id)) - .map((m) => ({ content: m.content, category: m.category, score: m.score })); + .map((m) => ({ + content: m.content, + category: m.category, + score: m.score, + matchType: m.matchType, + })); } catch { // Embedding call failed — no relevant memories, that's fine // queryEmbedding stays undefined → all tool packs will load (graceful fallback) @@ -154,7 +165,8 @@ export async function processMessage( agentLog.debug`Context: ${recentMemories.length} recent memories, ${relevantMemories.length} relevant memories`; // 5. Select relevant skill instructions based on query embedding - const selectedInstructions = await selectAndRetrieveSkillInstructions(queryEmbedding); + const { formatted: selectedInstructions, instructionIds: selectedInstructionIds } = + await selectAndRetrieveSkillInstructions(queryEmbedding); // 5a. Validate instructions for conflicts if (selectedInstructions.length > 1) { @@ -259,6 +271,9 @@ export async function processMessage( const totalUsage = { input: 0, output: 0, cost: 0 }; let hasUsage = false; + const toolErrors: Array<{ toolName: string; result: string }> = []; + let toolSuccesses = 0; + agent.subscribe((event) => { if (event.type === "message_update") { if (event.assistantMessageEvent.type === "text_delta") { @@ -282,23 +297,12 @@ export async function processMessage( result: String(event.result), }); - // TODO: On successful execution, create "applied_in" edges to skill instructions - // that were injected into the context. This records which instructions led to - // successful tool invocation for learning + observability. - // if (event.result?.success) { - // for (const instrId of selectedInstructions) { - // await memoryManager.upsertEdge(instrId, eventId, "applied_in"); - // } - // } - - // TODO: On tool error, create "failed_on" edges to implicated instructions. - // This helps identify which instructions need fixing. - // if (event.result?.error) { - // const implicated = getImplicatedInstructions(event.toolName, selectedInstructions); - // for (const instrId of implicated) { - // await memoryManager.upsertEdge(instrId, eventId, "failed_on"); - // } - // } + // Track tool errors for failed_on edges (processed after agent finishes) + if (event.isError) { + toolErrors.push({ toolName: event.toolName, result: String(event.result) }); + } else { + toolSuccesses++; + } } }); @@ -326,6 +330,23 @@ export async function processMessage( const totalContextTokens = systemTokens + preambleTokens + historyTokens; agentLog.info`Context breakdown: system=${systemTokens} observations=${observationTokens} recentMem=${recentMemTokens}(${recentMemories.length}) relevantMem=${relevantMemTokens}(${relevantMemories.length}) instructions=${instructionTokens}(${selectedInstructions.length}) history=${historyTokens}(${historyMessages.length}msgs) tools=${toolCount} preamble=${preambleTokens} total=${totalContextTokens}`; + // 12a. Log injected skill instructions at debug level for diagnosing bad context + if (selectedInstructions.length > 0) { + agentLog.debug`Injected skill instructions:\n${selectedInstructions.join("\n")}`; + } + + // 12b. Log relevant memories at debug level + if (relevantMemories.length > 0) { + const memSummary = relevantMemories + .map((m) => { + const match = m.matchType ?? "unknown"; + const score = m.score !== undefined ? ` score=${m.score.toFixed(2)}` : ""; + return ` [${match}${score}] (${m.category}) ${m.content.slice(0, 100)}${m.content.length > 100 ? "..." : ""}`; + }) + .join("\n"); + agentLog.debug`Relevant memories:\n${memSummary}`; + } + // 13. Run agent — prepend context preamble to first message // (subsequent steps renumbered: save=14, usage=15, observer=16) agentLog.debug`Prompting agent`; @@ -357,7 +378,76 @@ export async function processMessage( }); } - // 15. Run observer async after response (next turn benefits) + // 15. Create skill instruction graph edges (fire-and-forget) + if (selectedInstructionIds.length > 0 && toolCalls.length > 0) { + (async () => { + try { + // Create a conversation event node + const eventNodeName = `conv:${conversationId}:${assistantMessageId}`; + const eventNode = await upsertNode(db, { name: eventNodeName, type: "conversation_event" }); + + // Resolve instruction DB IDs → graph node IDs + const instrNodeIds = new Map(); + for (const instrId of selectedInstructionIds) { + const node = await upsertNode(db, { name: instrId, type: "skill_instruction" }); + instrNodeIds.set(instrId, node.id); + } + + if (toolErrors.length === 0 && toolSuccesses > 0) { + // All tools succeeded — create applied_in edges from each instruction + for (const graphNodeId of instrNodeIds.values()) { + await upsertEdge(db, { + source_id: graphNodeId, + target_id: eventNode.id, + relation: "applied_in", + }); + } + agentLog.debug`Created applied_in edges: ${instrNodeIds.size} instructions → ${eventNodeName}`; + } + + if (toolErrors.length > 0) { + // Tool errors — check for implicated instructions in skill_executions + const executions = await db + .selectFrom("skill_executions") + .where("conversation_id", "=", conversationId) + .where("implicated_instruction_id", "is not", null) + .where("had_tool_errors", "=", 1) + .select("implicated_instruction_id") + .execute(); + + const implicatedIds = new Set( + executions + .map((e) => e.implicated_instruction_id) + .filter((id): id is string => id != null), + ); + + // Use implicated instructions if available, otherwise all selected + const failedIds = + implicatedIds.size > 0 ? implicatedIds : new Set(selectedInstructionIds); + const errorSummary = toolErrors + .map((e) => `${e.toolName}: ${e.result.slice(0, 100)}`) + .join("; "); + + for (const instrId of failedIds) { + const graphNodeId = instrNodeIds.get(instrId); + if (graphNodeId) { + await upsertEdge(db, { + source_id: graphNodeId, + target_id: eventNode.id, + relation: "failed_on", + properties: { errors: errorSummary }, + }); + } + } + agentLog.debug`Created failed_on edges: ${failedIds.size} instructions → ${eventNodeName}`; + } + } catch (err) { + agentLog.warning`Failed to create skill instruction graph edges: ${err}`; + } + })(); + } + + // 16. Run observer async after response (next turn benefits) // Non-blocking — fires and forgets. Observer only runs if un-observed // messages exceed the token threshold. memoryManager diff --git a/apps/construct/src/extensions/embeddings.ts b/apps/construct/src/extensions/embeddings.ts index 7e04f14..147944d 100644 --- a/apps/construct/src/extensions/embeddings.ts +++ b/apps/construct/src/extensions/embeddings.ts @@ -331,6 +331,15 @@ export async function selectSkillInstructions( .toSorted((a, b) => b.score - a.score) .slice(0, maxInstructions); + if (scored.length > 0) { + const scoreSummary = scored + .map( + (s) => `${s.instr.skillId}:"${s.instr.instruction.slice(0, 50)}" (${s.score.toFixed(2)})`, + ) + .join(", "); + agentLog.debug`Instruction scores above threshold: ${scoreSummary}`; + } + // Collect selected instructions + transitive dependencies const selected = new Set(); const toAdd = new Set(scored.map((s) => s.instr.id)); diff --git a/apps/construct/src/extensions/index.ts b/apps/construct/src/extensions/index.ts index 6e57200..c69f6bc 100644 --- a/apps/construct/src/extensions/index.ts +++ b/apps/construct/src/extensions/index.ts @@ -21,6 +21,7 @@ import { extractInstructions, resolveDependencyIds } from "./instructions.js"; import { agentLog } from "../logger.js"; import { ExtensionError } from "../errors.js"; import { nanoid } from "nanoid"; +import { upsertNode, upsertEdge } from "@repo/cairn"; // Singleton registry let registry: ExtensionRegistry = { @@ -146,26 +147,73 @@ async function doReload(): Promise { * Skills get exported from here later for dynamic edges. */ async function syncAllSkillsToGraph(db: Kysely): Promise { - // Note: This syncs the static structure. Dynamic edges (applied_in, failed_on) - // are added during execution in agent.ts when we have the memory manager. - // For now, this is a placeholder that logs the count. try { - const skillCount = await db + // Load all active skills + const skills = await db .selectFrom("skills") .where("status", "=", "active") - .select(db.fn.count("id").as("count")) - .executeTakeFirstOrThrow(); + .select(["id", "name", "description"]) + .execute(); - const instrCount = await db - .selectFrom("skill_instructions") - .innerJoin("skills", "skills.id", "skill_instructions.skill_id") - .where("skills.status", "=", "active") - .select(db.fn.count("skill_instructions.id").as("count")) - .executeTakeFirstOrThrow(); + if (skills.length === 0) return; + + // Load all instructions for active skills + const instructions = await db + .selectFrom("skill_instructions as si") + .innerJoin("skills as s", "s.id", "si.skill_id") + .where("s.status", "=", "active") + .select(["si.id", "si.skill_id", "si.instruction"]) + .execute(); + + // Load all instruction deps + const deps = await db + .selectFrom("skill_instruction_deps as d") + .innerJoin("skill_instructions as si", "si.id", "d.from_id") + .innerJoin("skills as s", "s.id", "si.skill_id") + .where("s.status", "=", "active") + .select(["d.from_id", "d.to_id", "d.relation"]) + .execute(); + + // Map from DB ID → graph node ID for edge creation + const nodeIdMap = new Map(); + + // Upsert skill nodes + for (const skill of skills) { + const node = await upsertNode(db, { + name: skill.id, + type: "skill", + description: skill.description, + }); + nodeIdMap.set(skill.id, node.id); + } + + // Upsert instruction nodes + contains edges + for (const instr of instructions) { + const node = await upsertNode(db, { + name: instr.id, + type: "skill_instruction", + description: instr.instruction, + }); + nodeIdMap.set(instr.id, node.id); + + const skillNodeId = nodeIdMap.get(instr.skill_id); + if (skillNodeId) { + await upsertEdge(db, { source_id: skillNodeId, target_id: node.id, relation: "contains" }); + } + } + + // Upsert dependency edges + for (const dep of deps) { + const fromNodeId = nodeIdMap.get(dep.from_id); + const toNodeId = nodeIdMap.get(dep.to_id); + if (fromNodeId && toNodeId) { + await upsertEdge(db, { source_id: fromNodeId, target_id: toNodeId, relation: "requires" }); + } + } - agentLog.debug`Ready for graph sync: ${skillCount.count} skills, ${instrCount.count} instructions (dynamic edges added during execution)`; + agentLog.info`Graph sync: ${skills.length} skills, ${instructions.length} instructions, ${deps.length} deps`; } catch (err) { - agentLog.warning`Failed to count skills for graph: ${err}`; + agentLog.warning`Failed to sync skills to graph: ${err}`; } } @@ -346,58 +394,6 @@ async function extractInstructionsInBackground( } } -/** - * Sync skill instructions to Cairn graph. - * Called from agent.ts after skill instructions are retrieved, with memory manager available. - * Creates: instruction nodes + dependency edges. - * Dynamic edges (applied_in, failed_on) are added during execution. - */ -export async function syncSkillInstructionsToGraph( - instructionIds: string[], - deps: Array<{ fromId: string; toId: string; relation: string }>, - memoryManager: any, -): Promise { - try { - const { upsertNode, upsertEdge } = memoryManager; - - // Upsert instruction nodes - for (const instrId of instructionIds) { - const instr = await dbRef - ?.selectFrom("skill_instructions") - .where("id", "=", instrId) - .select(["instruction", "skill_id"]) - .executeTakeFirst(); - - if (instr) { - // Instruction node - await upsertNode(instrId, "skill_instruction", instr.instruction, ""); - - // Skill node (may already exist) - const skill = await dbRef - ?.selectFrom("skills") - .where("id", "=", instr.skill_id) - .select(["name", "description"]) - .executeTakeFirst(); - - if (skill) { - await upsertNode(instr.skill_id, "skill", skill.name, skill.description); - - // Edge: skill contains instruction - await upsertEdge(instr.skill_id, instrId, "contains"); - } - } - } - - // Upsert dependency edges between instructions - for (const dep of deps) { - await upsertEdge(dep.fromId, dep.toId, "requires"); - } - } catch (err) { - agentLog.warning`Failed to sync instructions to graph: ${err}`; - // Non-fatal — instructions are already in DB - } -} - /** Get the current extension registry. */ export function getExtensionRegistry(): ExtensionRegistry { return registry; @@ -432,12 +428,19 @@ export function selectAndCreateDynamicTools( return tools; } +export interface SelectedInstructions { + /** Formatted lines for system prompt injection */ + formatted: string[]; + /** Raw instruction IDs for graph edge creation */ + instructionIds: string[]; +} + /** * Select and retrieve relevant skill instructions for a query, including transitive dependencies. */ export async function selectAndRetrieveSkillInstructions( queryEmbedding: number[] | undefined, -): Promise { +): Promise { const instructions = await selectSkillInstructions(queryEmbedding); if (instructions.length > 0) { @@ -467,5 +470,8 @@ export async function selectAndRetrieveSkillInstructions( } } - return formatted; + return { + formatted, + instructionIds: instructions.map((i) => i.id), + }; } diff --git a/apps/construct/src/tools/core/memory-recall.ts b/apps/construct/src/tools/core/memory-recall.ts index eda4647..fe33a00 100644 --- a/apps/construct/src/tools/core/memory-recall.ts +++ b/apps/construct/src/tools/core/memory-recall.ts @@ -3,9 +3,9 @@ import type { Kysely } from "kysely"; import { recallMemories, generateEmbedding, - searchNodes, - traverseGraph, - getRelatedMemoryIds, + searchNodesWithScores, + spreadActivation, + getRelatedMemoriesWithScores, type Memory, } from "@repo/cairn"; import type { Database } from "../../db/schema.js"; @@ -25,6 +25,16 @@ const MemoryRecallParams = Type.Object({ description: "Max number of results to return (default: 10)", }), ), + since: Type.Optional( + Type.String({ + description: "ISO date (YYYY-MM-DD). Only return memories created on or after this date.", + }), + ), + before: Type.Optional( + Type.String({ + description: "ISO date (YYYY-MM-DD). Only return memories created before this date.", + }), + ), }); type MemoryRecallInput = Static; @@ -54,39 +64,43 @@ export function createMemoryRecallTool( category: args.category, limit: args.limit, queryEmbedding, + since: args.since, + before: args.before, }); - // Expand via graph traversal — find related memories not in direct results - let graphMemories: (Memory & { matchType: string })[] = []; + // Expand via graph spreading activation — find related memories not in direct results + let graphMemories: (Memory & { matchType: string; score?: number })[] = []; try { const seen = new Set(memories.map((m) => m.id)); - const graphNodes = await searchNodes(db, args.query, 5, queryEmbedding); + const seedNodes = await searchNodesWithScores(db, args.query, 5, queryEmbedding); - if (graphNodes.length > 0) { - // Traverse 1-2 hops from matching nodes - const allNodeIds = new Set(); - for (const node of graphNodes) { - allNodeIds.add(node.id); - const traversed = await traverseGraph(db, node.id, 2); - for (const t of traversed) { - allNodeIds.add(t.node.id); - } - } + if (seedNodes.length > 0) { + const seeds = seedNodes.map((s) => ({ nodeId: s.node.id, score: s.score })); + const activated = await spreadActivation(db, seeds, { maxDepth: 2 }); + + // Build node→score map from seeds + activated nodes + const nodeScoreMap = new Map(); + for (const s of seedNodes) nodeScoreMap.set(s.node.id, s.score); + for (const a of activated) nodeScoreMap.set(a.node.id, a.score); + + // Get memories with scores derived from their linked nodes + const scoredMems = await getRelatedMemoriesWithScores(db, nodeScoreMap); + const newScoredMems = scoredMems.filter((s) => !seen.has(s.memoryId)); - // Find memories linked to these nodes - const relatedMemIds = await getRelatedMemoryIds(db, [...allNodeIds]); - const newMemIds = relatedMemIds.filter((id) => !seen.has(id)); + if (newScoredMems.length > 0) { + const memIds = newScoredMems.slice(0, 5).map((s) => s.memoryId); + const scoreMap = new Map(newScoredMems.map((s) => [s.memoryId, s.score])); - if (newMemIds.length > 0) { const relatedMems = await db .selectFrom("memories") .selectAll() - .where("id", "in", newMemIds) + .where("id", "in", memIds) .where("archived_at", "is", null) - .limit(5) .execute(); - graphMemories = relatedMems.map((m) => ({ ...m, matchType: "graph" })); + graphMemories = relatedMems + .map((m) => ({ ...m, matchType: "graph", score: scoreMap.get(m.id) })) + .toSorted((a, b) => (b.score ?? 0) - (a.score ?? 0)); } } } catch (err) { diff --git a/apps/cortex/src/pipeline/analyzer.ts b/apps/cortex/src/pipeline/analyzer.ts index 0d26aea..b0a71b2 100644 --- a/apps/cortex/src/pipeline/analyzer.ts +++ b/apps/cortex/src/pipeline/analyzer.ts @@ -2,9 +2,9 @@ import type { Kysely } from "kysely"; import { recallMemories, generateEmbedding, - searchNodes, - traverseGraph, - getRelatedMemoryIds, + searchNodesWithScores, + spreadActivation, + getRelatedMemoriesWithScores, storeMemory, type MemoryManager, } from "@repo/cairn"; @@ -29,7 +29,7 @@ interface SignalResult { */ export async function analyzeAllTokens( db: Kysely, - _memory: MemoryManager, + memory: MemoryManager, log: (msg: string) => void, ): Promise { const tokens = await getActiveTokens(db); @@ -45,7 +45,7 @@ export async function analyzeAllTokens( for (const timeframe of ["short", "long"] as const) { try { - const result = await analyzeToken(db, token, price, timeframe, log); + const result = await analyzeToken(db, token, price, timeframe, log, memory); if (result) { const label = timeframe === "short" ? "24h" : "4w"; log( @@ -70,6 +70,7 @@ async function analyzeToken( }, timeframe: "short" | "long", log: (msg: string) => void, + memory?: MemoryManager, ): Promise { // 1. Recall relevant memories via hybrid search const queryText = await generateRecallQuery(token, price, timeframe); @@ -89,35 +90,39 @@ async function analyzeToken( queryEmbedding, }); - // 2. Graph context: find nodes related to this token, traverse + // 2. Graph context: find nodes related to this token via spreading activation let graphContext = ""; try { - const nodes = await searchNodes(db, token.name, 5, queryEmbedding); - if (nodes.length > 0) { - const traversals = await Promise.all( - nodes.slice(0, 3).map((n) => traverseGraph(db, n.id, 2)), - ); + const seedNodes = await searchNodesWithScores(db, token.name, 5, queryEmbedding); + if (seedNodes.length > 0) { + const seeds = seedNodes.map((s) => ({ nodeId: s.node.id, score: s.score })); + const activated = await spreadActivation(db, seeds, { maxDepth: 2 }); - const allTraversed = traversals.flat(); - const graphLines = allTraversed.map( + const graphLines = activated.map( (t) => - `${t.node.display_name} (${t.node.node_type}) — via: ${t.via_relation ?? "root"} [depth ${t.depth}]`, + `${t.node.display_name} (${t.node.node_type}) [score ${t.score.toFixed(2)}, depth ${t.depth}]`, ); graphContext = graphLines.join("\n"); - // Get memories linked to graph nodes - const nodeIds = allTraversed.map((t) => t.node.id); - const relatedMemoryIds = await getRelatedMemoryIds(db, nodeIds); - const graphMemories = await Promise.all( - relatedMemoryIds - .slice(0, 5) - .map((id) => + // Get scored memories linked to graph nodes + const nodeScoreMap = new Map(); + for (const s of seedNodes) nodeScoreMap.set(s.node.id, s.score); + for (const a of activated) nodeScoreMap.set(a.node.id, a.score); + + const scoredMems = await getRelatedMemoriesWithScores(db, nodeScoreMap); + const memIds = scoredMems + .filter((s) => !memories.some((m) => m.id === s.memoryId)) + .slice(0, 5) + .map((s) => s.memoryId); + + if (memIds.length > 0) { + const graphMemories = await Promise.all( + memIds.map((id) => db.selectFrom("memories").selectAll().where("id", "=", id).executeTakeFirst(), ), - ); - for (const m of graphMemories) { - if (m && !memories.some((existing) => existing.id === m.id)) { - memories.push(m); + ); + for (const m of graphMemories) { + if (m) memories.push(m); } } } @@ -182,15 +187,33 @@ async function analyzeToken( timeframe, }); - // 8. Store signal reasoning as a cairn memory (feedback loop) - await storeMemory(db, { - content: `[Signal ${token.symbol} ${timeframe}] ${parsed.signal.toUpperCase()} (${parsed.confidence.toFixed(2)}): ${parsed.reasoning}`, + // 8. Store signal reasoning as a cairn memory with embedding + graph extraction + const signalContent = `[Signal ${token.symbol} ${timeframe}] ${parsed.signal.toUpperCase()} (${parsed.confidence.toFixed(2)}): ${parsed.reasoning}`; + let signalEmbedding: string | null = null; + try { + const embedding = await generateEmbedding( + env.OPENROUTER_API_KEY, + signalContent, + env.EMBEDDING_MODEL, + ); + signalEmbedding = JSON.stringify(embedding); + } catch { + // Proceed without embedding + } + + const signalMemory = await storeMemory(db, { + content: signalContent, category: "signal", source: "analyzer", tags: JSON.stringify([token.symbol, parsed.signal]), - embedding: null, + embedding: signalEmbedding, }); + // Fire graph extraction async so signal entities (tokens, events) get indexed + if (memory) { + memory.processStoredMemory(signalMemory.id, signalMemory.content).catch(() => {}); + } + return parsed; } diff --git a/apps/loom/src/agent.ts b/apps/loom/src/agent.ts index 397752f..76431ec 100644 --- a/apps/loom/src/agent.ts +++ b/apps/loom/src/agent.ts @@ -14,7 +14,15 @@ import { getRecentMemories, trackUsage, } from "./db/queries.js"; -import { generateEmbedding, MemoryManager, SIMILARITY, type WorkerModelConfig } from "@repo/cairn"; +import { + generateEmbedding, + MemoryManager, + SIMILARITY, + searchNodesWithScores, + spreadActivation, + getRelatedMemoriesWithScores, + type WorkerModelConfig, +} from "@repo/cairn"; export interface ProcessMessageOpts { onDelta?: (text: string) => void; @@ -81,8 +89,42 @@ export async function processMessage( queryEmbedding, similarityThreshold: SIMILARITY.RECALL_DEFAULT, }); - const recentMems = await getRecentMemories(db, 10); + + // Graph expansion — find memories connected via entity relationships const seen = new Set(campaignResults.map((m) => m.id)); + try { + const seedNodes = await searchNodesWithScores(db, message, 5, queryEmbedding); + if (seedNodes.length > 0) { + const seeds = seedNodes.map((s) => ({ nodeId: s.node.id, score: s.score })); + const activated = await spreadActivation(db, seeds, { maxDepth: 2 }); + const nodeScoreMap = new Map(); + for (const s of seedNodes) nodeScoreMap.set(s.node.id, s.score); + for (const a of activated) nodeScoreMap.set(a.node.id, a.score); + + const scoredMems = await getRelatedMemoriesWithScores(db, nodeScoreMap); + const newMemIds = scoredMems + .filter((s) => !seen.has(s.memoryId)) + .slice(0, 5) + .map((s) => s.memoryId); + + if (newMemIds.length > 0) { + const graphMems = await db + .selectFrom("memories") + .selectAll() + .where("id", "in", newMemIds) + .where("archived_at", "is", null) + .execute(); + for (const m of graphMems) { + campaignResults.push(m); + seen.add(m.id); + } + } + } + } catch { + // Graph expansion is optional — proceed without it + } + + const recentMems = await getRecentMemories(db, 10); const combined = [...campaignResults]; for (const m of recentMems) { if (!seen.has(m.id) && m.category !== "rules") { diff --git a/packages/cairn/src/context.ts b/packages/cairn/src/context.ts index 2eca787..d58dcc2 100644 --- a/packages/cairn/src/context.ts +++ b/packages/cairn/src/context.ts @@ -41,10 +41,13 @@ export function renderObservationsWithBudget( return { text: "", included: 0, evicted: 0, totalTokens: 0 }; } - // Sort by priority desc, then created_at desc (newest first) for greedy packing + // Sort by priority desc, then generation desc (survived more compression = more durable), + // then created_at desc (newest first) for greedy packing const sorted = [...observations].toSorted((a, b) => { const pDiff = PRIORITY_RANK[b.priority] - PRIORITY_RANK[a.priority]; if (pDiff !== 0) return pDiff; + const gDiff = (b.generation ?? 0) - (a.generation ?? 0); + if (gDiff !== 0) return gDiff; return b.created_at.localeCompare(a.created_at); }); diff --git a/packages/cairn/src/db/queries.ts b/packages/cairn/src/db/queries.ts index 78f24c5..3d2a501 100644 --- a/packages/cairn/src/db/queries.ts +++ b/packages/cairn/src/db/queries.ts @@ -5,6 +5,133 @@ import { cosineSimilarity } from "../embeddings.js"; import { SIMILARITY } from "../similarity.js"; import type { CairnDatabase, Memory, NewMemory, NewAiUsage } from "./types.js"; +/** + * Common English stop words filtered from FTS5 queries. + * These produce high-frequency, low-signal matches when OR'd together. + */ +const FTS_STOP_WORDS = new Set([ + "a", + "an", + "the", + "and", + "or", + "but", + "not", + "no", + "nor", + "is", + "am", + "are", + "was", + "were", + "be", + "been", + "being", + "do", + "does", + "did", + "has", + "have", + "had", + "having", + "will", + "would", + "shall", + "should", + "may", + "might", + "can", + "could", + "it", + "its", + "this", + "that", + "these", + "those", + "i", + "me", + "my", + "we", + "us", + "our", + "you", + "your", + "he", + "him", + "his", + "she", + "her", + "they", + "them", + "their", + "in", + "on", + "at", + "to", + "for", + "of", + "with", + "by", + "from", + "as", + "into", + "about", + "between", + "through", + "up", + "out", + "off", + "over", + "under", + "again", + "then", + "once", + "if", + "so", + "than", + "too", + "very", + "just", + "also", + "now", + "what", + "which", + "who", + "whom", + "when", + "where", + "why", + "how", + "all", + "each", + "every", + "both", + "few", + "more", + "most", + "some", + "any", + "other", + "here", + "there", + "own", + "same", + "such", + "dont", + "im", + "ive", + "its", + "thats", + "youre", + "were", + "theyre", + "wont", + "cant", + "didnt", + "doesnt", + "isnt", +]); + // Kysely is invariant in its type parameter: Kysely is not assignable to // Kysely even when A extends B. To allow consumers with their own extended // database interfaces (e.g. `interface Database extends CairnDatabase { ... }`) @@ -72,6 +199,10 @@ export async function recallMemories( limit?: number; queryEmbedding?: number[]; similarityThreshold?: number; + /** ISO date (YYYY-MM-DD). Only return memories created on or after this date. */ + since?: string; + /** ISO date (YYYY-MM-DD). Only return memories created before this date. */ + before?: string; }, ): Promise<(Memory & { score?: number; matchType?: string })[]> { const d = typed(db); @@ -83,9 +214,9 @@ export async function recallMemories( try { const ftsQuery = query .split(/\s+/) - .filter((w) => w.length > 1) - .map((w) => `"${w.replace(/"/g, "")}"`) - .filter((w) => w !== '""') + .map((w) => w.toLowerCase().replace(/[^a-z0-9]/g, "")) + .filter((w) => w.length > 1 && !FTS_STOP_WORDS.has(w)) + .map((w) => `"${w}"`) .join(" OR "); if (ftsQuery) { @@ -96,6 +227,8 @@ export async function recallMemories( WHERE memories_fts MATCH ${ftsQuery} AND m.archived_at IS NULL ${opts?.category ? sql`AND m.category = ${opts.category}` : sql``} + ${opts?.since ? sql`AND m.created_at >= ${opts.since}` : sql``} + ${opts?.before ? sql`AND m.created_at < ${opts.before}` : sql``} ORDER BY fts.rank LIMIT ${limit * 2} `.execute(d); @@ -130,6 +263,8 @@ export async function recallMemories( .where("archived_at", "is", null) .where("embedding", "is not", null) .$if(!!opts.category, (qb) => qb.where("category", "=", opts!.category!)) + .$if(!!opts.since, (qb) => qb.where("created_at", ">=", opts!.since!)) + .$if(!!opts.before, (qb) => qb.where("created_at", "<", opts!.before!)) .execute(); const scored = allWithEmbeddings diff --git a/packages/cairn/src/graph/queries.ts b/packages/cairn/src/graph/queries.ts index df502bc..84a4ecb 100644 --- a/packages/cairn/src/graph/queries.ts +++ b/packages/cairn/src/graph/queries.ts @@ -159,6 +159,64 @@ export async function searchNodes( return merged; } +/** + * Like searchNodes but returns similarity scores for use as spreading activation seeds. + * LIKE-only matches get a default score of 0.5. + */ +export async function searchNodesWithScores( + db: AnyDB, + query: string, + limit = 10, + queryEmbedding?: number[], +): Promise> { + const d = typed(db); + const pattern = `%${query.toLowerCase().trim()}%`; + const likeResults = await d + .selectFrom("graph_nodes") + .selectAll() + .where("name", "like", pattern) + .orderBy("updated_at", "desc") + .limit(limit) + .execute(); + + if (!queryEmbedding) { + return (likeResults as GraphNode[]).map((n) => ({ node: n, score: 0.5 })); + } + + const threshold = SIMILARITY.GRAPH_SEARCH; + const allWithEmbeddings = await d + .selectFrom("graph_nodes") + .selectAll() + .where("embedding", "is not", null) + .execute(); + + const embeddingMatches = allWithEmbeddings + .map((n) => ({ + node: n as GraphNode, + score: cosineSimilarity(queryEmbedding, JSON.parse(n.embedding!)), + })) + .filter((n) => n.score >= threshold) + .toSorted((a, b) => b.score - a.score) + .slice(0, limit); + + const seen = new Set(); + const merged: Array<{ node: GraphNode; score: number }> = []; + for (const item of embeddingMatches) { + if (!seen.has(item.node.id) && merged.length < limit) { + seen.add(item.node.id); + merged.push(item); + } + } + for (const node of likeResults) { + if (!seen.has(node.id) && merged.length < limit) { + seen.add(node.id); + merged.push({ node: node as GraphNode, score: 0.5 }); + } + } + + return merged; +} + // --- Edges --- /** @@ -304,6 +362,95 @@ export async function traverseGraph( })); } +/** + * Spreading activation: BFS from seed nodes with exponential score decay. + * Each hop reduces activation by `decay * edgeWeight`. Nodes reached via + * multiple paths accumulate the max activation (not sum — avoids inflation). + * @param seeds - Starting nodes with initial activation scores. + * @param opts.decay - Score multiplier per hop (default 0.5). + * @param opts.maxDepth - Maximum traversal hops (default 2). + * @param opts.minActivation - Floor below which nodes are pruned (default 0.01). + * @returns Nodes sorted by activation score descending. + */ +export async function spreadActivation( + db: AnyDB, + seeds: Array<{ nodeId: string; score: number }>, + opts?: { decay?: number; maxDepth?: number; minActivation?: number }, +): Promise> { + const decay = opts?.decay ?? 0.5; + const maxDepth = opts?.maxDepth ?? 2; + const minActivation = opts?.minActivation ?? 0.01; + + // Track best activation per node + const activation = new Map(); + + // Initialize seeds + for (const seed of seeds) { + const existing = activation.get(seed.nodeId); + if (!existing || seed.score > existing.score) { + activation.set(seed.nodeId, { score: seed.score, depth: 0 }); + } + } + + // BFS frontier + let frontier = seeds.map((s) => ({ nodeId: s.nodeId, score: s.score, depth: 0 })); + + for (let hop = 0; hop < maxDepth; hop++) { + const nextFrontier: typeof frontier = []; + + for (const current of frontier) { + const edges = await getNodeEdges(db, current.nodeId); + + for (const edge of edges) { + const neighborId = edge.source_id === current.nodeId ? edge.target_id : edge.source_id; + // Weight=1 is baseline. Higher weights decay less (up to 1.0 = no decay). + // Formula: decay is reduced proportionally to weight, capped so score never exceeds parent. + const weightBoost = Math.min(edge.weight, 10) / 10; // 0.1–1.0 + const effectiveDecay = decay + (1 - decay) * weightBoost; // decay..1.0 + const neighborScore = current.score * effectiveDecay; + + if (neighborScore < minActivation) continue; + + const existing = activation.get(neighborId); + if (!existing || neighborScore > existing.score) { + activation.set(neighborId, { score: neighborScore, depth: hop + 1 }); + nextFrontier.push({ nodeId: neighborId, score: neighborScore, depth: hop + 1 }); + } + } + } + + frontier = nextFrontier; + if (frontier.length === 0) break; + } + + // Remove seed nodes from results (callers already have them) + const seedIds = new Set(seeds.map((s) => s.nodeId)); + const resultIds = [...activation.entries()] + .filter(([id]) => !seedIds.has(id)) + .filter(([, a]) => a.score >= minActivation) + .toSorted(([, a], [, b]) => b.score - a.score); + + if (resultIds.length === 0) return []; + + // Batch-fetch all result nodes + const nodeIds = resultIds.map(([id]) => id); + const nodes = await typed(db) + .selectFrom("graph_nodes") + .selectAll() + .where("id", "in", nodeIds) + .execute(); + + const nodeMap = new Map(nodes.map((n) => [n.id, n])); + + return resultIds + .map(([id, a]) => { + const node = nodeMap.get(id); + if (!node) return null; + return { node: node as GraphNode, score: a.score, depth: a.depth }; + }) + .filter((r): r is NonNullable => r !== null); +} + /** * Find memories connected to a set of nodes via edges. * Returns unique memory IDs from all edges connected to the given nodes. @@ -322,6 +469,45 @@ export async function getRelatedMemoryIds(db: AnyDB, nodeIds: string[]): Promise return results.map((r) => r.memory_id!).filter(Boolean); } +/** + * Find memories connected to scored nodes and assign each memory the + * best activation score from its linked nodes. + * Use after spreadActivation to rank graph-expanded memories. + * @param nodeScoreMap - Map of node ID → activation score (from seeds + spread results). + * @returns Scored memory IDs sorted by score descending. + */ +export async function getRelatedMemoriesWithScores( + db: AnyDB, + nodeScoreMap: Map, +): Promise> { + const nodeIds = [...nodeScoreMap.keys()]; + if (nodeIds.length === 0) return []; + + const edges = await typed(db) + .selectFrom("graph_edges") + .select(["source_id", "target_id", "memory_id"]) + .where("memory_id", "is not", null) + .where((eb) => eb.or([eb("source_id", "in", nodeIds), eb("target_id", "in", nodeIds)])) + .execute(); + + // For each memory, take the max score from any connected scored node + const memoryScores = new Map(); + for (const edge of edges) { + const memId = edge.memory_id!; + const sourceScore = nodeScoreMap.get(edge.source_id) ?? 0; + const targetScore = nodeScoreMap.get(edge.target_id) ?? 0; + const best = Math.max(sourceScore, targetScore); + const current = memoryScores.get(memId) ?? 0; + if (best > current) { + memoryScores.set(memId, best); + } + } + + return [...memoryScores.entries()] + .map(([memoryId, score]) => ({ memoryId, score })) + .toSorted((a, b) => b.score - a.score); +} + /** * Get all nodes connected to a specific memory via edges. */ diff --git a/packages/cairn/src/index.ts b/packages/cairn/src/index.ts index ec33143..6fda317 100644 --- a/packages/cairn/src/index.ts +++ b/packages/cairn/src/index.ts @@ -54,9 +54,12 @@ export { processMemoryForGraph } from "./graph/index.js"; export { extractEntities, DEFAULT_ENTITY_TYPES } from "./graph/extract.js"; export { searchNodes, + searchNodesWithScores, traverseGraph, + spreadActivation, getNodeEdges, getRelatedMemoryIds, + getRelatedMemoriesWithScores, getMemoryNodes, findNodeByName, upsertNode, diff --git a/packages/cairn/src/similarity.ts b/packages/cairn/src/similarity.ts index 6581c36..d517505 100644 --- a/packages/cairn/src/similarity.ts +++ b/packages/cairn/src/similarity.ts @@ -8,6 +8,6 @@ export const SIMILARITY = { GRAPH_SEARCH: 0.3 as number, /** Tool pack selection threshold. */ PACK_SELECTION: 0.3 as number, - /** Skill selection threshold. Slightly higher to avoid false matches. */ - SKILL_SELECTION: 0.35 as number, + /** Skill selection threshold. Must be high enough to reject noise from short/generic queries. */ + SKILL_SELECTION: 0.45 as number, };