diff --git a/docs/memory.md b/docs/memory.md index 70e42e8..1d3bdeb 100644 --- a/docs/memory.md +++ b/docs/memory.md @@ -48,12 +48,15 @@ Results are fused using Reciprocal Rank Fusion (RRF). This means searching for " Before each agent invocation, the context builder: 1. Embeds the user's message -2. Searches episodic memory (top 10 episodes) -3. Searches semantic memory (top 20 facts) -4. Searches procedural memory (top 5 procedures) -5. Budgets results to fit within the token limit (default: 50,000 tokens) -6. Filters out stale, low-signal episodic memories before prompt injection -7. Formats results into the memory section of the system prompt +2. On the first turn of a brand-new session, adds a compact durable context section +3. Searches episodic memory (top 10 episodes) +4. Searches semantic memory (top 20 facts) +5. Searches procedural memory (top 5 procedures) +6. Budgets results to fit within the token limit (default: 50,000 tokens) +7. Filters out stale, low-signal episodic memories before prompt injection +8. Formats results into the memory section of the system prompt + +The durable context section is startup-only and intentionally small. It favors high-confidence facts and metadata-ranked memories so a new session begins with a little long-term continuity before normal retrieval takes over. 
## Consolidation diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index 4ed3fc8..b63faf8 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -120,7 +120,7 @@ export class AgentRuntime { let memoryContext: string | undefined; if (this.memoryContextBuilder) { try { - memoryContext = (await this.memoryContextBuilder.build(text)) || undefined; + memoryContext = (await this.memoryContextBuilder.build(text, { isNewSession: !isResume })) || undefined; } catch { // Memory unavailable, continue without it } diff --git a/src/memory/__tests__/context-builder.test.ts b/src/memory/__tests__/context-builder.test.ts index 13300c1..a2dcd88 100644 --- a/src/memory/__tests__/context-builder.test.ts +++ b/src/memory/__tests__/context-builder.test.ts @@ -14,21 +14,31 @@ const TEST_CONFIG: MemoryConfig = { function createMockMemorySystem(overrides?: { ready?: boolean; episodes?: ReturnType<MemorySystem["recallEpisodes"]>; + durableEpisodes?: ReturnType<MemorySystem["recallEpisodes"]>; facts?: ReturnType<MemorySystem["recallFacts"]>; procedure?: ReturnType<MemorySystem["findProcedure"]>; -}): MemorySystem { - const ms = { +}) { + const recallEpisodes = mock((_query: string, options?: { strategy?: string }) => { + if (options?.strategy === "metadata") { + return overrides?.durableEpisodes ?? Promise.resolve([]); + } + + return overrides?.episodes ?? Promise.resolve([]); + }); + const recallFacts = mock(() => overrides?.facts ?? Promise.resolve([])); + const findProcedure = mock(() => overrides?.procedure ?? Promise.resolve(null)); + const memory = { isReady: () => overrides?.ready ?? true, - recallEpisodes: mock(() => overrides?.episodes ?? Promise.resolve([])), - recallFacts: mock(() => overrides?.facts ?? Promise.resolve([])), - findProcedure: mock(() => overrides?.procedure ?? 
Promise.resolve(null)), + recallEpisodes, + recallFacts, + findProcedure, } as unknown as MemorySystem; - return ms; + return { memory, recallEpisodes, recallFacts, findProcedure }; } describe("MemoryContextBuilder", () => { test("returns empty string when memory system is not ready", async () => { - const memory = createMockMemorySystem({ ready: false }); + const { memory } = createMockMemorySystem({ ready: false }); const builder = new MemoryContextBuilder(memory, TEST_CONFIG); const result = await builder.build("test query"); @@ -36,7 +46,7 @@ describe("MemoryContextBuilder", () => { }); test("returns empty string when no memories found", async () => { - const memory = createMockMemorySystem(); + const { memory } = createMockMemorySystem(); const builder = new MemoryContextBuilder(memory, TEST_CONFIG); const result = await builder.build("test query"); @@ -44,7 +54,7 @@ describe("MemoryContextBuilder", () => { }); test("formats facts section correctly", async () => { - const memory = createMockMemorySystem({ + const { memory } = createMockMemorySystem({ facts: Promise.resolve([ { id: "f1", @@ -89,7 +99,7 @@ describe("MemoryContextBuilder", () => { }); test("formats episodes section correctly", async () => { - const memory = createMockMemorySystem({ + const { memory } = createMockMemorySystem({ episodes: Promise.resolve([ { id: "ep1", @@ -125,7 +135,7 @@ describe("MemoryContextBuilder", () => { }); test("filters stale low-signal episodes from prompt context", async () => { - const memory = createMockMemorySystem({ + const { memory } = createMockMemorySystem({ episodes: Promise.resolve([ { id: "stale-ep", @@ -180,7 +190,7 @@ describe("MemoryContextBuilder", () => { }); test("formats procedure section correctly", async () => { - const memory = createMockMemorySystem({ + const { memory } = createMockMemorySystem({ procedure: Promise.resolve({ id: "proc1", name: "deploy_staging", @@ -225,6 +235,177 @@ describe("MemoryContextBuilder", () => { 
expect(result).toContain("5 successes"); }); + test("adds durable context on the first turn of a new session", async () => { + const { memory, recallEpisodes } = createMockMemorySystem({ + episodes: Promise.resolve([ + { + id: "ep1", + type: "task" as const, + summary: "Refreshed the deployment runbook", + detail: "Full detail", + parent_id: null, + session_id: "s1", + user_id: "u1", + tools_used: ["Edit"], + files_touched: [], + outcome: "success" as const, + outcome_detail: "", + lessons: [], + started_at: new Date(Date.now() - 3600000).toISOString(), + ended_at: new Date().toISOString(), + duration_seconds: 3600, + importance: 0.9, + access_count: 3, + last_accessed_at: new Date().toISOString(), + decay_rate: 1.0, + }, + { + id: "ep2", + type: "interaction" as const, + summary: "Discussed rollout timing for tomorrow", + detail: "Full detail", + parent_id: null, + session_id: "s2", + user_id: "u1", + tools_used: [], + files_touched: [], + outcome: "partial" as const, + outcome_detail: "", + lessons: [], + started_at: new Date(Date.now() - 7200000).toISOString(), + ended_at: new Date().toISOString(), + duration_seconds: 1800, + importance: 0.7, + access_count: 1, + last_accessed_at: new Date().toISOString(), + decay_rate: 1.0, + }, + ]), + durableEpisodes: Promise.resolve([ + { + id: "ep1", + type: "task" as const, + summary: "Refreshed the deployment runbook", + detail: "Full detail", + parent_id: null, + session_id: "s1", + user_id: "u1", + tools_used: ["Edit"], + files_touched: [], + outcome: "success" as const, + outcome_detail: "", + lessons: [], + started_at: new Date(Date.now() - 3600000).toISOString(), + ended_at: new Date().toISOString(), + duration_seconds: 3600, + importance: 0.9, + access_count: 3, + last_accessed_at: new Date().toISOString(), + decay_rate: 1.0, + }, + ]), + facts: Promise.resolve([ + { + id: "f1", + subject: "user", + predicate: "prefers", + object: "small PRs", + natural_language: "The user prefers small PRs", + source_episode_ids: 
[], + confidence: 0.9, + valid_from: new Date().toISOString(), + valid_until: null, + version: 1, + previous_version_id: null, + category: "user_preference" as const, + tags: [], + }, + { + id: "f2", + subject: "repo", + predicate: "uses", + object: "Bun", + natural_language: "This repo uses Bun for task execution", + source_episode_ids: [], + confidence: 0.6, + valid_from: new Date().toISOString(), + valid_until: null, + version: 1, + previous_version_id: null, + category: "codebase" as const, + tags: [], + }, + ]), + }); + + const builder = new MemoryContextBuilder(memory, TEST_CONFIG); + const result = await builder.build("help me deploy", { isNewSession: true }); + + expect(recallEpisodes).toHaveBeenCalledTimes(2); + expect(result).toContain("## Durable Context"); + expect(result).toContain("Fact: The user prefers small PRs"); + expect(result).toContain("Memory: [task] Refreshed the deployment runbook"); + expect(result).toContain("## Known Facts"); + expect(result).toContain("This repo uses Bun for task execution"); + expect(result).toContain("## Recent Memories"); + expect(result).toContain("Discussed rollout timing for tomorrow"); + expect(result.split("The user prefers small PRs").length - 1).toBe(1); + expect(result.split("Refreshed the deployment runbook").length - 1).toBe(1); + }); + + test("skips durable startup context on resumed turns", async () => { + const { memory, recallEpisodes } = createMockMemorySystem({ + episodes: Promise.resolve([]), + durableEpisodes: Promise.resolve([ + { + id: "ep1", + type: "task" as const, + summary: "Should not be recalled durably", + detail: "Full detail", + parent_id: null, + session_id: "s1", + user_id: "u1", + tools_used: [], + files_touched: [], + outcome: "success" as const, + outcome_detail: "", + lessons: [], + started_at: new Date().toISOString(), + ended_at: new Date().toISOString(), + duration_seconds: 60, + importance: 0.9, + access_count: 0, + last_accessed_at: "", + decay_rate: 1.0, + }, + ]), + facts: 
Promise.resolve([ + { + id: "f1", + subject: "user", + predicate: "prefers", + object: "small PRs", + natural_language: "The user prefers small PRs", + source_episode_ids: [], + confidence: 0.9, + valid_from: new Date().toISOString(), + valid_until: null, + version: 1, + previous_version_id: null, + category: "user_preference" as const, + tags: [], + }, + ]), + }); + + const builder = new MemoryContextBuilder(memory, TEST_CONFIG); + const result = await builder.build("help me deploy"); + + expect(recallEpisodes).toHaveBeenCalledTimes(1); + expect(result).not.toContain("## Durable Context"); + expect(result).toContain("## Known Facts"); + }); + test("respects token budget and truncates", async () => { // Create many facts that would exceed a tiny budget const manyFacts = Array.from({ length: 100 }, (_, i) => ({ @@ -243,7 +424,7 @@ describe("MemoryContextBuilder", () => { tags: [], })); - const memory = createMockMemorySystem({ + const { memory } = createMockMemorySystem({ facts: Promise.resolve(manyFacts), }); @@ -259,7 +440,7 @@ describe("MemoryContextBuilder", () => { }); test("handles errors from memory system gracefully", async () => { - const memory = createMockMemorySystem({ + const { memory } = createMockMemorySystem({ episodes: Promise.reject(new Error("Qdrant down")), facts: Promise.reject(new Error("Qdrant down")), procedure: Promise.reject(new Error("Qdrant down")), diff --git a/src/memory/context-builder.ts b/src/memory/context-builder.ts index 00f5381..4b78be4 100644 --- a/src/memory/context-builder.ts +++ b/src/memory/context-builder.ts @@ -5,6 +5,13 @@ import type { Episode, Procedure, SemanticFact } from "./types.ts"; // Rough estimate: 1 token is about 4 characters const CHARS_PER_TOKEN = 4; +const DURABLE_FACT_CONFIDENCE = 0.8; +const MIN_EPISODE_BUDGET = 500; +const MIN_PROCEDURE_BUDGET = 200; + +export type MemoryContextBuildOptions = { + isNewSession?: boolean; +}; export class MemoryContextBuilder { private memory: MemorySystem; @@ -19,23 
+26,43 @@ export class MemoryContextBuilder { this.factLimit = config.context.fact_limit; } - async build(query: string): Promise<string> { + async build(query: string, options: MemoryContextBuildOptions = {}): Promise<string> { if (!this.memory.isReady()) { return ""; } - const [episodes, facts, procedure] = await Promise.all([ + const [episodes, facts, procedure, durableEpisodes] = await Promise.all([ this.memory.recallEpisodes(query, { limit: this.episodeLimit }).catch(() => []), this.memory.recallFacts(query, { limit: this.factLimit }).catch(() => []), this.memory.findProcedure(query).catch(() => null), + options.isNewSession + ? this.memory + .recallEpisodes(query, { limit: this.getDurableEpisodeLimit(), strategy: "metadata" }) + .catch(() => []) + : Promise.resolve([]), ]); + const durableFacts = options.isNewSession ? this.selectDurableFacts(facts) : []; + const durableFactIds = new Set(durableFacts.map((fact) => fact.id)); + const durableEpisodeIds = new Set(durableEpisodes.map((episode) => episode.id)); + const remainingFacts = facts.filter((fact) => !durableFactIds.has(fact.id)); + const remainingEpisodes = episodes.filter((episode) => !durableEpisodeIds.has(episode.id)); + const sections: string[] = []; let tokenBudget = this.maxTokens; + if (options.isNewSession) { + const durableSection = this.formatDurableContext(durableFacts, durableEpisodes, tokenBudget); + const durableTokens = this.estimateTokens(durableSection); + if (durableTokens > 0 && durableTokens <= tokenBudget) { + sections.push(durableSection); + tokenBudget -= durableTokens; + } + } + // Known facts get priority - they're the agent's accumulated knowledge - if (facts.length > 0) { - const factSection = this.formatFacts(facts); + if (remainingFacts.length > 0) { + const factSection = this.formatFacts(remainingFacts); const factTokens = this.estimateTokens(factSection); if (factTokens <= tokenBudget) { sections.push(factSection); @@ -44,9 +71,9 @@ export class MemoryContextBuilder { } // Recent memories 
provide episode context - if (episodes.length > 0 && tokenBudget > 500) { - const durableEpisodes = episodes.filter(shouldIncludeEpisodeInContext); - const episodeSection = this.formatEpisodes(durableEpisodes, tokenBudget); + if (remainingEpisodes.length > 0 && tokenBudget > MIN_EPISODE_BUDGET) { + const filteredEpisodes = remainingEpisodes.filter(shouldIncludeEpisodeInContext); + const episodeSection = this.formatEpisodes(filteredEpisodes, tokenBudget); const episodeTokens = this.estimateTokens(episodeSection); if (episodeSection) { sections.push(episodeSection); @@ -55,7 +82,7 @@ export class MemoryContextBuilder { } // Relevant procedures - if (procedure && tokenBudget > 200) { + if (procedure && tokenBudget > MIN_PROCEDURE_BUDGET) { const procSection = this.formatProcedure(procedure); const procTokens = this.estimateTokens(procSection); if (procTokens <= tokenBudget) { @@ -68,6 +95,26 @@ export class MemoryContextBuilder { return sections.join("\n\n"); } + private formatDurableContext(facts: SemanticFact[], episodes: Episode[], tokenBudget: number): string { + const header = "## Durable Context\n"; + let content = header; + const maxChars = tokenBudget * CHARS_PER_TOKEN; + + for (const fact of facts) { + const entry = `- Fact: ${fact.natural_language} [confidence: ${fact.confidence.toFixed(1)}]\n`; + if (content.length + entry.length > maxChars) break; + content += entry; + } + + for (const episode of episodes) { + const entry = `- Memory: [${episode.type}] ${episode.summary} (${episode.outcome}, ${formatRelativeTime(episode.started_at)})\n`; + if (content.length + entry.length > maxChars) break; + content += entry; + } + + return content.trim() === "## Durable Context" ? 
"" : content.trim(); + } + private formatFacts(facts: SemanticFact[]): string { const lines = facts.map((f) => `- ${f.natural_language} [confidence: ${f.confidence.toFixed(1)}]`); return `## Known Facts\n${lines.join("\n")}`; @@ -105,6 +152,27 @@ export class MemoryContextBuilder { private estimateTokens(text: string): number { return Math.ceil(text.length / CHARS_PER_TOKEN); } + + private selectDurableFacts(facts: SemanticFact[]): SemanticFact[] { + return facts + .filter((fact) => fact.valid_until == null && fact.confidence >= DURABLE_FACT_CONFIDENCE) + .sort((a, b) => { + if (b.confidence !== a.confidence) { + return b.confidence - a.confidence; + } + + return new Date(b.valid_from).getTime() - new Date(a.valid_from).getTime(); + }) + .slice(0, this.getDurableFactLimit()); + } + + private getDurableFactLimit(): number { + return Math.max(2, Math.min(5, Math.ceil(this.factLimit / 4))); + } + + private getDurableEpisodeLimit(): number { + return Math.max(1, Math.min(3, Math.ceil(this.episodeLimit / 3))); + } } function formatRelativeTime(isoDate: string): string {