Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/memory.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Session transcripts stored as embeddings. Each episode contains:

Search: "What happened last time I worked on the auth service?"

Episode ranking is not raw vector score alone. Retrieval blends semantic match with importance, reinforcement from repeated access, and decay over time so durable memories stay available while stale one-off memories fade.

### Tier 2: Semantic Memory

Accumulated facts with contradiction detection and temporal validity:
Expand Down Expand Up @@ -50,7 +52,8 @@ Before each agent invocation, the context builder:
3. Searches semantic memory (top 20 facts)
4. Searches procedural memory (top 5 procedures)
5. Budgets results to fit within the token limit (default: 50,000 tokens)
6. Formats results into the memory section of the system prompt
6. Filters out stale, low-signal episodic memories before prompt injection
7. Formats results into the memory section of the system prompt

## Consolidation

Expand Down
55 changes: 55 additions & 0 deletions src/memory/__tests__/context-builder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,61 @@ describe("MemoryContextBuilder", () => {
expect(result).toContain("success");
});

// Feeds the context builder one stale episode (low importance, never
// re-accessed, 90 days old) and one durable episode (high importance, accessed
// yesterday), then checks that only the durable summary reaches the prompt.
test("filters stale low-signal episodes from prompt context", async () => {
  const DAY_MS = 24 * 3600 * 1000;
  const isoDaysAgo = (days: number): string => new Date(Date.now() - days * DAY_MS).toISOString();

  // Fixture the test expects to be excluded from the built context.
  const staleEpisode = {
    id: "stale-ep",
    type: "task" as const,
    summary: "One-off stale note",
    detail: "No longer important",
    parent_id: null,
    session_id: "s1",
    user_id: "u1",
    tools_used: [],
    files_touched: [],
    outcome: "success" as const,
    outcome_detail: "",
    lessons: [],
    started_at: isoDaysAgo(90),
    ended_at: isoDaysAgo(90),
    duration_seconds: 300,
    importance: 0.2,
    access_count: 0,
    last_accessed_at: isoDaysAgo(90),
    decay_rate: 1.0,
  };

  // Fixture the test expects to survive filtering.
  const durableEpisode = {
    id: "durable-ep",
    type: "task" as const,
    summary: "Repeated deployment pattern",
    detail: "Still referenced often",
    parent_id: null,
    session_id: "s2",
    user_id: "u1",
    tools_used: ["Bash"],
    files_touched: [],
    outcome: "success" as const,
    outcome_detail: "",
    lessons: [],
    started_at: isoDaysAgo(45),
    ended_at: isoDaysAgo(45),
    duration_seconds: 300,
    importance: 0.8,
    access_count: 4,
    last_accessed_at: isoDaysAgo(1),
    decay_rate: 1.0,
  };

  const memory = createMockMemorySystem({
    episodes: Promise.resolve([staleEpisode, durableEpisode]),
  });
  const builder = new MemoryContextBuilder(memory, TEST_CONFIG);

  const result = await builder.build("deployment");

  expect(result).toContain("Repeated deployment pattern");
  expect(result).not.toContain("One-off stale note");
});

test("formats procedure section correctly", async () => {
const memory = createMockMemorySystem({
procedure: Promise.resolve({
Expand Down
62 changes: 62 additions & 0 deletions src/memory/__tests__/episodic.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -234,4 +234,66 @@ describe("EpisodicStore", () => {
expect(episodes[0].id).toBe("new-ep");
expect(episodes[1].id).toBe("old-ep");
});

// Stubs the embedding and vector-search HTTP endpoints so that the search
// returns a stale episode with the higher raw score (0.82) and a frequently
// accessed episode with the lower raw score (0.7), then checks that recall()
// with the "metadata" strategy orders the frequently accessed one first.
test("recall() metadata strategy favors reinforced memories", async () => {
  const vec = make768dVector();
  const now = Date.now();
  const DAY_MS = 24 * 3600 * 1000;

  // Points served by the mocked "/points/query" endpoint.
  const queryPoints = [
    {
      id: "stale-ep",
      score: 0.82,
      payload: {
        type: "task",
        summary: "Stale one-off episode",
        importance: 0.3,
        access_count: 0,
        last_accessed_at: new Date(now - 45 * DAY_MS).toISOString(),
        started_at: now - 45 * DAY_MS,
      },
    },
    {
      id: "durable-ep",
      score: 0.7,
      payload: {
        type: "task",
        summary: "Frequently reused deployment memory",
        importance: 0.8,
        access_count: 6,
        last_accessed_at: new Date(now - 2 * DAY_MS).toISOString(),
        started_at: now - 45 * DAY_MS,
      },
    },
  ];

  globalThis.fetch = mock((url: string | Request) => {
    const target = typeof url === "string" ? url : url.url;

    // A fresh Response is built on every call, as a body can only be read once.
    let response: Response;
    if (target.includes("/api/embed")) {
      response = new Response(JSON.stringify({ embeddings: [vec] }), { status: 200 });
    } else if (target.includes("/points/query")) {
      response = new Response(JSON.stringify({ result: { points: queryPoints } }), {
        status: 200,
        headers: { "Content-Type": "application/json" },
      });
    } else {
      // Any other request gets a generic OK payload.
      response = new Response(JSON.stringify({ status: "ok" }), { status: 200 });
    }

    return Promise.resolve(response);
  }) as unknown as typeof fetch;

  const store = new EpisodicStore(new QdrantClient(TEST_CONFIG), new EmbeddingClient(TEST_CONFIG), TEST_CONFIG);

  const episodes = await store.recall("deployment", { strategy: "metadata" });
  const [first, second] = [episodes[0], episodes[1]];

  expect(first.id).toBe("durable-ep");
  expect(second.id).toBe("stale-ep");
});
});
78 changes: 78 additions & 0 deletions src/memory/__tests__/ranking.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { describe, expect, test } from "bun:test";
import { calculateEpisodeRecallScore, shouldIncludeEpisodeInContext } from "../ranking.ts";
import type { Episode } from "../types.ts";

/**
 * Builds an Episode fixture for the ranking tests.
 *
 * The defaults describe a successful "task" episode that started 24 hours ago,
 * ended just now, has importance 0.6 and has never been accessed.
 *
 * @param overrides - Fields that replace the corresponding defaults.
 * @returns The defaults merged with `overrides` (overrides win).
 */
function makeEpisode(overrides?: Partial<Episode>): Episode {
  const ONE_DAY_MS = 24 * 3600 * 1000;

  const defaults: Episode = {
    id: "ep-1",
    type: "task",
    summary: "Memory summary",
    detail: "Memory detail",
    parent_id: null,
    session_id: "session-1",
    user_id: "user-1",
    tools_used: [],
    files_touched: [],
    outcome: "success",
    outcome_detail: "Completed successfully",
    lessons: [],
    started_at: new Date(Date.now() - ONE_DAY_MS).toISOString(),
    ended_at: new Date().toISOString(),
    duration_seconds: 60,
    importance: 0.6,
    access_count: 0,
    last_accessed_at: new Date().toISOString(),
    decay_rate: 1,
  };

  // Spreading undefined is a no-op, so a call without overrides returns the defaults.
  return { ...defaults, ...overrides };
}

// Unit tests for the episode ranking helpers: the recall score calculation and
// the predicate that decides whether an episode is included in prompt context.
describe("memory ranking", () => {
  const DAY_MS = 24 * 3600 * 1000;
  const msDaysAgo = (days: number): number => Date.now() - days * DAY_MS;
  const isoDaysAgo = (days: number): string => new Date(msDaysAgo(days)).toISOString();

  // A lower raw score (0.7) with high importance and six recent accesses must
  // outscore a higher raw score (0.82) with low importance and no accesses.
  test("metadata strategy rewards reinforced memories", () => {
    const staleSignals = {
      importance: 0.3,
      accessCount: 0,
      startedAt: msDaysAgo(45),
      lastAccessedAt: isoDaysAgo(45),
      decayRate: 1,
    };
    const staleWeak = calculateEpisodeRecallScore(0.82, staleSignals, "metadata");

    const durableSignals = {
      importance: 0.8,
      accessCount: 6,
      startedAt: msDaysAgo(45),
      lastAccessedAt: isoDaysAgo(2),
      decayRate: 1,
    };
    const durableRepeat = calculateEpisodeRecallScore(0.7, durableSignals, "metadata");

    expect(durableRepeat).toBeGreaterThan(staleWeak);
  });

  // Both episodes started 60 days ago; only importance, access count and the
  // last access time differ between them.
  test("context filtering drops stale low-signal memories", () => {
    const sixtyDaysAgo = (): string => isoDaysAgo(60);

    const staleWeak = makeEpisode({
      importance: 0.2,
      access_count: 0,
      started_at: sixtyDaysAgo(),
      last_accessed_at: sixtyDaysAgo(),
    });

    const durableRepeat = makeEpisode({
      id: "ep-2",
      importance: 0.85,
      access_count: 5,
      started_at: sixtyDaysAgo(),
      last_accessed_at: isoDaysAgo(1),
    });

    expect(shouldIncludeEpisodeInContext(staleWeak)).toBe(false);
    expect(shouldIncludeEpisodeInContext(durableRepeat)).toBe(true);
  });
});
12 changes: 9 additions & 3 deletions src/memory/context-builder.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { MemoryConfig } from "../config/types.ts";
import { shouldIncludeEpisodeInContext } from "./ranking.ts";
import type { MemorySystem } from "./system.ts";
import type { Episode, Procedure, SemanticFact } from "./types.ts";

Expand Down Expand Up @@ -44,10 +45,13 @@ export class MemoryContextBuilder {

// Recent memories provide episode context
if (episodes.length > 0 && tokenBudget > 500) {
const episodeSection = this.formatEpisodes(episodes, tokenBudget);
const durableEpisodes = episodes.filter(shouldIncludeEpisodeInContext);
const episodeSection = this.formatEpisodes(durableEpisodes, tokenBudget);
const episodeTokens = this.estimateTokens(episodeSection);
sections.push(episodeSection);
tokenBudget -= episodeTokens;
if (episodeSection) {
sections.push(episodeSection);
tokenBudget -= episodeTokens;
}
}

// Relevant procedures
Expand All @@ -70,6 +74,8 @@ export class MemoryContextBuilder {
}

private formatEpisodes(episodes: Episode[], tokenBudget: number): string {
if (episodes.length === 0) return "";

const header = "## Recent Memories\n";
let content = header;
const maxChars = tokenBudget * CHARS_PER_TOKEN;
Expand Down
43 changes: 17 additions & 26 deletions src/memory/episodic.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { MemoryConfig } from "../config/types.ts";
import { type EmbeddingClient, textToSparseVector } from "./embeddings.ts";
import type { QdrantClient } from "./qdrant-client.ts";
import { calculateEpisodeRecallScore } from "./ranking.ts";
import type { Episode, QdrantSearchResult, RecallOptions } from "./types.ts";

const COLLECTION_SCHEMA = {
Expand Down Expand Up @@ -128,6 +129,7 @@ export class EpisodicStore {
for (const id of ids) {
try {
await this.qdrant.updatePayload(this.collectionName, id, {
access_count: { $inc: 1 },
last_accessed_at: new Date().toISOString(),
});
} catch {
Expand Down Expand Up @@ -165,34 +167,23 @@ export class EpisodicStore {
return { must };
}

private applyStrategy(results: QdrantSearchResult[], strategy: string): QdrantSearchResult[] {
const now = Date.now();

private applyStrategy(results: QdrantSearchResult[], strategy: RecallOptions["strategy"]): QdrantSearchResult[] {
return results
.map((r) => {
const startedAt = (r.payload.started_at as number) ?? 0;
const importance = (r.payload.importance as number) ?? 0.5;
const hoursSince = (now - startedAt) / (1000 * 60 * 60);
const recencyScore = Math.exp(-0.01 * hoursSince);

let finalScore: number;
switch (strategy) {
case "similarity":
finalScore = r.score * 0.7 + importance * 0.2 + recencyScore * 0.1;
break;
case "temporal":
finalScore = recencyScore * 0.7 + r.score * 0.2 + importance * 0.1;
break;
case "metadata":
finalScore = r.score * 0.5 + recencyScore * 0.3 + importance * 0.2;
break;
default:
// recency-biased (default)
finalScore = r.score * 0.4 + recencyScore * 0.4 + importance * 0.2;
break;
}

return { ...r, score: finalScore };
return {
...r,
score: calculateEpisodeRecallScore(
r.score,
{
importance: (r.payload.importance as number) ?? 0.5,
accessCount: (r.payload.access_count as number) ?? 0,
startedAt: (r.payload.started_at as number) ?? 0,
lastAccessedAt: (r.payload.last_accessed_at as string | undefined) ?? undefined,
decayRate: (r.payload.decay_rate as number) ?? 1,
},
strategy,
),
};
})
.sort((a, b) => b.score - a.score);
}
Expand Down
Loading
Loading