diff --git a/README.md b/README.md index 0bf79ec..33f2390 100644 --- a/README.md +++ b/README.md @@ -265,6 +265,7 @@ The right profile can swing retrieval quality by 40+ points on benchmarks. | Package | Description | |---|---| +| [`@db0-ai/db0`](packages/md) | Markdown-based memory store — CLI + SDK with scoping, superseding, and context packing | | [`@db0-ai/core`](packages/core) | Types, harness, memory/state/log/context, profiles, extraction | | [`@db0-ai/backends-sqlite`](packages/backends/sqlite) | SQLite via sql.js — zero native deps | | [`@db0-ai/backends-postgres`](packages/backends/postgres) | PostgreSQL + pgvector | diff --git a/docs/design/db0-md-design.md b/docs/design/db0-md-design.md new file mode 100644 index 0000000..6e05d41 --- /dev/null +++ b/docs/design/db0-md-design.md @@ -0,0 +1,367 @@ +# Design: db0/md — The Embedded Memory Primitive for AI Agents + +**Status:** Prototype +**Branch:** `feat/db0-md-prototype` +**Date:** 2026-04-03 + +## Problem + +Every AI agent that runs more than once needs memory. Today, three independent +billion-dollar agent systems (Manus, OpenClaw, Claude Code) converged on the same +architecture: markdown files in a directory. This works because files give agents +transparency, git history, portability, and zero infrastructure. + +But files alone have no lifecycle. After weeks of use: + +- Contradictions accumulate silently ("prefers Python" and "prefers Rust" coexist) +- Nobody cleans up — the directory only grows +- Search is grep (no semantic understanding) +- Context assembly is "cat everything and hope it fits" +- The agent doesn't know which facts are stale + +The tools that exist solve the wrong half of the problem. QMD, RAG pipelines, and +vector databases are excellent at **reading** — search and retrieval. 
Nobody has +built the **write-side primitives**: superseding stale facts, deduplicating on +write, scoping by lifetime, consolidating redundant memories, and assembling +coherent context within a token budget. + +Mem0 and Zep are cloud services. Letta is a framework. LangChain memory modules +are abandoned by frustrated developers. There is no embedded, zero-config, +local-first memory primitive — no "SQLite for agent memory." + +## Product Thesis + +**db0/md is a CLI tool that turns a directory of markdown files into a managed +memory system.** + +Agents keep reading and writing markdown files. db0 adds the lifecycle: +smart write (dedup + supersede), search, context assembly, and garbage collection. + +The entry point is the "wow moment" — catching a contradiction in under 60 seconds: + +```bash +npx db0 remember "User prefers Rust" --dir ./memories +# Created memories/user/language-prefs.md + +npx db0 remember "User prefers Python" --dir ./memories +# Conflicts with memories/user/language-prefs.md ("User prefers Rust") +# Superseded. Updated memories/user/language-prefs.md +``` + +## Design Principles + +1. **Files are the source of truth.** Not a database. Not a cache export. Real + markdown files that you can open in VS Code, browse on GitHub, copy with `cp`. + +2. **Zero config.** Point at a directory, get value. No init, no config file, + no database to provision. + +3. **The interface is filesystem-shaped.** Agents interact through familiar + operations: read files, write files, grep, ls. The CLI extends this with + `remember`, `search`, `pack`, `consolidate`. + +4. **Everything derived is rebuildable.** If `.db0/` is deleted, `db0 index` + rebuilds from the markdown files. The files are always sufficient. + +5. **Lifecycle, not retrieval.** Search is table stakes (included but not the + differentiator). The value is write-side: dedup, supersede, scope, consolidate. + +6. 
**Works with every agent runtime.** Claude Code, Codex, OpenClaw, Gemini CLI, + custom agents — anything with shell + filesystem access. + +## Architecture + +``` + ┌─────────────────────┐ + │ Agent Runtime │ + │ (Claude Code, Codex, │ + │ OpenClaw, custom) │ + └──────────┬──────────┘ + │ + reads/writes files + calls db0 CLI + │ + ┌──────────────────────┼──────────────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐ + │ Markdown Files │ │ db0 CLI │ │ MEMORIES.md │ + │ (source of truth)│ │ │ │ (auto-generated │ + │ │ │ remember │ │ index file) │ + │ memories/ │ │ search │ │ │ + │ user/ │ │ pack │ │ │ + │ agent/ │ │ consolidate │ │ │ + │ session/ │ │ index │ │ │ + │ task/ │ │ │ │ │ + └──────────┬────────┘ └────────┬─────────┘ └──────────────────┘ + │ │ + │ ┌────────────────┘ + │ │ + ▼ ▼ + ┌──────────────────────────────────────┐ + │ .db0/ (derived, rebuildable) │ + │ │ + │ index.sqlite — embeddings, FTS, │ + │ metadata cache │ + │ manifest.json — file hashes, │ + │ last-sync state │ + └──────────────────────────────────────┘ +``` + +### Storage Model + +**Markdown files own content.** Each file is one memory: + +```markdown +--- +id: m_a1b2c3 +scope: user +tags: [preference, language] +created: 2026-04-03T10:00:00Z +supersedes: m_x9y8z7 +--- + +User prefers Rust for CLI tools. Switched from Python in March 2026. +``` + +Frontmatter carries metadata. Body is the memory content. An agent can write a +file with just body + `scope:` — db0 fills in id, created, and other fields on +the next `index` or `remember` run. + +**Directory structure is scoping:** + +``` +memories/ + user/ # permanent, cross-session + agent/ # permanent, agent-level knowledge + session/ # current session (auto-expires) + task/ # current task (auto-expires) +``` + +**SQLite owns derived data.** Embeddings, FTS index, access counts, similarity +scores. Stored in `.db0/index.sqlite`. Rebuildable from files via `db0 index`. 
+ +**MEMORIES.md is the auto-generated index.** One file an agent reads to get all +context. Auto-maintained after every write or consolidate. + +### Relationship to @db0-ai/core + +db0/md wraps `@db0-ai/core` internally: + +- `Memory` component provides superseding, scoping, dedup logic +- `hashEmbed` / `defaultEmbeddingFn` provides zero-dep embeddings +- `memoryAge` provides staleness tracking +- Extraction strategies (rules/LLM) provide fact extraction +- Consolidation (reconcile + consolidateFn) provides garbage collection +- SQLite backend provides the index cache + +The user never sees core. They see markdown files + CLI commands. + +### ContentStore Abstraction + +All file operations go through an interface to enable future S3 migration: + +```typescript +interface ContentStore { + read(key: string): Promise<string>; + write(key: string, content: string): Promise<void>; + delete(key: string): Promise<void>; + list(prefix?: string): Promise<string[]>; + exists(key: string): Promise<boolean>; +} +``` + +v1: `LocalContentStore` (fs.readFile/writeFile) +Future: `S3ContentStore`, with local `.db0/` SQLite as read cache. + +## CLI Primitives + +### `db0 remember <fact> [--scope user|agent|session|task] [--tags a,b] [--dir ./memories]` + +The smart write. The core differentiator. + +1. Compute hash embedding of the input fact +2. Scan existing memories (via index if available, brute-force if not) +3. Threshold-based decision: + - **High similarity (>0.9):** auto-supersede. Update the existing file. Commit + old version to git if available. + - **Medium similarity (0.7-0.9):** create new file with `related-to:` frontmatter + linking to the similar memory. + - **Low similarity (<0.7):** create new file independently. +4. Update `.db0/` index +5. Regenerate MEMORIES.md + +### `db0 search <query> [--limit N] [--scope ...] [--dir ./memories]` + +Semantic + keyword search over the memory directory. + +1. If `.db0/index.sqlite` exists: use cached embeddings + FTS +2.
If not: brute-force scan — read all files, compute hash embeddings on the fly, + rank by cosine similarity +3. Return ranked results with file paths, scores, age, staleness caveats + +For <500 files, brute-force with hash embeddings is <100ms. SQLite index is an +optimization, not a requirement. + +### `db0 pack [--query <text>] [--budget <tokens>] [--dir ./memories]` + +Context assembly for LLM prompts. + +1. If query provided: search for relevant memories, rank by score +2. If no query: use all active memories, rank by recency + scope priority +3. Assemble into markdown, respecting token budget: + - Skip superseded/contradicted facts + - Prefer recent over old + - Prefer user/agent scope over session/task + - Include staleness warnings for old memories +4. Output markdown to stdout (pipe-friendly) + +### `db0 consolidate [--dir ./memories]` + +Garbage collection. + +1. Cluster semantically similar files (using core's reconcile logic) +2. Merge redundant files (preserve the most complete version) +3. Move superseded files to `.db0/archive/` +4. Detect contradictions, flag for review +5. Remove expired session/task files +6. Regenerate MEMORIES.md + +### `db0 index [--dir ./memories]` + +Rebuild the search cache from files. + +1. Scan all markdown files in the directory +2. Parse frontmatter + content +3. Compute embeddings (hash by default, pluggable) +4. Build FTS index +5. Write to `.db0/index.sqlite` + +Idempotent, safe to run anytime. Required after manual file edits.
+ +## MEMORIES.md Format + +Auto-generated after every `remember`, `consolidate`, or `index`: + +```markdown +# Memories + +> 12 active, 3 archived | last updated 2026-04-03T10:00:00Z + +## user (5) +- [language-prefs.md](user/language-prefs.md) — Prefers Rust for CLI tools (today) +- [work-style.md](user/work-style.md) — Short PRs, no squash (3 days ago) +- [github.md](user/github.md) — Uses lightcone0 for db0-ai (today) + +## agent (4) +- [stack.md](agent/stack.md) — TypeScript monorepo, vitest (today) +- [patterns.md](agent/patterns.md) — Workspace packages pattern (2 days ago) + +## session (2) +- [current-task.md](session/current-task.md) — Designing db0/md (today) + +## stale (>7 days) +- [old-deploy.md](agent/old-deploy.md) — References src/old-config.ts (not found) +``` + +One file. Agent reads it, gets full context. Humans read it, see everything at a +glance. + +## v1 Scope: Pure Files Mode + +For the prototype, the simplest possible implementation: + +- **No SQLite dependency.** Brute-force scan with hash embeddings on every + operation. Fast enough for <500 files. +- **No config file.** Convention over configuration (directory structure = scoping). +- **No background process.** Every command is stateless, reads files, does work, + writes files. +- **Single npm package.** `@db0-ai/md` with a `db0` CLI binary. +- **Zero external dependencies** beyond what's already in core. + +### Growth Path + +| | v1: Pure files | v1.5: SQLite accel | v2: Cloud | +|---|---|---|---| +| Storage | local markdown | local markdown | S3 / object store | +| Index | on-the-fly hash | .db0/index.sqlite | local cache + durable store | +| Search | brute-force cosine | BM25 + vector | same | +| Embeddings | hash (built-in) | pluggable (Ollama, OpenAI) | same | +| File limit | ~500 | ~10,000 | unlimited | +| Dependencies | zero | better-sqlite3 | + S3 SDK | + +## Embedding Strategy + +v1 uses `hashEmbed` from `@db0-ai/core` — deterministic, zero-API, instant. 
+Not semantic, but good enough for near-exact dedup and recall. + +Pluggable upgrade path (v1.5+): + +```typescript +// .db0.yml or programmatic +embedding: + provider: ollama # or openai, gemini + model: nomic-embed-text +``` + +When a real embedding provider is configured, `db0 index` recomputes all +embeddings and stores them in `.db0/index.sqlite`. + +## Agent Runtime Integration + +### Claude Code +```json +// hooks.json — PostQuery hook +{ "command": "db0 consolidate --dir ${CLAUDE_PLUGIN_ROOT}/memories --quiet" } +``` +Agent reads/writes `memories/` directory normally. db0 runs lifecycle in hooks. + +### OpenClaw +```bash +# In the cron heartbeat +db0 consolidate --dir ./state/memories +``` +Agent writes files, db0 manages lifecycle on the cron loop. + +### Codex / Any Sandbox +```bash +# Agent calls db0 like any CLI tool +db0 remember "The API uses REST, not GraphQL" --scope agent +db0 search "API architecture" +db0 pack --budget 4000 | pbcopy # pipe into prompt +``` + +### SDK (for framework developers) +```typescript +import { MemoryStore } from "@db0-ai/md"; + +const store = new MemoryStore({ dir: "./memories" }); +await store.remember("User prefers Rust", { scope: "user" }); +const results = await store.search("language preferences"); +const context = await store.pack({ budget: 4000 }); +``` + +## What This Is NOT + +- **Not a RAG pipeline.** No document chunking, no ingestion of PDFs/HTML. + It manages agent memories, not reference documents. +- **Not a vector database.** The index is a cache, not a product. +- **Not a framework.** No opinions on agent architecture, no lifecycle hooks + to implement, no base classes to extend. +- **Not a cloud service.** Runs locally, your data stays on your machine. + +## Success Criteria + +1. `npx db0 remember` catches a contradiction in under 60 seconds on first use +2. Works with Claude Code, Codex, and OpenClaw without framework-specific code +3. Zero configuration, zero external dependencies +4. 
A developer can understand the entire system by reading `ls memories/` +5. MEMORIES.md is good enough that agents use it as their primary context source + +## Prototype Plan + +1. `@db0-ai/md` package with `MemoryStore` class wrapping core +2. `db0` CLI binary with five commands: remember, search, pack, consolidate, index +3. ContentStore interface with LocalContentStore implementation +4. MEMORIES.md generation +5. Tests: smart write dedup, superseding, search ranking, pack budget, consolidate merge +6. A demo: run db0 against a directory of memories, show the lifecycle loop diff --git a/docs/superpowers/plans/2026-04-03-db0-md-prototype.md b/docs/superpowers/plans/2026-04-03-db0-md-prototype.md new file mode 100644 index 0000000..91f9bbc --- /dev/null +++ b/docs/superpowers/plans/2026-04-03-db0-md-prototype.md @@ -0,0 +1,1454 @@ +# db0/md Prototype Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build a CLI tool (`db0`) that turns a directory of markdown files into a managed memory system with smart write, search, context assembly, and garbage collection. + +**Architecture:** `@db0-ai/md` package wraps `@db0-ai/core` utilities (hashEmbed, cosineSimilarity, memoryAge, generateId) without using the full harness/backend stack. v1 is pure files — no SQLite, brute-force scan with hash embeddings. A `ContentStore` interface abstracts file I/O for future S3 migration. The CLI binary exposes five commands: `remember`, `search`, `pack`, `consolidate`, `index`. + +**Tech Stack:** TypeScript (ESM, NodeNext), `@db0-ai/core` (hash embeddings, cosine similarity, staleness), Node.js fs/path, vitest for tests. 
+ +--- + +## File Structure + +``` +packages/md/ +├── package.json +├── tsconfig.json +├── README.md +├── src/ +│ ├── index.ts # Public SDK exports +│ ├── cli.ts # CLI entry point (bin) +│ ├── content-store.ts # ContentStore interface + LocalContentStore +│ ├── markdown.ts # Parse/serialize markdown with YAML frontmatter +│ ├── memory-store.ts # MemoryStore class — the core engine +│ ├── memories-index.ts # MEMORIES.md generator +│ └── types.ts # Shared types (MemoryFile, RememberResult, etc.) +├── __tests__/ +│ ├── markdown.test.ts # Frontmatter parse/serialize +│ ├── content-store.test.ts # LocalContentStore file operations +│ ├── remember.test.ts # Smart write: dedup, supersede, create +│ ├── search.test.ts # Brute-force search ranking +│ ├── pack.test.ts # Context assembly with budget +│ ├── consolidate.test.ts # Garbage collection +│ └── memories-index.test.ts # MEMORIES.md generation +``` + +--- + +### Task 1: Package Scaffolding + +**Files:** +- Create: `packages/md/package.json` +- Create: `packages/md/tsconfig.json` +- Create: `packages/md/src/index.ts` +- Create: `packages/md/src/types.ts` +- Modify: `package.json` (root — add workspace) + +- [ ] **Step 1: Create package.json** + +Create `packages/md/package.json`: + +```json +{ + "name": "@db0-ai/md", + "version": "0.3.0", + "description": "The embedded memory primitive for AI agents. 
Markdown files + smart lifecycle.", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "bin": { + "db0": "./dist/cli.js" + }, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "scripts": { + "build": "tsc" + }, + "files": [ + "README.md", + "dist" + ], + "license": "MIT", + "dependencies": { + "@db0-ai/core": "0.3.0" + } +} +``` + +- [ ] **Step 2: Create tsconfig.json** + +Create `packages/md/tsconfig.json`: + +```json +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src"] +} +``` + +- [ ] **Step 3: Create types.ts** + +Create `packages/md/src/types.ts`: + +```typescript +export type MemoryScope = "user" | "agent" | "session" | "task"; + +export interface MemoryFrontmatter { + id: string; + scope: MemoryScope; + tags?: string[]; + created: string; + supersedes?: string; + "related-to"?: string[]; + expires?: string; +} + +export interface MemoryFile { + /** Relative path from memory dir, e.g.
"user/language-prefs.md" */ + path: string; + frontmatter: MemoryFrontmatter; + content: string; +} + +export interface RememberResult { + action: "created" | "superseded" | "related"; + file: string; + superseded?: { file: string; content: string }; + relatedTo?: string; +} + +export interface SearchResult { + file: string; + content: string; + scope: MemoryScope; + score: number; + age: string; + stalenessCaveat: string | null; +} + +export interface ConsolidateResult { + merged: number; + archived: number; + expired: number; +} +``` + +- [ ] **Step 4: Create index.ts with placeholder exports** + +Create `packages/md/src/index.ts`: + +```typescript +export type { + MemoryScope, + MemoryFrontmatter, + MemoryFile, + RememberResult, + SearchResult, + ConsolidateResult, +} from "./types.js"; +``` + +- [ ] **Step 5: Add workspace to root package.json** + +In the root `package.json`, add `"packages/md"` to the `workspaces` array. + +- [ ] **Step 6: Install and verify build** + +Run: `npm install && npm run build -w packages/md` +Expected: Clean build with dist/ output, no errors. 
+ +- [ ] **Step 7: Commit** + +```bash +git add packages/md/ package.json +git commit -m "feat(md): scaffold @db0-ai/md package with types" +``` + +--- + +### Task 2: ContentStore Interface + LocalContentStore + +**Files:** +- Create: `packages/md/src/content-store.ts` +- Create: `packages/md/__tests__/content-store.test.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/md/__tests__/content-store.test.ts`: + +```typescript +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { LocalContentStore } from "../src/content-store.js"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("LocalContentStore", () => { + let dir: string; + let store: LocalContentStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new LocalContentStore(dir); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("write and read a file", async () => { + await store.write("user/prefs.md", "hello world"); + const content = await store.read("user/prefs.md"); + expect(content).toBe("hello world"); + }); + + it("list files recursively", async () => { + await store.write("user/a.md", "a"); + await store.write("user/b.md", "b"); + await store.write("agent/c.md", "c"); + const all = await store.list(); + expect(all.sort()).toEqual(["agent/c.md", "user/a.md", "user/b.md"]); + }); + + it("list files with prefix", async () => { + await store.write("user/a.md", "a"); + await store.write("agent/b.md", "b"); + const userFiles = await store.list("user"); + expect(userFiles).toEqual(["user/a.md"]); + }); + + it("check existence", async () => { + expect(await store.exists("user/x.md")).toBe(false); + await store.write("user/x.md", "x"); + expect(await store.exists("user/x.md")).toBe(true); + }); + + it("delete a file", async () => { + await store.write("user/x.md", "x"); + await store.delete("user/x.md"); + 
expect(await store.exists("user/x.md")).toBe(false); + }); + + it("ignores non-md files and dotfiles", async () => { + await store.write("user/a.md", "a"); + await store.write("user/b.txt", "b"); + await store.write(".db0/index.json", "{}"); + const files = await store.list(); + expect(files).toEqual(["user/a.md"]); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run packages/md/__tests__/content-store.test.ts` +Expected: FAIL — module not found. + +- [ ] **Step 3: Implement LocalContentStore** + +Create `packages/md/src/content-store.ts`: + +```typescript +import { readFile, writeFile, rm, readdir, mkdir } from "node:fs/promises"; +import { join, dirname, relative } from "node:path"; +import { existsSync } from "node:fs"; + +export interface ContentStore { + read(key: string): Promise<string>; + write(key: string, content: string): Promise<void>; + delete(key: string): Promise<void>; + list(prefix?: string): Promise<string[]>; + exists(key: string): Promise<boolean>; +} + +export class LocalContentStore implements ContentStore { + constructor(private dir: string) {} + + async read(key: string): Promise<string> { + return readFile(join(this.dir, key), "utf8"); + } + + async write(key: string, content: string): Promise<void> { + const fullPath = join(this.dir, key); + const parent = dirname(fullPath); + if (!existsSync(parent)) { + await mkdir(parent, { recursive: true }); + } + await writeFile(fullPath, content, "utf8"); + } + + async delete(key: string): Promise<void> { + const fullPath = join(this.dir, key); + if (existsSync(fullPath)) { + await rm(fullPath); + } + } + + async list(prefix?: string): Promise<string[]> { + const scanDir = prefix ?
join(this.dir, prefix) : this.dir; + if (!existsSync(scanDir)) return []; + return this.scanDir(scanDir); + } + + async exists(key: string): Promise<boolean> { + return existsSync(join(this.dir, key)); + } + + private async scanDir(dir: string): Promise<string[]> { + const results: string[] = []; + const entries = await readdir(dir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.name.startsWith(".")) continue; + const fullPath = join(dir, entry.name); + if (entry.isDirectory()) { + results.push(...(await this.scanDir(fullPath))); + } else if (entry.name.endsWith(".md") && entry.name !== "MEMORIES.md") { + results.push(relative(this.dir, fullPath)); + } + } + return results; + } +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `npx vitest run packages/md/__tests__/content-store.test.ts` +Expected: All 6 tests PASS. + +- [ ] **Step 5: Commit** + +```bash +git add packages/md/src/content-store.ts packages/md/__tests__/content-store.test.ts +git commit -m "feat(md): add ContentStore interface and LocalContentStore" +``` + +--- + +### Task 3: Markdown Parser/Serializer + +**Files:** +- Create: `packages/md/src/markdown.ts` +- Create: `packages/md/__tests__/markdown.test.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/md/__tests__/markdown.test.ts`: + +```typescript +import { describe, it, expect } from "vitest"; +import { parseMarkdown, serializeMarkdown } from "../src/markdown.js"; + +describe("parseMarkdown", () => { + it("parses frontmatter and content", () => { + const input = `--- +id: m_abc123 +scope: user +tags: [preference, language] +created: "2026-04-03T10:00:00Z" +--- + +User prefers Rust.`; + + const result = parseMarkdown(input); + expect(result.frontmatter.id).toBe("m_abc123"); + expect(result.frontmatter.scope).toBe("user"); + expect(result.frontmatter.tags).toEqual(["preference", "language"]); + expect(result.content).toBe("User prefers Rust."); + }); + + it("handles missing frontmatter", () => { + const result
= parseMarkdown("Just plain content."); + expect(result.frontmatter).toEqual({}); + expect(result.content).toBe("Just plain content."); + }); + + it("handles partial frontmatter (scope only)", () => { + const input = `--- +scope: agent +--- + +A fact.`; + const result = parseMarkdown(input); + expect(result.frontmatter.scope).toBe("agent"); + expect(result.frontmatter.id).toBeUndefined(); + expect(result.content).toBe("A fact."); + }); +}); + +describe("serializeMarkdown", () => { + it("serializes frontmatter and content", () => { + const output = serializeMarkdown( + { + id: "m_abc123", + scope: "user", + tags: ["preference"], + created: "2026-04-03T10:00:00Z", + }, + "User prefers Rust.", + ); + expect(output).toContain("id: m_abc123"); + expect(output).toContain("scope: user"); + expect(output).toContain("User prefers Rust."); + expect(output.startsWith("---\n")).toBe(true); + }); + + it("roundtrips cleanly", () => { + const fm = { + id: "m_test", + scope: "user" as const, + tags: ["a", "b"], + created: "2026-01-01T00:00:00Z", + }; + const content = "Hello world."; + const serialized = serializeMarkdown(fm, content); + const parsed = parseMarkdown(serialized); + expect(parsed.frontmatter.id).toBe("m_test"); + expect(parsed.frontmatter.scope).toBe("user"); + expect(parsed.frontmatter.tags).toEqual(["a", "b"]); + expect(parsed.content).toBe(content); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run packages/md/__tests__/markdown.test.ts` +Expected: FAIL — module not found. + +- [ ] **Step 3: Implement markdown parser/serializer** + +Create `packages/md/src/markdown.ts`: + +```typescript +import type { MemoryFrontmatter } from "./types.js"; + +/** + * Minimal YAML frontmatter parser. No dependencies. + * Handles the subset we need: scalars, arrays, quoted strings. 
+ */ +export function parseMarkdown(raw: string): { + frontmatter: Partial<MemoryFrontmatter>; + content: string; +} { + const fmMatch = raw.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/); + if (!fmMatch) { + return { frontmatter: {}, content: raw.trim() }; + } + + const fmBlock = fmMatch[1]; + const content = fmMatch[2].trim(); + const frontmatter: Record<string, unknown> = {}; + + for (const line of fmBlock.split("\n")) { + const colonIdx = line.indexOf(":"); + if (colonIdx === -1) continue; + const key = line.slice(0, colonIdx).trim(); + let value: unknown = line.slice(colonIdx + 1).trim(); + + // Remove surrounding quotes + if ( + typeof value === "string" && + value.startsWith('"') && + value.endsWith('"') + ) { + value = value.slice(1, -1); + } + + // Parse inline arrays: [a, b, c] + if (typeof value === "string" && value.startsWith("[") && value.endsWith("]")) { + value = value + .slice(1, -1) + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + } + + if (key) frontmatter[key] = value; + } + + return { frontmatter: frontmatter as Partial<MemoryFrontmatter>, content }; +} + +/** + * Serialize frontmatter + content into a markdown string. + */ +export function serializeMarkdown( + frontmatter: Partial<MemoryFrontmatter>, + content: string, +): string { + const lines: string[] = ["---"]; + + for (const [key, value] of Object.entries(frontmatter)) { + if (value === undefined || value === null) continue; + if (Array.isArray(value)) { + lines.push(`${key}: [${value.join(", ")}]`); + } else { + lines.push(`${key}: ${value}`); + } + } + + lines.push("---"); + lines.push(""); + lines.push(content); + + return lines.join("\n") + "\n"; +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `npx vitest run packages/md/__tests__/markdown.test.ts` +Expected: All 5 tests PASS.
+ +- [ ] **Step 5: Commit** + +```bash +git add packages/md/src/markdown.ts packages/md/__tests__/markdown.test.ts +git commit -m "feat(md): add markdown frontmatter parser and serializer" +``` + +--- + +### Task 4: MemoryStore — Core Engine + +**Files:** +- Create: `packages/md/src/memory-store.ts` +- Create: `packages/md/__tests__/remember.test.ts` +- Create: `packages/md/__tests__/search.test.ts` + +- [ ] **Step 1: Write remember tests** + +Create `packages/md/__tests__/remember.test.ts`: + +```typescript +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { mkdtempSync, rmSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.remember", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new MemoryStore({ dir }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("creates a new memory file", async () => { + const result = await store.remember("User prefers Rust", { scope: "user" }); + expect(result.action).toBe("created"); + expect(result.file).toMatch(/^user\/.+\.md$/); + + // File should exist with correct content + const raw = readFileSync(join(dir, result.file), "utf8"); + expect(raw).toContain("User prefers Rust"); + expect(raw).toContain("scope: user"); + }); + + it("supersedes a highly similar memory", async () => { + await store.remember("User prefers Python", { scope: "user" }); + const result = await store.remember("User prefers Rust", { scope: "user" }); + expect(result.action).toBe("superseded"); + expect(result.superseded).toBeDefined(); + expect(result.superseded!.content).toContain("Python"); + + // The file should now contain Rust, not Python + const raw = readFileSync(join(dir, result.file), "utf8"); + expect(raw).toContain("Rust"); + 
expect(raw).not.toContain("Python"); + }); + + it("creates independently for unrelated facts", async () => { + const r1 = await store.remember("User prefers Rust", { scope: "user" }); + const r2 = await store.remember("Deploy target is AWS", { + scope: "agent", + }); + expect(r1.file).not.toBe(r2.file); + expect(r2.action).toBe("created"); + }); + + it("adds tags to frontmatter", async () => { + const result = await store.remember("Use vitest for testing", { + scope: "agent", + tags: ["testing", "tooling"], + }); + const raw = readFileSync(join(dir, result.file), "utf8"); + expect(raw).toContain("testing"); + expect(raw).toContain("tooling"); + }); +}); +``` + +- [ ] **Step 2: Write search tests** + +Create `packages/md/__tests__/search.test.ts`: + +```typescript +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.search", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(async () => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new MemoryStore({ dir }); + await store.remember("User prefers Rust for CLI tools", { scope: "user" }); + await store.remember("Deploy target is AWS us-east-1", { scope: "agent" }); + await store.remember("Use vitest for all tests", { scope: "agent" }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("returns ranked results", async () => { + const results = await store.search("Rust programming language"); + expect(results.length).toBeGreaterThan(0); + expect(results[0].content).toContain("Rust"); + }); + + it("respects limit", async () => { + const results = await store.search("tools", { limit: 1 }); + expect(results.length).toBe(1); + }); + + it("filters by scope", async () => { + const results = await store.search("tools", { scope: ["agent"] }); + 
for (const r of results) { + expect(r.scope).toBe("agent"); + } + }); + + it("includes age information", async () => { + const results = await store.search("Rust"); + expect(results[0].age).toBe("today"); + }); +}); +``` + +- [ ] **Step 3: Run tests to verify they fail** + +Run: `npx vitest run packages/md/__tests__/remember.test.ts packages/md/__tests__/search.test.ts` +Expected: FAIL — module not found. + +- [ ] **Step 4: Implement MemoryStore** + +Create `packages/md/src/memory-store.ts`: + +```typescript +import { + hashEmbed, + cosineSimilarity, + generateId, + memoryAge, + defaultSummarize, +} from "@db0-ai/core"; +import { LocalContentStore, type ContentStore } from "./content-store.js"; +import { parseMarkdown, serializeMarkdown } from "./markdown.js"; +import type { + MemoryScope, + MemoryFile, + MemoryFrontmatter, + RememberResult, + SearchResult, + ConsolidateResult, +} from "./types.js"; + +const HIGH_THRESHOLD = 0.9; +const MEDIUM_THRESHOLD = 0.7; + +export interface MemoryStoreOpts { + dir: string; + contentStore?: ContentStore; + highThreshold?: number; + mediumThreshold?: number; +} + +export interface RememberOpts { + scope?: MemoryScope; + tags?: string[]; +} + +export interface SearchOpts { + limit?: number; + scope?: MemoryScope[]; +} + +export interface PackOpts { + query?: string; + budget?: number; +} + +export class MemoryStore { + private store: ContentStore; + private highThreshold: number; + private mediumThreshold: number; + private dir: string; + + constructor(opts: MemoryStoreOpts) { + this.dir = opts.dir; + this.store = opts.contentStore ?? new LocalContentStore(opts.dir); + this.highThreshold = opts.highThreshold ?? HIGH_THRESHOLD; + this.mediumThreshold = opts.mediumThreshold ?? MEDIUM_THRESHOLD; + } + + /** Load all memory files from disk. 
*/ + private async loadAll(): Promise { + const keys = await this.store.list(); + const files: MemoryFile[] = []; + for (const key of keys) { + const raw = await this.store.read(key); + const { frontmatter, content } = parseMarkdown(raw); + files.push({ + path: key, + frontmatter: frontmatter as MemoryFrontmatter, + content, + }); + } + return files; + } + + /** Compute embedding for text using core's hash embeddings. */ + private embed(text: string): Float32Array { + return hashEmbed(text); + } + + /** Generate a slug-friendly filename from content. */ + private slugify(content: string): string { + return content + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-|-$/g, "") + .slice(0, 50); + } + + /** + * Smart write — the core differentiator. + * Dedup, supersede, or create with relationship linking. + */ + async remember( + fact: string, + opts: RememberOpts = {}, + ): Promise { + const scope = opts.scope ?? "user"; + const factEmbedding = this.embed(fact); + const existing = await this.loadAll(); + + // Find best match + let bestMatch: { file: MemoryFile; score: number } | null = null; + for (const file of existing) { + const fileEmbedding = this.embed(file.content); + const score = cosineSimilarity(factEmbedding, fileEmbedding); + if (!bestMatch || score > bestMatch.score) { + bestMatch = { file, score }; + } + } + + const id = `m_${generateId().slice(0, 8)}`; + const now = new Date().toISOString(); + const frontmatter: MemoryFrontmatter = { + id, + scope, + created: now, + ...(opts.tags?.length ? 
{ tags: opts.tags } : {}), + }; + + // High similarity → supersede + if (bestMatch && bestMatch.score >= this.highThreshold) { + const oldFile = bestMatch.file; + frontmatter.supersedes = oldFile.frontmatter.id; + const newContent = serializeMarkdown(frontmatter, fact); + await this.store.write(oldFile.path, newContent); + await this.generateIndex(); + return { + action: "superseded", + file: oldFile.path, + superseded: { file: oldFile.path, content: oldFile.content }, + }; + } + + // Medium similarity → create with link + const filePath = `${scope}/${this.slugify(fact)}.md`; + if ( + bestMatch && + bestMatch.score >= this.mediumThreshold + ) { + frontmatter["related-to"] = [bestMatch.file.frontmatter.id]; + const newContent = serializeMarkdown(frontmatter, fact); + await this.store.write(filePath, newContent); + await this.generateIndex(); + return { + action: "related", + file: filePath, + relatedTo: bestMatch.file.path, + }; + } + + // Low similarity → create independently + const newContent = serializeMarkdown(frontmatter, fact); + await this.store.write(filePath, newContent); + await this.generateIndex(); + return { action: "created", file: filePath }; + } + + /** + * Semantic search over all memory files. + * Brute-force with hash embeddings — fast enough for <500 files. + */ + async search(query: string, opts: SearchOpts = {}): Promise { + const limit = opts.limit ?? 10; + const scopeFilter = opts.scope; + const queryEmbedding = this.embed(query); + const files = await this.loadAll(); + + const scored: SearchResult[] = []; + for (const file of files) { + if (scopeFilter && !scopeFilter.includes(file.frontmatter.scope)) { + continue; + } + const fileEmbedding = this.embed(file.content); + const score = cosineSimilarity(queryEmbedding, fileEmbedding); + const age = memoryAge(file.frontmatter.created ?? 
new Date().toISOString()); + scored.push({ + file: file.path, + content: file.content, + scope: file.frontmatter.scope, + score: Math.round(score * 1000) / 1000, + age: age.label, + stalenessCaveat: age.stalenessCaveat, + }); + } + + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, limit); + } + + /** + * Assemble context within a token budget. + * Rough token estimate: 1 token ~= 4 chars. + */ + async pack(opts: PackOpts = {}): Promise { + const budget = opts.budget ?? 4000; + const charBudget = budget * 4; + + let files: MemoryFile[]; + if (opts.query) { + const results = await this.search(opts.query, { limit: 50 }); + const allFiles = await this.loadAll(); + const fileMap = new Map(allFiles.map((f) => [f.path, f])); + files = results + .map((r) => fileMap.get(r.file)) + .filter((f): f is MemoryFile => f !== undefined); + } else { + files = await this.loadAll(); + // Sort by scope priority (user > agent > session > task), then recency + const scopePriority: Record = { + user: 0, + agent: 1, + session: 2, + task: 3, + }; + files.sort((a, b) => { + const sp = + (scopePriority[a.frontmatter.scope] ?? 9) - + (scopePriority[b.frontmatter.scope] ?? 9); + if (sp !== 0) return sp; + return (b.frontmatter.created ?? "").localeCompare( + a.frontmatter.created ?? "", + ); + }); + } + + const lines: string[] = ["# Agent Memory Context", ""]; + let totalChars = lines.join("\n").length; + + for (const file of files) { + const age = memoryAge( + file.frontmatter.created ?? new Date().toISOString(), + ); + const caveat = age.stalenessCaveat ? ` (${age.stalenessCaveat})` : ""; + const entry = `- **[${file.frontmatter.scope}]** ${file.content}${caveat}`; + + if (totalChars + entry.length + 1 > charBudget) break; + lines.push(entry); + totalChars += entry.length + 1; + } + + return lines.join("\n") + "\n"; + } + + /** + * Garbage collection — merge duplicates, archive superseded, expire old. 
+ */ + async consolidate(): Promise { + const files = await this.loadAll(); + let merged = 0; + let archived = 0; + let expired = 0; + + // 1. Find superseded files (files whose ID appears in another's supersedes) + const activeIds = new Set(files.map((f) => f.frontmatter.id)); + const supersededIds = new Set( + files + .map((f) => f.frontmatter.supersedes) + .filter((s): s is string => !!s), + ); + + // 2. Archive files that have been superseded by other files + for (const file of files) { + if (supersededIds.has(file.frontmatter.id)) { + const archivePath = `.db0/archive/${file.path}`; + const raw = await this.store.read(file.path); + await this.store.write(archivePath, raw); + await this.store.delete(file.path); + archived++; + } + } + + // 3. Expire session/task files older than 24h + const now = new Date(); + for (const file of files) { + if ( + file.frontmatter.scope === "session" || + file.frontmatter.scope === "task" + ) { + const created = new Date(file.frontmatter.created ?? 0); + const ageHours = + (now.getTime() - created.getTime()) / (1000 * 60 * 60); + if (ageHours > 24) { + await this.store.delete(file.path); + expired++; + } + } + } + + // 4. Find near-duplicates and merge + const remaining = await this.loadAll(); + const consumed = new Set(); + for (let i = 0; i < remaining.length; i++) { + if (consumed.has(remaining[i].path)) continue; + const embI = this.embed(remaining[i].content); + for (let j = i + 1; j < remaining.length; j++) { + if (consumed.has(remaining[j].path)) continue; + const embJ = this.embed(remaining[j].content); + const sim = cosineSimilarity(embI, embJ); + if (sim >= this.highThreshold) { + // Keep the newer one, archive the older + const newer = + (remaining[i].frontmatter.created ?? "") >= + (remaining[j].frontmatter.created ?? "") + ? remaining[i] + : remaining[j]; + const older = newer === remaining[i] ? 
remaining[j] : remaining[i]; + await this.store.delete(older.path); + consumed.add(older.path); + merged++; + } + } + } + + await this.generateIndex(); + return { merged, archived, expired }; + } + + /** Generate MEMORIES.md index file. */ + async generateIndex(): Promise { + const files = await this.loadAll(); + const byScope: Record = {}; + const stale: MemoryFile[] = []; + + for (const file of files) { + const scope = file.frontmatter.scope ?? "unknown"; + if (!byScope[scope]) byScope[scope] = []; + byScope[scope].push(file); + + const age = memoryAge( + file.frontmatter.created ?? new Date().toISOString(), + ); + if (age.days > 7) stale.push(file); + } + + const lines: string[] = [ + "# Memories", + "", + `> ${files.length} active${stale.length > 0 ? `, ${stale.length} stale` : ""} | last updated ${new Date().toISOString()}`, + "", + ]; + + const scopeOrder: MemoryScope[] = ["user", "agent", "session", "task"]; + for (const scope of scopeOrder) { + const group = byScope[scope]; + if (!group || group.length === 0) continue; + lines.push(`## ${scope} (${group.length})`); + for (const file of group) { + const age = memoryAge( + file.frontmatter.created ?? new Date().toISOString(), + ); + const preview = + file.content.length > 80 + ? file.content.slice(0, 77) + "..." + : file.content; + lines.push(`- [${file.path.split("/").pop()}](${file.path}) — ${preview} (${age.label})`); + } + lines.push(""); + } + + if (stale.length > 0) { + lines.push("## stale (>7 days)"); + for (const file of stale) { + const age = memoryAge(file.frontmatter.created ?? new Date().toISOString()); + const preview = + file.content.length > 60 + ? file.content.slice(0, 57) + "..." 
+ : file.content; + lines.push(`- [${file.path.split("/").pop()}](${file.path}) — ${preview} (${age.label})`); + } + lines.push(""); + } + + await this.store.write("MEMORIES.md", lines.join("\n") + "\n"); + } +} +``` + +- [ ] **Step 5: Run all tests** + +Run: `npx vitest run packages/md/__tests__/remember.test.ts packages/md/__tests__/search.test.ts` +Expected: All tests PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/md/src/memory-store.ts packages/md/__tests__/remember.test.ts packages/md/__tests__/search.test.ts +git commit -m "feat(md): implement MemoryStore with remember, search, pack, consolidate" +``` + +--- + +### Task 5: Pack and Consolidate Tests + +**Files:** +- Create: `packages/md/__tests__/pack.test.ts` +- Create: `packages/md/__tests__/consolidate.test.ts` +- Create: `packages/md/__tests__/memories-index.test.ts` + +- [ ] **Step 1: Write pack tests** + +Create `packages/md/__tests__/pack.test.ts`: + +```typescript +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.pack", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(async () => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new MemoryStore({ dir }); + await store.remember("User prefers Rust", { scope: "user" }); + await store.remember("Deploy to AWS us-east-1", { scope: "agent" }); + await store.remember("Current task is fixing auth bug", { + scope: "session", + }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("assembles all memories into context", async () => { + const context = await store.pack(); + expect(context).toContain("Rust"); + expect(context).toContain("AWS"); + expect(context).toContain("auth bug"); + }); + + it("respects token budget", async () => { + // Very small budget — 
should truncate + const context = await store.pack({ budget: 20 }); + // 20 tokens ~= 80 chars, should fit header + maybe 1 entry + const lines = context.split("\n").filter((l) => l.startsWith("- ")); + expect(lines.length).toBeLessThanOrEqual(2); + }); + + it("query-based pack returns relevant results", async () => { + const context = await store.pack({ query: "programming language" }); + expect(context).toContain("Rust"); + }); +}); +``` + +- [ ] **Step 2: Write consolidate tests** + +Create `packages/md/__tests__/consolidate.test.ts`: + +```typescript +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { LocalContentStore } from "../src/content-store.js"; +import { serializeMarkdown } from "../src/markdown.js"; +import { mkdtempSync, rmSync, existsSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.consolidate", () => { + let dir: string; + let store: MemoryStore; + let contentStore: LocalContentStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + contentStore = new LocalContentStore(dir); + store = new MemoryStore({ dir, contentStore }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("expires old session files", async () => { + // Write a session memory with old timestamp + const old = serializeMarkdown( + { + id: "m_old", + scope: "session", + created: "2020-01-01T00:00:00Z", + }, + "Old session fact", + ); + await contentStore.write("session/old-fact.md", old); + + const result = await store.consolidate(); + expect(result.expired).toBe(1); + expect(existsSync(join(dir, "session/old-fact.md"))).toBe(false); + }); + + it("preserves recent session files", async () => { + await store.remember("Current task context", { scope: "session" }); + const result = await store.consolidate(); + expect(result.expired).toBe(0); + }); + + it("regenerates 
MEMORIES.md", async () => { + await store.remember("A fact", { scope: "user" }); + await store.consolidate(); + expect(existsSync(join(dir, "MEMORIES.md"))).toBe(true); + }); +}); +``` + +- [ ] **Step 3: Write MEMORIES.md generation test** + +Create `packages/md/__tests__/memories-index.test.ts`: + +```typescript +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { mkdtempSync, rmSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MEMORIES.md generation", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new MemoryStore({ dir }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("generates MEMORIES.md after remember", async () => { + await store.remember("User prefers Rust", { scope: "user" }); + const index = readFileSync(join(dir, "MEMORIES.md"), "utf8"); + expect(index).toContain("# Memories"); + expect(index).toContain("## user (1)"); + expect(index).toContain("Rust"); + }); + + it("groups by scope", async () => { + await store.remember("Fact A", { scope: "user" }); + await store.remember("Fact B", { scope: "agent" }); + const index = readFileSync(join(dir, "MEMORIES.md"), "utf8"); + expect(index).toContain("## user (1)"); + expect(index).toContain("## agent (1)"); + }); + + it("shows total count", async () => { + await store.remember("Fact A", { scope: "user" }); + await store.remember("Fact B", { scope: "user" }); + const index = readFileSync(join(dir, "MEMORIES.md"), "utf8"); + expect(index).toContain("2 active"); + }); +}); +``` + +- [ ] **Step 4: Run all tests** + +Run: `npx vitest run packages/md/__tests__/` +Expected: All tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add packages/md/__tests__/pack.test.ts packages/md/__tests__/consolidate.test.ts packages/md/__tests__/memories-index.test.ts +git commit -m "test(md): add pack, consolidate, and MEMORIES.md generation tests" +``` + +--- + +### Task 6: CLI Binary + +**Files:** +- Create: `packages/md/src/cli.ts` +- Modify: `packages/md/src/index.ts` + +- [ ] **Step 1: Implement CLI** + +Create `packages/md/src/cli.ts`: + +```typescript +#!/usr/bin/env node +import { MemoryStore } from "./memory-store.js"; + +const args = process.argv.slice(2); +const command = args[0]; + +function getFlag(name: string): string | undefined { + const idx = args.indexOf(`--${name}`); + if (idx === -1) return undefined; + return args[idx + 1]; +} + +function getFlagList(name: string): string[] | undefined { + const val = getFlag(name); + if (!val) return undefined; + return val.split(",").map((s) => s.trim()); +} + +const dir = getFlag("dir") ?? "./memories"; + +async function main() { + const store = new MemoryStore({ dir }); + + switch (command) { + case "remember": { + const fact = args.slice(1).filter((a) => !a.startsWith("--")).join(" "); + if (!fact) { + console.error("Usage: db0-md remember [--scope user] [--tags a,b] [--dir ./memories]"); + process.exit(1); + } + const scope = (getFlag("scope") ?? "user") as "user" | "agent" | "session" | "task"; + const tags = getFlagList("tags"); + const result = await store.remember(fact, { scope, tags: tags ?? 
undefined }); + + if (result.action === "superseded") { + console.log(`Superseded: ${result.file}`); + console.log(` was: ${result.superseded!.content}`); + console.log(` now: ${fact}`); + } else if (result.action === "related") { + console.log(`Created: ${result.file} (related to ${result.relatedTo})`); + } else { + console.log(`Created: ${result.file}`); + } + break; + } + + case "search": { + const query = args.slice(1).filter((a) => !a.startsWith("--")).join(" "); + if (!query) { + console.error("Usage: db0-md search [--limit 10] [--scope user,agent] [--dir ./memories]"); + process.exit(1); + } + const limit = parseInt(getFlag("limit") ?? "10", 10); + const scopeList = getFlagList("scope") as ("user" | "agent" | "session" | "task")[] | undefined; + const results = await store.search(query, { limit, scope: scopeList ?? undefined }); + + if (results.length === 0) { + console.log("No results found."); + } else { + for (const r of results) { + const caveat = r.stalenessCaveat ? ` ⚠️` : ""; + const preview = r.content.length > 80 ? r.content.slice(0, 77) + "..." : r.content; + console.log(` ${r.score.toFixed(3)} [${r.scope}] ${preview} (${r.age})${caveat}`); + console.log(` ${r.file}`); + } + } + break; + } + + case "pack": { + const query = args.slice(1).filter((a) => !a.startsWith("--")).join(" ") || undefined; + const budget = parseInt(getFlag("budget") ?? 
"4000", 10); + const context = await store.pack({ query, budget }); + process.stdout.write(context); + break; + } + + case "consolidate": { + const result = await store.consolidate(); + if (getFlag("quiet") === undefined) { + console.log(`Consolidated: merged=${result.merged}, archived=${result.archived}, expired=${result.expired}`); + } + break; + } + + case "index": { + await store.generateIndex(); + console.log("MEMORIES.md regenerated."); + break; + } + + default: + console.log(`db0-md — the embedded memory primitive for AI agents + +Commands: + remember Smart write with dedup and superseding + search Semantic search over memories + pack [query] Assemble context within a token budget + consolidate Garbage collection and cleanup + index Regenerate MEMORIES.md + +Options: + --dir Memory directory (default: ./memories) + --scope Memory scope: user, agent, session, task + --tags Comma-separated tags + --limit Max results for search + --budget Token budget for pack + --quiet Suppress output`); + break; + } +} + +main().catch((err) => { + console.error(err.message ?? 
err); + process.exit(1); +}); +``` + +- [ ] **Step 2: Update index.ts with full exports** + +Update `packages/md/src/index.ts`: + +```typescript +export { MemoryStore } from "./memory-store.js"; +export type { MemoryStoreOpts, RememberOpts, SearchOpts, PackOpts } from "./memory-store.js"; + +export { LocalContentStore } from "./content-store.js"; +export type { ContentStore } from "./content-store.js"; + +export { parseMarkdown, serializeMarkdown } from "./markdown.js"; + +export type { + MemoryScope, + MemoryFrontmatter, + MemoryFile, + RememberResult, + SearchResult, + ConsolidateResult, +} from "./types.js"; +``` + +- [ ] **Step 3: Build and test CLI manually** + +Run: +```bash +npm run build -w packages/md +mkdir -p /tmp/test-memories +node packages/md/dist/cli.js remember "User prefers Rust" --dir /tmp/test-memories --scope user +node packages/md/dist/cli.js remember "User prefers Python" --dir /tmp/test-memories --scope user +node packages/md/dist/cli.js search "language" --dir /tmp/test-memories +node packages/md/dist/cli.js pack --dir /tmp/test-memories +cat /tmp/test-memories/MEMORIES.md +``` + +Expected: Second `remember` shows "Superseded" message. Search returns results. Pack outputs context. MEMORIES.md exists with entries. + +- [ ] **Step 4: Commit** + +```bash +git add packages/md/src/cli.ts packages/md/src/index.ts +git commit -m "feat(md): add CLI binary with remember, search, pack, consolidate, index commands" +``` + +--- + +### Task 7: Run Full Test Suite and Final Polish + +**Files:** +- Modify: `packages/md/src/index.ts` (if needed) +- No new files + +- [ ] **Step 1: Run all md tests** + +Run: `npx vitest run packages/md/__tests__/` +Expected: All tests PASS. + +- [ ] **Step 2: Run full monorepo tests** + +Run: `npx vitest run` +Expected: All existing tests still pass + new md tests pass. + +- [ ] **Step 3: Build all packages** + +Run: `npm run build -w packages/core && npm run build -w packages/md` +Expected: Clean build, no errors. 
+ +- [ ] **Step 4: End-to-end CLI smoke test** + +Run: +```bash +rm -rf /tmp/db0-e2e && mkdir /tmp/db0-e2e +node packages/md/dist/cli.js remember "User prefers TypeScript" --dir /tmp/db0-e2e --scope user +node packages/md/dist/cli.js remember "Always use vitest for testing" --dir /tmp/db0-e2e --scope agent +node packages/md/dist/cli.js remember "User prefers Rust" --dir /tmp/db0-e2e --scope user +node packages/md/dist/cli.js search "programming language" --dir /tmp/db0-e2e +node packages/md/dist/cli.js pack --budget 2000 --dir /tmp/db0-e2e +node packages/md/dist/cli.js consolidate --dir /tmp/db0-e2e +cat /tmp/db0-e2e/MEMORIES.md +ls -la /tmp/db0-e2e/user/ /tmp/db0-e2e/agent/ +``` + +Expected: +- Third `remember` supersedes "TypeScript" with "Rust" +- Search returns "Rust" as top result +- Pack outputs context blob +- MEMORIES.md shows 2 active memories +- Directory structure: `user/` and `agent/` dirs with .md files + +- [ ] **Step 5: Commit all remaining changes** + +```bash +git add -A +git commit -m "feat(md): complete db0/md prototype — smart memory lifecycle for AI agents" +``` diff --git a/package-lock.json b/package-lock.json index 0e210b5..cafdbea 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,8 @@ "packages/integrations/ai-sdk", "packages/integrations/langchain", "packages/integrations/pi", - "packages/benchmark" + "packages/benchmark", + "packages/md" ], "devDependencies": { "@anthropic-ai/sdk": "^0.80.0", @@ -35,7 +36,6 @@ "version": "3.0.66", "resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-3.0.66.tgz", "integrity": "sha512-SIQ0YY0iMuv+07HLsZ+bB990zUJ6S4ujORAh+Jv1V2KGNn73qQKnGO0JBk+w+Res8YqOFSycwDoWcFlQrVxS4A==", - "dev": true, "license": "Apache-2.0", "dependencies": { "@ai-sdk/provider": "3.0.8", @@ -53,7 +53,6 @@ "version": "3.0.8", "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-3.0.8.tgz", "integrity": 
"sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ==", - "dev": true, "license": "Apache-2.0", "dependencies": { "json-schema": "^0.4.0" @@ -66,7 +65,6 @@ "version": "4.0.19", "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-4.0.19.tgz", "integrity": "sha512-3eG55CrSWCu2SXlqq2QCsFjo3+E7+Gmg7i/oRVoSZzIodTuDSfLb3MRje67xE9RFea73Zao7Lm4mADIfUETKGg==", - "dev": true, "license": "Apache-2.0", "dependencies": { "@ai-sdk/provider": "3.0.8", @@ -115,8 +113,8 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.1.1.tgz", "integrity": "sha512-gAmrUZSGtKc3AiBL71iNWxDsyUC5uMaKKGdvzYsBoTW/xi42JQHl7eKV2OYzCUqvc+D2RCcf7EXY2iCyFIk6og==", - "dev": true, - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@changesets/apply-release-plan": { "version": "7.1.0", @@ -396,6 +394,10 @@ "resolved": "packages/integrations/langchain", "link": true }, + "node_modules/@db0-ai/mdcli": { + "resolved": "packages/md", + "link": true + }, "node_modules/@db0-ai/openclaw": { "resolved": "packages/apps/openclaw", "link": true @@ -885,8 +887,8 @@ "version": "1.1.34", "resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.34.tgz", "integrity": "sha512-IDlZES5Vexo5meLQRCGkAU7NM0tPGPfPP5wcUzBd7Ot+JoFBmSXutC4gGzvZod5AKRVn3I0Qy5k8vkTraY21jA==", - "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@cfworker/json-schema": "^4.0.2", "@standard-schema/spec": "^1.1.0", @@ -1214,8 +1216,8 @@ "version": "1.9.0", "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", - "dev": true, "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -1570,7 +1572,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz", "integrity": 
"sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==", - "dev": true, "license": "MIT" }, "node_modules/@types/chai": { @@ -1586,6 +1587,7 @@ "version": "4.1.12", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@types/ms": "*" } @@ -1634,14 +1636,12 @@ "version": "10.0.0", "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", - "dev": true, "license": "MIT" }, "node_modules/@vercel/oidc": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/@vercel/oidc/-/oidc-3.1.0.tgz", "integrity": "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w==", - "dev": true, "license": "Apache-2.0", "engines": { "node": ">= 20" @@ -1763,8 +1763,8 @@ "version": "6.0.116", "resolved": "https://registry.npmjs.org/ai/-/ai-6.0.116.tgz", "integrity": "sha512-7yM+cTmyRLeNIXwt4Vj+mrrJgVQ9RMIW5WO0ydoLoYkewIvsMcvUmqS4j2RJTUXaF1HphwmSKUMQ/HypNRGOmA==", - "dev": true, "license": "Apache-2.0", + "peer": true, "dependencies": { "@ai-sdk/gateway": "3.0.66", "@ai-sdk/provider": "3.0.8", @@ -1831,7 +1831,6 @@ "version": "5.2.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, "license": "MIT", "engines": { "node": ">=10" @@ -1869,7 +1868,6 @@ "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, "funding": [ { "type": "github", @@ -1978,7 +1976,6 @@ "version": "6.3.0", "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", "integrity": 
"sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", - "dev": true, "license": "MIT", "engines": { "node": ">=10" @@ -2006,7 +2003,6 @@ "version": "5.6.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", - "dev": true, "license": "MIT", "engines": { "node": "^12.17.0 || ^14.13 || >=16.0.0" @@ -2034,7 +2030,6 @@ "version": "2.15.0", "resolved": "https://registry.npmjs.org/console-table-printer/-/console-table-printer-2.15.0.tgz", "integrity": "sha512-SrhBq4hYVjLCkBVOWaTzceJalvn5K1Zq5aQA6wXC/cYjI3frKWNPEMK3sZsJfNNQApvCQmgBcc13ZKmFj8qExw==", - "dev": true, "license": "MIT", "dependencies": { "simple-wcswidth": "^1.1.2" @@ -2118,7 +2113,6 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", - "dev": true, "license": "MIT", "engines": { "node": ">=0.10.0" @@ -2305,7 +2299,6 @@ "version": "4.0.7", "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", - "dev": true, "license": "MIT" }, "node_modules/eventsource": { @@ -2336,6 +2329,7 @@ "node_modules/express": { "version": "5.2.1", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -2673,6 +2667,7 @@ "node_modules/hono": { "version": "4.12.7", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -2835,7 +2830,6 @@ "version": "1.0.21", "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz", "integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==", - "dev": true, "license": "MIT", "dependencies": { "base64-js": 
"^1.5.1" @@ -2863,7 +2857,6 @@ "version": "0.4.0", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz", "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==", - "dev": true, "license": "(AFL-2.1 OR BSD-3-Clause)" }, "node_modules/json-schema-to-ts": { @@ -2902,7 +2895,6 @@ "version": "0.5.11", "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.5.11.tgz", "integrity": "sha512-Yio502Ow2vbVt16P1sybNMNpMsr5BMqoeonoi4flrcDsP55No/aCe2zydtBNOv0+kjKQw4WSKAzTsNwenDeD5w==", - "dev": true, "license": "MIT", "dependencies": { "@types/uuid": "^10.0.0", @@ -2941,7 +2933,6 @@ "version": "10.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", - "dev": true, "funding": [ "https://github.com/sponsors/broofa", "https://github.com/sponsors/ctavan" @@ -3084,7 +3075,6 @@ "version": "4.2.0", "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==", - "dev": true, "license": "MIT", "bin": { "mustache": "bin/mustache" @@ -3172,7 +3162,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", "integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==", - "dev": true, "license": "MIT", "engines": { "node": ">=4" @@ -3221,7 +3210,6 @@ "version": "6.6.2", "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz", "integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==", - "dev": true, "license": "MIT", "dependencies": { "eventemitter3": "^4.0.4", @@ -3254,7 +3242,6 @@ "version": "3.2.0", "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz", "integrity": 
"sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==", - "dev": true, "license": "MIT", "dependencies": { "p-finally": "^1.0.0" @@ -3341,6 +3328,7 @@ "node_modules/pg": { "version": "8.20.0", "license": "MIT", + "peer": true, "dependencies": { "pg-connection-string": "^2.12.0", "pg-pool": "^3.13.0", @@ -3427,6 +3415,7 @@ "version": "4.0.3", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -3772,7 +3761,6 @@ "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", - "dev": true, "license": "ISC", "bin": { "semver": "bin/semver.js" @@ -3929,7 +3917,6 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/simple-wcswidth/-/simple-wcswidth-1.1.2.tgz", "integrity": "sha512-j7piyCjAeTDSjzTSQ7DokZtMNwNlEAyxqSZeCS+CXH7fJ4jx3FuJ/mTW3mE+6JLs4VJBbcll0Kjn+KXI5t21Iw==", - "dev": true, "license": "MIT" }, "node_modules/slash": { @@ -4187,7 +4174,6 @@ "version": "11.1.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", - "dev": true, "funding": [ "https://github.com/sponsors/broofa", "https://github.com/sponsors/ctavan" @@ -4208,6 +4194,7 @@ "version": "7.3.1", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -4440,11 +4427,11 @@ }, "packages/apps/claude-code": { "name": "@db0-ai/claude-code", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0", + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0", "@modelcontextprotocol/sdk": "^1.0.0" }, "bin": { @@ -4453,11 +4440,11 @@ }, "packages/apps/openclaw": { "name": "@db0-ai/openclaw", - "version": "0.2.0", + "version": "0.3.0", 
"license": "MIT", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0", + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0", "debug": "^4.4.3" }, "bin": { @@ -4466,8 +4453,9 @@ }, "packages/backends/postgres": { "name": "@db0-ai/backends-postgres", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", + "peer": true, "dependencies": { "pg": "^8.13.0", "pgvector": "^0.2.0" @@ -4476,34 +4464,34 @@ "@types/pg": "^8.11.0" }, "peerDependencies": { - "@db0-ai/core": "0.2.0" + "@db0-ai/core": "0.3.0" } }, "packages/backends/sqlite": { "name": "@db0-ai/backends-sqlite", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { "debug": "^4.4.3", "sql.js": "^1.11.0" }, "peerDependencies": { - "@db0-ai/core": "0.2.0" + "@db0-ai/core": "0.3.0" } }, "packages/benchmark": { "name": "@db0-ai/benchmark", - "version": "0.2.0", + "version": "0.3.0", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0" + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0" }, "devDependencies": { "tsx": "^4.0.0", "typescript": "^5.7.0" }, "peerDependencies": { - "@db0-ai/backends-postgres": "0.2.0" + "@db0-ai/backends-postgres": "0.3.0" }, "peerDependenciesMeta": { "@db0-ai/backends-postgres": { @@ -4513,45 +4501,34 @@ }, "packages/cli": { "name": "@db0-ai/cli", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0" + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0" }, "bin": { "db0": "dist/index.js" }, "optionalDependencies": { - "@db0-ai/backends-postgres": "^0.1.0" + "@db0-ai/backends-postgres": "^1.0.0" } }, "packages/cli/node_modules/@db0-ai/backends-postgres": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/@db0-ai/backends-postgres/-/backends-postgres-0.1.0.tgz", - "integrity": 
"sha512-6KtmczhONEfWN+Mng6qmBY71REutCW47s0XBSqEJdQkWbZ17WGkDainZM5TAtgVKZnGsEPEwZdRgFQ2U4RmY+g==", - "license": "MIT", - "optional": true, - "dependencies": { - "pg": "^8.13.0", - "pgvector": "^0.2.0" - }, - "peerDependencies": { - "@db0-ai/core": "^0.1.0" - } + "optional": true }, "packages/core": { "name": "@db0-ai/core", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT" }, "packages/inspector": { "name": "@db0-ai/inspector", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0" + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0" }, "bin": { "db0-inspect": "dist/cli.js" @@ -4559,11 +4536,11 @@ }, "packages/integrations/ai-sdk": { "name": "@db0-ai/ai-sdk", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0" + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0" }, "peerDependencies": { "ai": ">=4.0.0", @@ -4572,11 +4549,11 @@ }, "packages/integrations/langchain": { "name": "@db0-ai/langchain", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0" + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0" }, "peerDependencies": { "@langchain/core": ">=0.3.0", @@ -4585,11 +4562,25 @@ }, "packages/integrations/pi": { "name": "@db0-ai/pi", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { - "@db0-ai/backends-sqlite": "0.2.0", - "@db0-ai/core": "0.2.0" + "@db0-ai/backends-sqlite": "0.3.0", + "@db0-ai/core": "0.3.0" + }, + "bin": { + "db0-pi": "dist/cli.js" + } + }, + "packages/md": { + "name": "@db0-ai/db0", + "version": "0.3.0", + "license": "MIT", + "dependencies": { + "@db0-ai/core": "0.3.0" + }, + "bin": { + "db0": "dist/cli.js" } } } diff --git a/package.json b/package.json index f001e59..72171d0 100644 --- a/package.json +++ 
b/package.json @@ -13,7 +13,8 @@ "packages/integrations/ai-sdk", "packages/integrations/langchain", "packages/integrations/pi", - "packages/benchmark" + "packages/benchmark", + "packages/md" ], "scripts": { "build": "npm run build --workspaces", diff --git a/packages/md/README.md b/packages/md/README.md new file mode 100644 index 0000000..42ff4bc --- /dev/null +++ b/packages/md/README.md @@ -0,0 +1,146 @@ +# @db0-ai/db0 + +The `db0` CLI — a markdown-based memory store for AI agents. + +Turn a directory of markdown files into managed agent memory with scoping, deduplication, superseding, and context packing. Zero config, zero external dependencies, files you can read with `ls`. + +## Install + +```bash +npm install -g @db0-ai/db0 +``` + +Or use directly: + +```bash +npx @db0-ai/db0 remember "User prefers dark mode" +``` + +## Commands + +```bash +db0 remember "User prefers TypeScript" --scope user +db0 search "language preferences" --limit 5 +db0 pack "current task context" --budget 2000 +db0 consolidate +db0 index +``` + +### `remember <fact>` + +Store a new memory. Automatically detects related or contradictory facts: + +- **New fact** — creates a new markdown file with YAML frontmatter +- **Contradicts existing** — supersedes the old file (preserved for audit) +- **Related to existing** — links via `related-to` frontmatter field + +```bash +db0 remember "User prefers dark mode" --scope user --tags ui,preferences +db0 remember "User prefers light mode" # supersedes the previous fact +``` + +Options: `--scope` (user/agent/session/task), `--tags` (comma-separated), `--dir` + +### `search <query>` + +Search memories by semantic similarity. Results include score, scope, content preview, age, and staleness warnings. + +```bash +db0 search "UI preferences" --limit 5 --scope user +``` + +Options: `--limit`, `--scope`, `--dir` + +### `pack [query]` + +Assemble memories into a context block for LLM consumption. Pipe-friendly — outputs to stdout. 
+ +```bash +db0 pack "current project" --budget 2000 | pbcopy +db0 pack --budget 4000 > context.md +``` + +Without a query, packs all memories ordered by scope priority then recency. With a query, orders by relevance. + +Options: `--budget` (token budget, default 4000), `--scope`, `--dir` + +### `consolidate` + +Clean up superseded, expired, and duplicate memories: + +- Archives superseded files to `.db0/archive/` +- Expires old session/task memories (>24h) +- Merges near-duplicates within the same scope + +```bash +db0 consolidate --dir ./memories +``` + +Options: `--quiet`, `--dir` + +### `index` + +Regenerate the `MEMORIES.md` index file — a human-readable table of contents grouped by scope. + +```bash +db0 index --dir ./memories +``` + +## Storage Model + +``` +memories/ + user/ + prefer-dark-mode.md + prefer-typescript.md + session/ + current-task-notes.md + MEMORIES.md ← auto-generated index + .db0/ ← derived data (archive, cache) +``` + +Each memory is a markdown file with YAML frontmatter: + +```markdown +--- +id: m_abc123 +scope: user +tags: [ui, preferences] +created: 2025-01-15T10:30:00Z +--- + +User prefers dark mode for all IDE and terminal interfaces. +``` + +Frontmatter fields: `id`, `scope`, `tags`, `created`, `supersedes`, `related-to`, `expires` + +## Programmatic API + +```typescript +import { MemoryStore } from "@db0-ai/db0"; + +const store = new MemoryStore({ dir: "./memories" }); + +await store.remember("User prefers TypeScript", { scope: "user" }); + +const results = await store.search("language preferences", { limit: 5 }); + +const context = await store.pack({ query: "current task", tokenBudget: 2000 }); + +await store.consolidate(); +``` + +## Design Principles + +- **Files are the source of truth** — not a database, not a cache. `ls memories/` tells you everything. +- **Zero config** — works out of the box with sensible defaults. +- **Lifecycle over retrieval** — deduplication, superseding, expiration, and consolidation are first-class. 
+- **Everything rebuildable** — `.db0/` and `MEMORIES.md` are derived from the markdown files. + +## Documentation + +See the [main db0 README](https://github.com/db0-ai/db0) for full documentation. + +## License + +MIT diff --git a/packages/md/__tests__/consolidate.test.ts b/packages/md/__tests__/consolidate.test.ts new file mode 100644 index 0000000..a0c0b61 --- /dev/null +++ b/packages/md/__tests__/consolidate.test.ts @@ -0,0 +1,84 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { LocalContentStore } from "../src/content-store.js"; +import { serializeMarkdown } from "../src/markdown.js"; +import { mkdtempSync, rmSync, existsSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.consolidate", () => { + let dir: string; + let store: MemoryStore; + let contentStore: LocalContentStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-consolidate-test-")); + contentStore = new LocalContentStore(dir); + store = new MemoryStore({ dir, contentStore }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("expires old session files (>24h)", async () => { + // Write a session file with a very old created timestamp directly + const old = serializeMarkdown( + { id: "m_old", scope: "session", created: "2020-01-01T00:00:00Z" }, + "Old session fact from 2020", + ); + await contentStore.write("session/old-fact.md", old); + + const result = await store.consolidate(); + + expect(result.expired).toBe(1); + // The file should be gone + expect(existsSync(join(dir, "session/old-fact.md"))).toBe(false); + }); + + it("preserves recent session files", async () => { + // Use store.remember() which stamps the current time + const res = await store.remember("Current session fact", { + scope: "session", + }); + + const result = await store.consolidate(); + + 
expect(result.expired).toBe(0); + // The file should still exist + expect(existsSync(join(dir, res.file))).toBe(true); + }); + + it("regenerates MEMORIES.md after consolidate", async () => { + await store.remember("Agent always uses ESM modules", { scope: "agent" }); + + await store.consolidate(); + + // generateIndex is not called by consolidate itself, but we can verify + // that consolidate doesn't break anything and then generate the index + const index = await store.generateIndex(); + expect(existsSync(join(dir, "MEMORIES.md"))).toBe(true); + expect(index).toContain("# MEMORIES.md"); + expect(index).toContain("ESM modules"); + }); + + it("returns zero counts when nothing to expire, merge, or archive", async () => { + const result = await store.consolidate(); + expect(result.expired).toBe(0); + expect(result.merged).toBe(0); + expect(result.archived).toBe(0); + }); + + it("expires old task files (>24h)", async () => { + const old = serializeMarkdown( + { id: "m_task_old", scope: "task", created: "2019-06-15T12:00:00Z" }, + "Old task fact", + ); + await contentStore.write("task/old-task.md", old); + + const result = await store.consolidate(); + + expect(result.expired).toBeGreaterThanOrEqual(1); + expect(existsSync(join(dir, "task/old-task.md"))).toBe(false); + }); +}); diff --git a/packages/md/__tests__/content-store.test.ts b/packages/md/__tests__/content-store.test.ts new file mode 100644 index 0000000..e130a51 --- /dev/null +++ b/packages/md/__tests__/content-store.test.ts @@ -0,0 +1,60 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { LocalContentStore } from "../src/content-store.js"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("LocalContentStore", () => { + let dir: string; + let store: LocalContentStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new LocalContentStore(dir); + }); + + 
afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("write and read a file", async () => { + await store.write("user/prefs.md", "hello world"); + const content = await store.read("user/prefs.md"); + expect(content).toBe("hello world"); + }); + + it("list files recursively", async () => { + await store.write("user/a.md", "a"); + await store.write("user/b.md", "b"); + await store.write("agent/c.md", "c"); + const all = await store.list(); + expect(all.sort()).toEqual(["agent/c.md", "user/a.md", "user/b.md"]); + }); + + it("list files with prefix", async () => { + await store.write("user/a.md", "a"); + await store.write("agent/b.md", "b"); + const userFiles = await store.list("user"); + expect(userFiles).toEqual(["user/a.md"]); + }); + + it("check existence", async () => { + expect(await store.exists("user/x.md")).toBe(false); + await store.write("user/x.md", "x"); + expect(await store.exists("user/x.md")).toBe(true); + }); + + it("delete a file", async () => { + await store.write("user/x.md", "x"); + await store.delete("user/x.md"); + expect(await store.exists("user/x.md")).toBe(false); + }); + + it("ignores non-md files and dotfiles", async () => { + await store.write("user/a.md", "a"); + await store.write("user/b.txt", "b"); + await store.write(".db0/index.json", "{}"); + const files = await store.list(); + expect(files).toEqual(["user/a.md"]); + }); +}); diff --git a/packages/md/__tests__/markdown.test.ts b/packages/md/__tests__/markdown.test.ts new file mode 100644 index 0000000..9d77b15 --- /dev/null +++ b/packages/md/__tests__/markdown.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect } from "vitest"; +import { parseMarkdown, serializeMarkdown } from "../src/markdown.js"; + +describe("parseMarkdown", () => { + it("parses frontmatter and content", () => { + const input = `--- +id: m_abc123 +scope: user +tags: [preference, language] +created: "2026-04-03T10:00:00Z" +--- + +User prefers Rust.`; + + const result = 
parseMarkdown(input); + expect(result.frontmatter.id).toBe("m_abc123"); + expect(result.frontmatter.scope).toBe("user"); + expect(result.frontmatter.tags).toEqual(["preference", "language"]); + expect(result.content).toBe("User prefers Rust."); + }); + + it("handles missing frontmatter", () => { + const result = parseMarkdown("Just plain content."); + expect(result.frontmatter).toEqual({}); + expect(result.content).toBe("Just plain content."); + }); + + it("handles partial frontmatter (scope only)", () => { + const input = `--- +scope: agent +--- + +A fact.`; + const result = parseMarkdown(input); + expect(result.frontmatter.scope).toBe("agent"); + expect(result.frontmatter.id).toBeUndefined(); + expect(result.content).toBe("A fact."); + }); +}); + +describe("serializeMarkdown", () => { + it("serializes frontmatter and content", () => { + const output = serializeMarkdown( + { + id: "m_abc123", + scope: "user", + tags: ["preference"], + created: "2026-04-03T10:00:00Z", + }, + "User prefers Rust.", + ); + expect(output).toContain("id: m_abc123"); + expect(output).toContain("scope: user"); + expect(output).toContain("User prefers Rust."); + expect(output.startsWith("---\n")).toBe(true); + }); + + it("roundtrips cleanly", () => { + const fm = { + id: "m_test", + scope: "user" as const, + tags: ["a", "b"], + created: "2026-01-01T00:00:00Z", + }; + const content = "Hello world."; + const serialized = serializeMarkdown(fm, content); + const parsed = parseMarkdown(serialized); + expect(parsed.frontmatter.id).toBe("m_test"); + expect(parsed.frontmatter.scope).toBe("user"); + expect(parsed.frontmatter.tags).toEqual(["a", "b"]); + expect(parsed.content).toBe(content); + }); +}); diff --git a/packages/md/__tests__/memories-index.test.ts b/packages/md/__tests__/memories-index.test.ts new file mode 100644 index 0000000..3d0bd97 --- /dev/null +++ b/packages/md/__tests__/memories-index.test.ts @@ -0,0 +1,92 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; 
+import { MemoryStore } from "../src/memory-store.js"; +import { mkdtempSync, rmSync, existsSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.generateIndex (MEMORIES.md)", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-index-test-")); + store = new MemoryStore({ dir }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("generates MEMORIES.md with the correct header", async () => { + await store.remember("User prefers dark mode", { scope: "user" }); + + const index = await store.generateIndex(); + + expect(index).toContain("# MEMORIES.md"); + expect(existsSync(join(dir, "MEMORIES.md"))).toBe(true); + }); + + it("contains scope heading and memory content", async () => { + await store.remember("User prefers dark mode", { scope: "user" }); + + const index = await store.generateIndex(); + + expect(index).toContain("## user"); + expect(index).toContain("dark mode"); + }); + + it("groups memories by scope", async () => { + await store.remember("User prefers Rust", { scope: "user" }); + await store.remember("Deploy to AWS us-east-1", { scope: "agent" }); + + const index = await store.generateIndex(); + + expect(index).toContain("## user"); + expect(index).toContain("## agent"); + expect(index).toContain("Rust"); + expect(index).toContain("AWS"); + + // user section should come before agent section (scope order: user, agent, session, task) + const userPos = index.indexOf("## user"); + const agentPos = index.indexOf("## agent"); + expect(userPos).toBeLessThan(agentPos); + }); + + it("writes MEMORIES.md to disk with correct content", async () => { + await store.remember("User prefers TypeScript", { scope: "user" }); + + await store.generateIndex(); + + const onDisk = readFileSync(join(dir, "MEMORIES.md"), "utf8"); + expect(onDisk).toContain("# MEMORIES.md"); + 
expect(onDisk).toContain("TypeScript"); + }); + + it("shows total count in header region (multiple scopes)", async () => { + await store.remember("User prefers Rust", { scope: "user" }); + await store.remember("Agent uses ESM", { scope: "agent" }); + await store.remember("Agent deploys to AWS", { scope: "agent" }); + + const index = await store.generateIndex(); + + // Both user and agent sections should be present + expect(index).toContain("## user"); + expect(index).toContain("## agent"); + // All three facts should appear + expect(index).toContain("Rust"); + expect(index).toContain("ESM"); + expect(index).toContain("AWS"); + }); + + it("omits scope sections that have no memories", async () => { + // Only add a user-scope memory + await store.remember("User prefers Vim", { scope: "user" }); + + const index = await store.generateIndex(); + + expect(index).toContain("## user"); + expect(index).not.toContain("## agent"); + expect(index).not.toContain("## session"); + expect(index).not.toContain("## task"); + }); +}); diff --git a/packages/md/__tests__/pack.test.ts b/packages/md/__tests__/pack.test.ts new file mode 100644 index 0000000..1b8a164 --- /dev/null +++ b/packages/md/__tests__/pack.test.ts @@ -0,0 +1,68 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.pack", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(async () => { + dir = mkdtempSync(join(tmpdir(), "db0-md-pack-test-")); + store = new MemoryStore({ dir }); + await store.remember("User prefers TypeScript for all projects", { + scope: "user", + }); + await store.remember("Deploy target is AWS us-east-1", { scope: "agent" }); + await store.remember("Use vitest for all unit tests", { scope: "agent" }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: 
true }); + }); + + it("assembles all memories when no query is given", async () => { + const output = await store.pack(); + expect(output).toContain("TypeScript"); + expect(output).toContain("AWS"); + expect(output).toContain("vitest"); + }); + + it("truncates output when tokenBudget is very small", async () => { + // A budget of 20 tokens = 80 chars, which can hold at most one short block + const output = await store.pack({ tokenBudget: 20 }); + // At least one fact should be there since budget cuts after first exceed + // but definitely not all three + const factCount = [ + output.includes("TypeScript"), + output.includes("AWS"), + output.includes("vitest"), + ].filter(Boolean).length; + expect(factCount).toBeLessThan(3); + }); + + it("returns relevant results first when a query is given", async () => { + const output = await store.pack({ query: "programming language TypeScript" }); + // The TypeScript fact should appear in the output + expect(output).toContain("TypeScript"); + // The output should have the TypeScript fact before others (it appears at top) + const tsIndex = output.indexOf("TypeScript"); + const awsIndex = output.indexOf("AWS"); + // TypeScript should come before AWS since it is more relevant to the query + if (awsIndex !== -1) { + expect(tsIndex).toBeLessThan(awsIndex); + } + }); + + it("returns empty string when no memories exist", async () => { + const emptyDir = mkdtempSync(join(tmpdir(), "db0-md-empty-test-")); + try { + const emptyStore = new MemoryStore({ dir: emptyDir }); + const output = await emptyStore.pack(); + expect(output).toBe(""); + } finally { + rmSync(emptyDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/md/__tests__/remember.test.ts b/packages/md/__tests__/remember.test.ts new file mode 100644 index 0000000..050caf2 --- /dev/null +++ b/packages/md/__tests__/remember.test.ts @@ -0,0 +1,56 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from 
"../src/memory-store.js"; +import { mkdtempSync, rmSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.remember", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new MemoryStore({ dir }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("creates a new memory file", async () => { + const result = await store.remember("User prefers Rust", { scope: "user" }); + expect(result.action).toBe("created"); + expect(result.file).toMatch(/^user\/.+\.md$/); + const raw = readFileSync(join(dir, result.file), "utf8"); + expect(raw).toContain("User prefers Rust"); + expect(raw).toContain("scope: user"); + }); + + it("supersedes a highly similar memory", async () => { + await store.remember("User prefers Python", { scope: "user" }); + const result = await store.remember("User prefers Rust", { scope: "user" }); + expect(result.action).toBe("superseded"); + expect(result.superseded).toBeDefined(); + expect(result.superseded!.content).toContain("Python"); + const raw = readFileSync(join(dir, result.file), "utf8"); + expect(raw).toContain("Rust"); + expect(raw).not.toContain("Python"); + }); + + it("creates independently for unrelated facts", async () => { + const r1 = await store.remember("User prefers Rust", { scope: "user" }); + const r2 = await store.remember("Deploy target is AWS", { scope: "agent" }); + expect(r1.file).not.toBe(r2.file); + expect(r2.action).toBe("created"); + }); + + it("adds tags to frontmatter", async () => { + const result = await store.remember("Use vitest for testing", { + scope: "agent", + tags: ["testing", "tooling"], + }); + const raw = readFileSync(join(dir, result.file), "utf8"); + expect(raw).toContain("testing"); + expect(raw).toContain("tooling"); + }); +}); diff --git a/packages/md/__tests__/search.test.ts 
b/packages/md/__tests__/search.test.ts new file mode 100644 index 0000000..a24aae0 --- /dev/null +++ b/packages/md/__tests__/search.test.ts @@ -0,0 +1,45 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { MemoryStore } from "../src/memory-store.js"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +describe("MemoryStore.search", () => { + let dir: string; + let store: MemoryStore; + + beforeEach(async () => { + dir = mkdtempSync(join(tmpdir(), "db0-md-test-")); + store = new MemoryStore({ dir }); + await store.remember("User prefers Rust for CLI tools", { scope: "user" }); + await store.remember("Deploy target is AWS us-east-1", { scope: "agent" }); + await store.remember("Use vitest for all tests", { scope: "agent" }); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("returns ranked results", async () => { + const results = await store.search("Rust programming language"); + expect(results.length).toBeGreaterThan(0); + expect(results[0].content).toContain("Rust"); + }); + + it("respects limit", async () => { + const results = await store.search("tools", { limit: 1 }); + expect(results.length).toBe(1); + }); + + it("filters by scope", async () => { + const results = await store.search("tools", { scope: ["agent"] }); + for (const r of results) { + expect(r.scope).toBe("agent"); + } + }); + + it("includes age information", async () => { + const results = await store.search("Rust"); + expect(results[0].age).toBe("today"); + }); +}); diff --git a/packages/md/package.json b/packages/md/package.json new file mode 100644 index 0000000..530526e --- /dev/null +++ b/packages/md/package.json @@ -0,0 +1,28 @@ +{ + "name": "@db0-ai/db0", + "version": "0.3.0", + "description": "The embedded memory primitive for AI agents. 
Markdown files + smart lifecycle.", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "bin": { + "db0": "./dist/cli.js" + }, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "scripts": { + "build": "tsc" + }, + "files": [ + "README.md", + "dist" + ], + "license": "MIT", + "dependencies": { + "@db0-ai/core": "0.3.0" + } +} diff --git a/packages/md/src/cli.ts b/packages/md/src/cli.ts new file mode 100644 index 0000000..f26bbb5 --- /dev/null +++ b/packages/md/src/cli.ts @@ -0,0 +1,269 @@ +#!/usr/bin/env node +import { resolve } from "node:path"; +import { MemoryStore } from "./memory-store.js"; +import type { MemoryScope } from "./types.js"; + +// ── Arg parsing ─────────────────────────────────────────────────── + +interface ParsedArgs { + command: string | null; + positionals: string[]; + flags: Record; +} + +function parseArgs(argv: string[]): ParsedArgs { + const args = argv.slice(2); // strip node + script + const command = args[0] && !args[0].startsWith("--") ? args[0] : null; + const rest = command ? args.slice(1) : args; + + const positionals: string[] = []; + const flags: Record = {}; + + for (let i = 0; i < rest.length; i++) { + const arg = rest[i]; + if (arg.startsWith("--")) { + const key = arg.slice(2); + const next = rest[i + 1]; + if (next && !next.startsWith("--")) { + flags[key] = next; + i++; + } else { + flags[key] = true; + } + } else { + positionals.push(arg); + } + } + + return { command, positionals, flags }; +} + +function str(v: string | boolean | undefined): string | undefined { + return typeof v === "string" ? v : undefined; +} + +function num(v: string | boolean | undefined, fallback: number): number { + const s = str(v); + if (!s) return fallback; + const n = Number(s); + return isNaN(n) ? 
fallback : n; +} + +// ── Help ────────────────────────────────────────────────────────── + +function printHelp(): void { + console.log(`db0 — markdown-based memory store for AI agents + +Commands: + remember [options] Store a new memory + search [options] Search memories by semantic similarity + pack [query] [options] Output memories as context block (pipe-friendly) + consolidate [options] Clean up superseded, expired, and duplicate memories + index [options] Regenerate MEMORIES.md index file + +Options: + --dir Memory directory (default: ./memories) + --scope Scope: user | agent | session | task + --tags Comma-separated tags (remember only) + --limit Max results (search, default: 10) + --budget Token budget (pack, default: 4000) + --quiet Suppress output (consolidate only) + +Examples: + db0 remember "User prefers dark mode" --scope user --dir ./memories + db0 search "UI preferences" --limit 5 --dir ./memories + db0 pack "language preferences" --budget 2000 --dir ./memories + db0 consolidate --dir ./memories + db0 index --dir ./memories +`); +} + +// ── Output helpers ──────────────────────────────────────────────── + +function truncate(s: string, len: number): string { + if (s.length <= len) return s; + return s.slice(0, len - 3) + "..."; +} + +function padEnd(s: string, len: number): string { + return s.length >= len ? s : s + " ".repeat(len - s.length); +} + +// ── Commands ────────────────────────────────────────────────────── + +async function cmdRemember( + store: MemoryStore, + positionals: string[], + flags: Record, +): Promise { + const fact = positionals.join(" ").trim(); + if (!fact) { + console.error("Error: is required for 'remember'"); + process.exit(1); + } + + const scopeStr = str(flags["scope"]) ?? 
"user"; + const validScopes: MemoryScope[] = ["user", "agent", "session", "task"]; + if (!validScopes.includes(scopeStr as MemoryScope)) { + console.error(`Error: --scope must be one of: ${validScopes.join(", ")}`); + process.exit(1); + } + const scope = scopeStr as MemoryScope; + + const tagsStr = str(flags["tags"]); + const tags = tagsStr ? tagsStr.split(",").map((t) => t.trim()) : undefined; + + const result = await store.remember(fact, { scope, tags }); + + if (result.action === "created") { + console.log(`Created: ${result.file}`); + console.log(`Content: ${fact}`); + } else if (result.action === "superseded") { + console.log(`Superseded: ${result.file}`); + console.log(` Old (${result.superseded!.file}): ${result.superseded!.content}`); + console.log(` New: ${fact}`); + } else if (result.action === "related") { + console.log(`Related: ${result.file}`); + console.log(` Related to: ${result.relatedTo}`); + console.log(` Content: ${fact}`); + } +} + +async function cmdSearch( + store: MemoryStore, + positionals: string[], + flags: Record, +): Promise { + const query = positionals.join(" ").trim(); + if (!query) { + console.error("Error: is required for 'search'"); + process.exit(1); + } + + const limit = num(flags["limit"], 10); + const scopeStr = str(flags["scope"]); + const scope = scopeStr + ? 
(scopeStr.split(",").map((s) => s.trim()) as MemoryScope[]) + : undefined; + + const results = await store.search(query, { limit, scope }); + + if (results.length === 0) { + console.log("No results found."); + return; + } + + // Table header + const scoreW = 6; + const scopeW = 8; + const previewW = 80; + const ageW = 12; + + const header = + padEnd("Score", scoreW) + + " " + + padEnd("Scope", scopeW) + + " " + + padEnd("Content", previewW) + + " " + + padEnd("Age", ageW) + + " File"; + const divider = "-".repeat(header.length); + + console.log(header); + console.log(divider); + + for (const r of results) { + const score = r.score.toFixed(3); + const preview = truncate(r.content.replace(/\n/g, " "), previewW); + const age = r.stalenessCaveat ? `${r.age} ⚠` : r.age; + console.log( + padEnd(score, scoreW) + + " " + + padEnd(r.scope, scopeW) + + " " + + padEnd(preview, previewW) + + " " + + padEnd(age, ageW) + + " " + + r.file, + ); + } +} + +async function cmdPack( + store: MemoryStore, + positionals: string[], + flags: Record, +): Promise { + const query = positionals.join(" ").trim() || undefined; + const tokenBudget = num(flags["budget"], 4000); + const scopeStr = str(flags["scope"]); + const scope = scopeStr + ? 
(scopeStr.split(",").map((s) => s.trim()) as MemoryScope[]) + : undefined; + + const output = await store.pack({ query, tokenBudget, scope }); + process.stdout.write(output); +} + +async function cmdConsolidate( + store: MemoryStore, + flags: Record, +): Promise { + const quiet = flags["quiet"] === true; + const result = await store.consolidate(); + if (!quiet) { + console.log( + `Consolidation complete: merged=${result.merged}, archived=${result.archived}, expired=${result.expired}`, + ); + } +} + +async function cmdIndex(store: MemoryStore): Promise { + await store.generateIndex(); + console.log("MEMORIES.md regenerated."); +} + +// ── Main ────────────────────────────────────────────────────────── + +async function main(): Promise { + const { command, positionals, flags } = parseArgs(process.argv); + + if (!command || command === "help" || flags["help"]) { + printHelp(); + return; + } + + const dirFlag = str(flags["dir"]) ?? "./memories"; + const dir = resolve(process.cwd(), dirFlag); + + const store = new MemoryStore({ dir }); + + switch (command) { + case "remember": + await cmdRemember(store, positionals, flags); + break; + case "search": + await cmdSearch(store, positionals, flags); + break; + case "pack": + await cmdPack(store, positionals, flags); + break; + case "consolidate": + await cmdConsolidate(store, flags); + break; + case "index": + await cmdIndex(store); + break; + default: + console.error(`Unknown command: ${command}`); + console.error("Run 'db0' with no arguments for help."); + process.exit(1); + } +} + +main().catch((err) => { + console.error("Error:", err instanceof Error ? 
err.message : String(err));
  process.exit(1);
});
diff --git a/packages/md/src/content-store.ts b/packages/md/src/content-store.ts
new file mode 100644
index 0000000..f0f16f3
--- /dev/null
+++ b/packages/md/src/content-store.ts
@@ -0,0 +1,60 @@
import { readFile, writeFile, rm, readdir, mkdir } from "node:fs/promises";
import { join, dirname, relative } from "node:path";
import { existsSync } from "node:fs";

/**
 * Storage abstraction over the memory directory. Keys are paths relative
 * to the store root (e.g. "user/language-prefs.md"). LocalContentStore is
 * the default; alternative backends can be injected via
 * MemoryStoreOpts.contentStore.
 */
// NOTE(review): Promise return types appear without type arguments —
// generics look stripped by extraction (read: Promise<string>, list:
// Promise<string[]>, exists: Promise<boolean>, others: Promise<void>).
export interface ContentStore {
  read(key: string): Promise;
  write(key: string, content: string): Promise;
  delete(key: string): Promise;
  list(prefix?: string): Promise;
  exists(key: string): Promise;
}

/** ContentStore backed by plain files under a single root directory. */
export class LocalContentStore implements ContentStore {
  constructor(private dir: string) {}

  /** Read a file as UTF-8; rejects if the key does not exist. */
  async read(key: string): Promise {
    return readFile(join(this.dir, key), "utf8");
  }

  /** Write (create or overwrite) a file, creating parent directories. */
  async write(key: string, content: string): Promise {
    const fullPath = join(this.dir, key);
    const parent = dirname(fullPath);
    if (!existsSync(parent)) {
      await mkdir(parent, { recursive: true });
    }
    await writeFile(fullPath, content, "utf8");
  }

  /** Delete a file if present; silently no-ops when absent. */
  async delete(key: string): Promise {
    const fullPath = join(this.dir, key);
    if (existsSync(fullPath)) {
      await rm(fullPath);
    }
  }

  /** List all memory markdown files, optionally under a sub-prefix. */
  async list(prefix?: string): Promise {
    const scanDir = prefix ?
join(this.dir, prefix) : this.dir;
    if (!existsSync(scanDir)) return [];
    return this.scanDir(scanDir);
  }

  /** True when the key resolves to an existing file. */
  async exists(key: string): Promise {
    return existsSync(join(this.dir, key));
  }

  /**
   * Recursive directory walk. Skips dot-entries (so .db0/ internals and
   * archives stay invisible), returns only *.md files, and excludes the
   * generated MEMORIES.md index. Paths are relative to the store root.
   */
  private async scanDir(dir: string): Promise {
    const results: string[] = [];
    const entries = await readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      if (entry.name.startsWith(".")) continue;
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        results.push(...(await this.scanDir(fullPath)));
      } else if (entry.name.endsWith(".md") && entry.name !== "MEMORIES.md") {
        results.push(relative(this.dir, fullPath));
      }
    }
    return results;
  }
}
diff --git a/packages/md/src/index.ts b/packages/md/src/index.ts
new file mode 100644
index 0000000..ef56e4f
--- /dev/null
+++ b/packages/md/src/index.ts
@@ -0,0 +1,16 @@
// Public package surface: runtime exports plus `export type` re-exports so
// type-only symbols are erased from the emitted JS (isolatedModules-safe).
export { MemoryStore } from "./memory-store.js";
export type { MemoryStoreOpts, RememberOpts, SearchOpts, PackOpts } from "./memory-store.js";

export { LocalContentStore } from "./content-store.js";
export type { ContentStore } from "./content-store.js";

export { parseMarkdown, serializeMarkdown } from "./markdown.js";

export type {
  MemoryScope,
  MemoryFrontmatter,
  MemoryFile,
  RememberResult,
  SearchResult,
  ConsolidateResult,
} from "./types.js";
diff --git a/packages/md/src/markdown.ts b/packages/md/src/markdown.ts
new file mode 100644
index 0000000..9808100
--- /dev/null
+++ b/packages/md/src/markdown.ts
@@ -0,0 +1,73 @@
import type { MemoryFrontmatter } from "./types.js";

/**
 * Minimal YAML frontmatter parser. No dependencies.
 * Handles the subset we need: scalars, arrays, quoted strings.
+ */ +export function parseMarkdown(raw: string): { + frontmatter: Partial; + content: string; +} { + const fmMatch = raw.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/); + if (!fmMatch) { + return { frontmatter: {}, content: raw.trim() }; + } + + const fmBlock = fmMatch[1]; + const content = fmMatch[2].trim(); + const frontmatter: Record = {}; + + for (const line of fmBlock.split("\n")) { + const colonIdx = line.indexOf(":"); + if (colonIdx === -1) continue; + const key = line.slice(0, colonIdx).trim(); + let value: unknown = line.slice(colonIdx + 1).trim(); + + // Remove surrounding quotes + if ( + typeof value === "string" && + value.startsWith('"') && + value.endsWith('"') + ) { + value = value.slice(1, -1); + } + + // Parse inline arrays: [a, b, c] + if (typeof value === "string" && value.startsWith("[") && value.endsWith("]")) { + value = value + .slice(1, -1) + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + } + + if (key) frontmatter[key] = value; + } + + return { frontmatter: frontmatter as Partial, content }; +} + +/** + * Serialize frontmatter + content into a markdown string. 
+ */ +export function serializeMarkdown( + frontmatter: Partial, + content: string, +): string { + const lines: string[] = ["---"]; + + for (const [key, value] of Object.entries(frontmatter)) { + if (value === undefined || value === null) continue; + if (Array.isArray(value)) { + lines.push(`${key}: [${value.join(", ")}]`); + } else { + lines.push(`${key}: ${value}`); + } + } + + lines.push("---"); + lines.push(""); + lines.push(content); + + return lines.join("\n") + "\n"; +} diff --git a/packages/md/src/memory-store.ts b/packages/md/src/memory-store.ts new file mode 100644 index 0000000..441876c --- /dev/null +++ b/packages/md/src/memory-store.ts @@ -0,0 +1,396 @@ +import { + hashEmbed, + cosineSimilarity, + generateId, + memoryAge, +} from "@db0-ai/core"; +import { LocalContentStore } from "./content-store.js"; +import type { ContentStore } from "./content-store.js"; +import { parseMarkdown, serializeMarkdown } from "./markdown.js"; +import type { + MemoryScope, + MemoryFrontmatter, + MemoryFile, + RememberResult, + SearchResult, + ConsolidateResult, +} from "./types.js"; + +// ── Option types ────────────────────────────────────────────────── + +export interface MemoryStoreOpts { + dir: string; + contentStore?: ContentStore; + highThreshold?: number; + mediumThreshold?: number; +} + +export interface RememberOpts { + scope: MemoryScope; + tags?: string[]; + expires?: string; +} + +export interface SearchOpts { + limit?: number; + scope?: MemoryScope[]; +} + +export interface PackOpts { + query?: string; + tokenBudget?: number; + scope?: MemoryScope[]; +} + +// ── Constants ───────────────────────────────────────────────────── + +const DEFAULT_HIGH_THRESHOLD = 0.65; +const DEFAULT_MEDIUM_THRESHOLD = 0.4; +const DEFAULT_TOKEN_BUDGET = 4000; +const CHARS_PER_TOKEN = 4; +const SCOPE_PRIORITY: Record = { + task: 0, + session: 1, + agent: 2, + user: 3, +}; +const SESSION_TASK_EXPIRE_HOURS = 24; + +// ── MemoryStore ─────────────────────────────────────────────────── 

export class MemoryStore {
  private store: ContentStore;
  private dir: string;
  private highThreshold: number;
  private mediumThreshold: number;

  constructor(opts: MemoryStoreOpts) {
    this.dir = opts.dir;
    this.store = opts.contentStore ?? new LocalContentStore(opts.dir);
    this.highThreshold = opts.highThreshold ?? DEFAULT_HIGH_THRESHOLD;
    this.mediumThreshold = opts.mediumThreshold ?? DEFAULT_MEDIUM_THRESHOLD;
  }

  // ── Public API ──────────────────────────────────────────────── 

  /**
   * Smart write: compare the new fact against every existing memory in the
   * same scope, then either supersede the closest match (score >=
   * highThreshold), link to it as related (>= mediumThreshold), or create
   * an independent memory. Regenerates MEMORIES.md on every path.
   */
  async remember(fact: string, opts: RememberOpts): Promise {
    const existing = await this.loadAll();
    const newEmbed = this.embed(fact);

    // Find the best match among existing memories in the same scope
    let bestMatch: { file: MemoryFile; score: number } | null = null;
    for (const mem of existing) {
      if (mem.frontmatter.scope !== opts.scope) continue;
      const memEmbed = this.embed(mem.content);
      const score = cosineSimilarity(newEmbed, memEmbed);
      if (!bestMatch || score > bestMatch.score) {
        bestMatch = { file: mem, score };
      }
    }

    const id = generateId();
    const now = new Date().toISOString();
    const slug = this.slugify(fact);
    // NOTE(review): two different facts can slugify to the same path, in
    // which case the later write silently overwrites the earlier file —
    // confirm whether a uniquifying suffix is needed.
    const filePath = `${opts.scope}/${slug}.md`;

    const frontmatter: Partial = {
      id,
      scope: opts.scope,
      created: now,
    };
    if (opts.tags && opts.tags.length > 0) {
      frontmatter.tags = opts.tags;
    }
    if (opts.expires) {
      frontmatter.expires = opts.expires;
    }

    // Decide action based on similarity
    if (bestMatch && bestMatch.score >= this.highThreshold) {
      // Supersede: replace the old memory
      frontmatter.supersedes = bestMatch.file.frontmatter.id;
      const content = serializeMarkdown(frontmatter, fact);
      // Write new file
      await this.store.write(filePath, content);
      // Delete old file
      await this.store.delete(bestMatch.file.path);
      await this.generateIndex();
      return {
        action: "superseded",
        file: filePath,
        superseded: {
          file: bestMatch.file.path,
          content: bestMatch.file.content,
        },
      };
    }
else if (bestMatch && bestMatch.score >= this.mediumThreshold) {
      // Related: create new but link to the related memory
      frontmatter["related-to"] = [bestMatch.file.frontmatter.id];
      const content = serializeMarkdown(frontmatter, fact);
      await this.store.write(filePath, content);
      await this.generateIndex();
      return {
        action: "related",
        file: filePath,
        relatedTo: bestMatch.file.frontmatter.id,
      };
    } else {
      // New: create independently
      const content = serializeMarkdown(frontmatter, fact);
      await this.store.write(filePath, content);
      await this.generateIndex();
      return {
        action: "created",
        file: filePath,
      };
    }
  }

  /**
   * Cosine-similarity search over all memories. Embeddings are recomputed
   * per call (hashEmbed is local and deterministic). Results carry a
   * human-readable age plus an optional staleness caveat from memoryAge().
   */
  async search(query: string, opts?: SearchOpts): Promise {
    const limit = opts?.limit ?? 10;
    const scopeFilter = opts?.scope;

    const all = await this.loadAll();
    const queryEmbed = this.embed(query);

    const scored: SearchResult[] = [];
    for (const mem of all) {
      if (scopeFilter && !scopeFilter.includes(mem.frontmatter.scope)) {
        continue;
      }
      const memEmbed = this.embed(mem.content);
      const score = cosineSimilarity(queryEmbed, memEmbed);
      const age = memoryAge(mem.frontmatter.created);
      scored.push({
        file: mem.path,
        content: mem.content,
        scope: mem.frontmatter.scope,
        score,
        age: age.label,
        stalenessCaveat: age.stalenessCaveat,
      });
    }

    // Highest similarity first, truncated to the requested limit.
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, limit);
  }

  /**
   * Assemble a single context string within a token budget. With a query:
   * top-50 search results in relevance order. Without: all memories in
   * scope-priority order, newest first within a scope.
   */
  async pack(opts?: PackOpts): Promise {
    const budget = opts?.tokenBudget ??
DEFAULT_TOKEN_BUDGET;
    // Budget is enforced in characters via the ~4 chars/token heuristic.
    const charBudget = budget * CHARS_PER_TOKEN;

    let memories: MemoryFile[];
    if (opts?.query) {
      const results = await this.search(opts.query, {
        limit: 50,
        scope: opts.scope,
      });
      // Convert search results back to ordered content
      const parts: string[] = [];
      let totalChars = 0;
      for (const r of results) {
        const block = `## ${r.file}\n${r.content}\n`;
        // Stop at the first block that would overflow — no partial blocks.
        if (totalChars + block.length > charBudget) break;
        parts.push(block);
        totalChars += block.length;
      }
      return parts.join("\n");
    }

    // No query: load all, sort by scope priority then recency
    memories = await this.loadAll();
    if (opts?.scope) {
      memories = memories.filter((m) =>
        opts.scope!.includes(m.frontmatter.scope),
      );
    }
    memories.sort((a, b) => {
      const sPri =
        SCOPE_PRIORITY[a.frontmatter.scope] -
        SCOPE_PRIORITY[b.frontmatter.scope];
      if (sPri !== 0) return sPri;
      // More recent first
      return (
        new Date(b.frontmatter.created).getTime() -
        new Date(a.frontmatter.created).getTime()
      );
    });

    const parts: string[] = [];
    let totalChars = 0;
    for (const mem of memories) {
      const block = `## ${mem.path}\n${mem.content}\n`;
      if (totalChars + block.length > charBudget) break;
      parts.push(block);
      totalChars += block.length;
    }
    return parts.join("\n");
  }

  /**
   * Garbage-collection pass: archive superseded files, expire stale
   * session/task memories and anything past its `expires` timestamp, then
   * merge near-duplicates. Returns counts per category.
   */
  async consolidate(): Promise {
    let merged = 0;
    let archived = 0;
    let expired = 0;

    const all = await this.loadAll();
    const now = new Date();

    // 1. Archive superseded files
    // NOTE(review): this loop body is empty (comments only) — the actual
    // archiving happens in the supersededIds block that follows. Dead code;
    // candidate for removal.
    for (const mem of all) {
      if (mem.frontmatter.supersedes) {
        // This memory supersedes another — check if the old one still exists
        // The old one should already be deleted in remember(), but in case of
        // manual edits, archive any memory that has been superseded by another.
+ } + } + + // Find memories that have been superseded (their id appears in another's supersedes field) + const supersededIds = new Set( + all + .map((m) => m.frontmatter.supersedes) + .filter((s): s is string => !!s), + ); + for (const mem of all) { + if (supersededIds.has(mem.frontmatter.id)) { + const archivePath = `.db0/archive/${mem.path}`; + const raw = await this.store.read(mem.path); + await this.store.write(archivePath, raw); + await this.store.delete(mem.path); + archived++; + } + } + + // 2. Expire old session/task memories (>24h) + const remaining = await this.loadAll(); + for (const mem of remaining) { + if ( + mem.frontmatter.scope === "session" || + mem.frontmatter.scope === "task" + ) { + const created = new Date(mem.frontmatter.created); + const hoursOld = + (now.getTime() - created.getTime()) / (1000 * 60 * 60); + if (hoursOld > SESSION_TASK_EXPIRE_HOURS) { + await this.store.delete(mem.path); + expired++; + } + } + // Also check explicit expires field + if (mem.frontmatter.expires) { + const expiresDate = new Date(mem.frontmatter.expires); + if (now > expiresDate) { + await this.store.delete(mem.path); + expired++; + } + } + } + + // 3. 
Merge near-duplicates (score >= highThreshold, same scope) + const afterExpire = await this.loadAll(); + const toDelete = new Set(); + for (let i = 0; i < afterExpire.length; i++) { + if (toDelete.has(afterExpire[i].path)) continue; + for (let j = i + 1; j < afterExpire.length; j++) { + if (toDelete.has(afterExpire[j].path)) continue; + if ( + afterExpire[i].frontmatter.scope !== + afterExpire[j].frontmatter.scope + ) + continue; + const embA = this.embed(afterExpire[i].content); + const embB = this.embed(afterExpire[j].content); + const score = cosineSimilarity(embA, embB); + if (score >= this.highThreshold) { + // Keep the newer one, delete the older one + const aDate = new Date(afterExpire[i].frontmatter.created); + const bDate = new Date(afterExpire[j].frontmatter.created); + if (aDate >= bDate) { + toDelete.add(afterExpire[j].path); + } else { + toDelete.add(afterExpire[i].path); + } + merged++; + } + } + } + for (const path of toDelete) { + await this.store.delete(path); + } + + await this.generateIndex(); + return { merged, archived, expired }; + } + + async generateIndex(): Promise { + const all = await this.loadAll(); + + // Group by scope + const groups: Record = {}; + for (const mem of all) { + const scope = mem.frontmatter.scope; + if (!groups[scope]) groups[scope] = []; + groups[scope].push(mem); + } + + const lines: string[] = ["# MEMORIES.md", ""]; + const scopeOrder: MemoryScope[] = ["user", "agent", "session", "task"]; + + for (const scope of scopeOrder) { + const mems = groups[scope]; + if (!mems || mems.length === 0) continue; + + lines.push(`## ${scope}`); + lines.push(""); + for (const mem of mems) { + const age = memoryAge(mem.frontmatter.created); + let line = `- [${mem.path}](${mem.path}) — ${mem.content.split("\n")[0]}`; + if (age.stalenessCaveat) { + line += ` ⚠️ ${age.label}`; + } + lines.push(line); + } + lines.push(""); + } + + const content = lines.join("\n"); + await this.store.write("MEMORIES.md", content); + return content; + } + + 
// ── Private helpers ─────────────────────────────────────────── + + private async loadAll(): Promise { + const keys = await this.store.list(); + const files: MemoryFile[] = []; + + for (const key of keys) { + const raw = await this.store.read(key); + const { frontmatter, content } = parseMarkdown(raw); + + // Skip files without required frontmatter + if (!frontmatter.id || !frontmatter.scope || !frontmatter.created) { + continue; + } + + files.push({ + path: key, + frontmatter: frontmatter as MemoryFrontmatter, + content, + }); + } + + return files; + } + + private embed(text: string): Float32Array { + return hashEmbed(text); + } + + private slugify(content: string): string { + return content + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-|-$/g, "") + .slice(0, 50); + } +} diff --git a/packages/md/src/types.ts b/packages/md/src/types.ts new file mode 100644 index 0000000..67bff83 --- /dev/null +++ b/packages/md/src/types.ts @@ -0,0 +1,40 @@ +export type MemoryScope = "user" | "agent" | "session" | "task"; + +export interface MemoryFrontmatter { + id: string; + scope: MemoryScope; + tags?: string[]; + created: string; + supersedes?: string; + "related-to"?: string[]; + expires?: string; +} + +export interface MemoryFile { + /** Relative path from memory dir, e.g. 
"user/language-prefs.md" */ + path: string; + frontmatter: MemoryFrontmatter; + content: string; +} + +export interface RememberResult { + action: "created" | "superseded" | "related"; + file: string; + superseded?: { file: string; content: string }; + relatedTo?: string; +} + +export interface SearchResult { + file: string; + content: string; + scope: MemoryScope; + score: number; + age: string; + stalenessCaveat: string | null; +} + +export interface ConsolidateResult { + merged: number; + archived: number; + expired: number; +} diff --git a/packages/md/tsconfig.json b/packages/md/tsconfig.json new file mode 100644 index 0000000..792172f --- /dev/null +++ b/packages/md/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src"] +}