diff --git a/.changeset/recall-session-mcp-tool.md b/.changeset/recall-session-mcp-tool.md new file mode 100644 index 0000000..b653780 --- /dev/null +++ b/.changeset/recall-session-mcp-tool.md @@ -0,0 +1,18 @@ +--- +'@colony/mcp-server': minor +'@imdeadpool/colony': minor +--- + +Add `recall_session` MCP tool. An agent passes a `target_session_id` plus its own `current_session_id`, and the tool returns a compact timeline of the target (IDs + kind + ts only — bodies still come from `get_observations(ids[])`) while writing a `kind: 'recall'` observation into the *caller's* session as the audit trail. + +The recall observation introduces a new wire contract that other code may filter on: + +- `kind === 'recall'` +- `metadata.recalled_session_id` — the consulted session +- `metadata.owner_ide` — `inferIdeFromSessionId` fallback when the target's `ide` column is `unknown`, so foreign-session recalls stay traceable without re-inferring at read time +- `metadata.observation_ids` — the timeline slice that was returned +- `metadata.around_id` and `metadata.limit` — the request parameters that produced the slice + +Both session ids are validated via `Storage.getSession()` before any write. `MemoryStore.addObservation` routes through `ensureSession` (memory-store.ts:96), which silently materialises a missing sessions row — without these checks a typo'd `current_session_id` would create a phantom session and write a recall observation into it. Errors come back as `{ code: 'SESSION_NOT_FOUND', error }`. + +Also extends `GET /api/sessions/:id/observations` on the worker viewer with an `?around=&limit=` query so the same paged timeline is reachable from the HTTP surface (the route already proxied to `Storage.timeline`, which has supported `aroundId` for a while). Cross-session `?around` ids cleanly return `[]` rather than spilling into the target window, matching the SQL filter on `session_id`. diff --git a/apps/mcp-server/src/server.ts b/apps/mcp-server/src/server.ts index 9f0b75b..4619151 100644 --- a/apps/mcp-server/src/server.ts +++ b/apps/mcp-server/src/server.ts @@ -14,6 +14,7 @@ import * as hivemind from './tools/hivemind.js'; import * as message from './tools/message.js'; import * as profile from './tools/profile.js'; import * as proposal from './tools/proposal.js'; +import * as recall from './tools/recall.js'; import * as search from './tools/search.js'; import * as spec from './tools/spec.js'; import * as task from './tools/task.js'; @@ -73,6 +74,7 @@ export function buildServer(store: MemoryStore, settings: Settings): McpServer { profile.register(server, ctx); wake.register(server, ctx); message.register(server, ctx); + recall.register(server, ctx); // Spec-driven dev lane (@colony/spec). Adds spec_read, spec_change_open, // spec_change_add_delta, spec_build_context, spec_build_record_failure, diff --git a/apps/mcp-server/src/tools/recall.ts b/apps/mcp-server/src/tools/recall.ts new file mode 100644 index 0000000..c25feae --- /dev/null +++ b/apps/mcp-server/src/tools/recall.ts @@ -0,0 +1,112 @@ +import { inferIdeFromSessionId } from '@colony/core'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; +import type { ToolContext } from './context.js'; + +/** + * `recall_session` lets an agent pull a compact timeline of a *different* + * session (its own past or another agent's) and records the act of recalling + * as a `kind:'recall'` observation in the calling session. The recall row is + * what makes cross-session memory traceable: a future search across the + * caller's session can find "I previously consulted session X" without ever + * pasting X's bodies into the caller. + * + * Progressive disclosure: the return payload carries IDs + metadata only. + * Callers fetch bodies via `get_observations(ids[])` exactly like `timeline`. + * + * Safety: both `target_session_id` and `current_session_id` are validated + * against `storage.getSession()` before any write. `MemoryStore.addObservation` + * routes through `ensureSession`, which silently materialises a missing + * sessions row — that would let a typo create a phantom session and write a + * recall observation into it. The early checks make the failure loud. + */ +export function register(server: McpServer, ctx: ToolContext): void { + const { store } = ctx; + + server.tool( + 'recall_session', + "Pull a compact timeline of a past session and audit the recall in the current session. Returns observation IDs only — fetch bodies via get_observations. The recall is itself stored as a kind:'recall' observation with metadata.recalled_session_id, metadata.owner_ide, and metadata.observation_ids so future searches surface that this session consulted the target. Use around_id to centre the window on a specific observation; limit caps how many IDs come back.", + { + target_session_id: z.string().min(1).describe('the session whose timeline you want to read'), + current_session_id: z + .string() + .min(1) + .describe('your session_id (where the recall observation gets written)'), + around_id: z.number().int().positive().optional(), + limit: z.number().int().positive().max(100).optional(), + }, + async ({ target_session_id, current_session_id, around_id, limit }) => { + const target = store.storage.getSession(target_session_id); + if (!target) return sessionNotFound('target', target_session_id); + const current = store.storage.getSession(current_session_id); + if (!current) return sessionNotFound('current', current_session_id); + + const cap = limit ?? 20; + // Storage.timeline filters by session_id but uses aroundId purely as a + // numeric anchor — a foreign-session anchor won't bleed rows from the + // wrong session, but it WILL silently slice the target's history at a + // position the caller did not mean. Detect that case and return an + // empty timeline instead of a misaligned slice. The recall observation + // still gets written so the recall attempt remains auditable. + let rows: ReturnType; + if (around_id !== undefined) { + const anchorRow = store.storage.getObservation(around_id); + rows = + !anchorRow || anchorRow.session_id !== target_session_id + ? [] + : store.timeline(target_session_id, around_id, cap); + } else { + rows = store.timeline(target_session_id, undefined, cap); + } + const ownerIde = + target.ide && target.ide !== 'unknown' + ? target.ide + : (inferIdeFromSessionId(target_session_id) ?? 'unknown'); + const observation_ids = rows.map((r) => r.id); + + const recall_observation_id = store.addObservation({ + session_id: current_session_id, + kind: 'recall', + content: `Recalled session ${target_session_id} (owner_ide=${ownerIde}, observations=${observation_ids.length}).`, + metadata: { + recalled_session_id: target_session_id, + owner_ide: ownerIde, + observation_ids, + ...(around_id !== undefined ? { around_id } : {}), + limit: cap, + }, + }); + + const payload = { + recall_observation_id, + session: { + id: target.id, + ide: ownerIde, + cwd: target.cwd, + started_at: target.started_at, + ended_at: target.ended_at, + }, + observations: rows.map((r) => ({ id: r.id, kind: r.kind, ts: r.ts })), + }; + return { content: [{ type: 'text', text: JSON.stringify(payload) }] }; + }, + ); +} + +function sessionNotFound( + label: 'target' | 'current', + id: string, +): { content: Array<{ type: 'text'; text: string }>; isError: true } { + return { + content: [ + { + type: 'text', + text: JSON.stringify({ + code: 'SESSION_NOT_FOUND', + error: `${label} session ${id} does not exist`, + }), + }, + ], + isError: true, + }; +} diff --git a/apps/mcp-server/test/recall.test.ts b/apps/mcp-server/test/recall.test.ts new file mode 100644 index 0000000..5a72b9d --- /dev/null +++ b/apps/mcp-server/test/recall.test.ts @@ -0,0 +1,161 @@ +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { defaultSettings } from '@colony/config'; +import { MemoryStore } from '@colony/core'; +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { buildServer } from '../src/server.js'; + +let dir: string; +let store: MemoryStore; +let client: Client; + +async function call(name: string, args: Record): Promise { + const res = await client.callTool({ name, arguments: args }); + const text = (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}'; + return JSON.parse(text) as T; +} + +async function callError( + name: string, + args: Record, +): Promise<{ code: string; error: string }> { + const res = await client.callTool({ name, arguments: args }); + expect(res.isError).toBe(true); + const text = (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}'; + return JSON.parse(text) as { code: string; error: string }; +} + +beforeEach(async () => { + dir = mkdtempSync(join(tmpdir(), 'colony-recall-')); + store = new MemoryStore({ dbPath: join(dir, 'data.db'), settings: defaultSettings }); + const server = buildServer(store, defaultSettings); + const [clientT, serverT] = InMemoryTransport.createLinkedPair(); + client = new Client({ name: 'test', version: '0.0.0' }); + await Promise.all([server.connect(serverT), client.connect(clientT)]); +}); + +afterEach(async () => { + await client.close(); + store.close(); + rmSync(dir, { recursive: true, force: true }); +}); + +describe('recall_session', () => { + it('returns the target timeline (compact) and writes a kind:recall observation in the caller', async () => { + store.startSession({ id: 'past', ide: 'codex', cwd: '/repo' }); + store.startSession({ id: 'now', ide: 'claude-code', cwd: '/repo' }); + const a = store.addObservation({ session_id: 'past', kind: 'note', content: 'hit one' }); + const b = store.addObservation({ session_id: 'past', kind: 'note', content: 'hit two' }); + + const resp = await call<{ + recall_observation_id: number; + session: { id: string; ide: string }; + observations: Array<{ id: number; kind: string; ts: number }>; + }>('recall_session', { + target_session_id: 'past', + current_session_id: 'now', + limit: 10, + }); + + expect(resp.session.ide).toBe('codex'); + const ids = resp.observations.map((o) => o.id).sort((x, y) => x - y); + expect(ids).toEqual([a, b].sort((x, y) => x - y)); + // Compact shape — never carry bodies on the recall response itself. + for (const obs of resp.observations) { + expect(obs).not.toHaveProperty('content'); + } + + // The recall row lives in the *caller's* session, not the target's. + const recallRow = store.storage.getObservation(resp.recall_observation_id); + expect(recallRow?.session_id).toBe('now'); + expect(recallRow?.kind).toBe('recall'); + const meta = JSON.parse(recallRow?.metadata ?? '{}'); + expect(meta.recalled_session_id).toBe('past'); + expect(meta.owner_ide).toBe('codex'); + expect((meta.observation_ids as number[]).sort((x, y) => x - y)).toEqual( + [a, b].sort((x, y) => x - y), + ); + expect(meta.limit).toBe(10); + }); + + it('rejects an unknown target_session_id with SESSION_NOT_FOUND and writes nothing', async () => { + store.startSession({ id: 'now', ide: 'claude-code', cwd: '/repo' }); + const before = store.storage.listSessions(50).length; + + const err = await callError('recall_session', { + target_session_id: 'never-existed', + current_session_id: 'now', + }); + expect(err.code).toBe('SESSION_NOT_FOUND'); + + // Phantom-row guard: no new sessions row should have appeared. ensureSession + // would silently create one if we let the call reach addObservation. + expect(store.storage.listSessions(50).length).toBe(before); + expect(store.storage.getSession('never-existed')).toBeUndefined(); + }); + + it('rejects an unknown current_session_id with SESSION_NOT_FOUND and does not phantom-create it', async () => { + store.startSession({ id: 'past', ide: 'codex', cwd: '/repo' }); + + const err = await callError('recall_session', { + target_session_id: 'past', + current_session_id: 'typo-session', + }); + expect(err.code).toBe('SESSION_NOT_FOUND'); + expect(err.error).toContain('current'); + expect(store.storage.getSession('typo-session')).toBeUndefined(); + }); + + it('falls back to inferIdeFromSessionId when the target session row lists ide as unknown', async () => { + // Backfill scenario: a row exists but the ide column was never classified. + store.startSession({ id: 'codex-abc-123', ide: 'unknown', cwd: '/repo' }); + store.startSession({ id: 'now', ide: 'claude-code', cwd: '/repo' }); + + const resp = await call<{ session: { ide: string }; recall_observation_id: number }>( + 'recall_session', + { + target_session_id: 'codex-abc-123', + current_session_id: 'now', + }, + ); + expect(resp.session.ide).toBe('codex'); + + const meta = JSON.parse( + store.storage.getObservation(resp.recall_observation_id)?.metadata ?? '{}', + ); + expect(meta.owner_ide).toBe('codex'); + }); + + it('around_id pointing at an id from a different session yields an empty timeline (no silent fall-through)', async () => { + store.startSession({ id: 'past', ide: 'codex', cwd: '/repo' }); + store.startSession({ id: 'other', ide: 'claude-code', cwd: '/repo' }); + store.startSession({ id: 'now', ide: 'claude-code', cwd: '/repo' }); + store.addObservation({ session_id: 'past', kind: 'note', content: 'past one' }); + const otherId = store.addObservation({ + session_id: 'other', + kind: 'note', + content: 'other one', + }); + + const resp = await call<{ + observations: Array<{ id: number }>; + recall_observation_id: number; + }>('recall_session', { + target_session_id: 'past', + current_session_id: 'now', + around_id: otherId, + }); + + // Storage.timeline filters by session_id, so a foreign around_id returns + // [] rather than spilling over into the target's window. + expect(resp.observations).toEqual([]); + const meta = JSON.parse( + store.storage.getObservation(resp.recall_observation_id)?.metadata ?? '{}', + ); + expect(meta.observation_ids).toEqual([]); + expect(meta.around_id).toBe(otherId); + }); +}); diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts index 09634c0..2d93494 100644 --- a/apps/mcp-server/test/server.test.ts +++ b/apps/mcp-server/test/server.test.ts @@ -56,6 +56,7 @@ describe('MCP server', () => { 'hivemind', 'hivemind_context', 'list_sessions', + 'recall_session', 'search', 'spec_archive', 'spec_build_context', diff --git a/apps/worker/src/server.ts b/apps/worker/src/server.ts index 4f2596d..ac34087 100644 --- a/apps/worker/src/server.ts +++ b/apps/worker/src/server.ts @@ -118,7 +118,20 @@ export function buildApp( app.get('/api/sessions/:id/observations', (c) => { const id = c.req.param('id'); const limit = Number(c.req.query('limit') ?? 200); - const rows = store.timeline(id, undefined, limit); + const aroundRaw = c.req.query('around'); + const around = aroundRaw !== undefined ? Number(aroundRaw) : undefined; + const aroundId = + around !== undefined && Number.isFinite(around) && around > 0 ? around : undefined; + // Storage.timeline filters by session_id but uses aroundId as a raw numeric + // anchor — passing an id from a different session won't bleed foreign rows + // in, but it WILL silently slice this session's history at the wrong + // position. Reject the foreign anchor up front instead so the response + // either centres on a real anchor or returns []. + if (aroundId !== undefined) { + const anchor = store.storage.getObservation(aroundId); + if (!anchor || anchor.session_id !== id) return c.json([]); + } + const rows = store.timeline(id, aroundId, limit); return c.json(rows.map((r) => ({ ...r, content: expand(r.content) }))); }); diff --git a/apps/worker/test/server.test.ts b/apps/worker/test/server.test.ts index f895483..4dd0cbe 100644 --- a/apps/worker/test/server.test.ts +++ b/apps/worker/test/server.test.ts @@ -176,6 +176,40 @@ describe('worker HTTP', () => { expect(rows.some((r) => r.content.includes('/etc/caveman.conf'))).toBe(true); }); + it('GET /api/sessions/:id/observations honours ?around for paging within a session', async () => { + const { sessionId, a } = seed(); + const res = await app.request(`/api/sessions/${sessionId}/observations?around=${a}&limit=2`); + expect(res.status).toBe(200); + const rows = (await res.json()) as Array<{ id: number }>; + expect(rows.map((r) => r.id)).toContain(a); + }); + + it('GET /api/sessions/:id/observations with ?around pointing at a foreign-session id returns [] (no silent cross-session bleed)', async () => { + const { a } = seed(); + store.startSession({ id: 's2', ide: 'codex', cwd: '/tmp' }); + const foreign = store.addObservation({ + session_id: 's2', + kind: 'note', + content: 'lives in s2', + }); + expect(foreign).not.toBe(a); + + // around=foreign-id while session=s1 must NOT spill s2's row into s1's + // window — Storage.timeline filters by session_id, so the result is []. + const res = await app.request(`/api/sessions/s1/observations?around=${foreign}&limit=10`); + expect(res.status).toBe(200); + const rows = (await res.json()) as Array<{ id: number; session_id: string }>; + expect(rows).toEqual([]); + }); + + it('GET /api/sessions/:id/observations ignores a non-numeric ?around value', async () => { + seed(); + const res = await app.request('/api/sessions/s1/observations?around=garbage'); + expect(res.status).toBe(200); + const rows = (await res.json()) as Array; + expect(rows.length).toBeGreaterThan(0); + }); + it('GET /api/search returns matching observations', async () => { seed(); const res = await app.request('/api/search?q=config'); diff --git a/docs/mcp.md b/docs/mcp.md index af18854..1d2e481 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -451,6 +451,50 @@ Mark a message as read. Idempotent — re-marking a read or replied message is a Errors include `{ code, error }` with stable codes like `NOT_MESSAGE`, `TASK_MISMATCH`, or `OBSERVATION_NOT_ON_TASK`. +## `recall_session` + +Pull a compact timeline of a past session (your own or another agent's) and audit the recall as a `kind: 'recall'` observation in the calling session. Use this when you need to stand on context from an earlier lane without pasting bodies into your current session. + +```json +{ + "name": "recall_session", + "input": { + "target_session_id": "codex-abc-123", + "current_session_id": "claude-now-789", + "around_id": 4421, + "limit": 25 + } +} +``` + +`target_session_id` is the session whose memory you want to read. `current_session_id` is **your** session — it must already exist (the tool will not auto-create it). `around_id` centres the window on a specific observation id; `limit` caps the count (default 20, max 100). + +Returns: + +```json +{ + "recall_observation_id": 9012, + "session": { "id": "codex-abc-123", "ide": "codex", "cwd": "/repo", "started_at": 1714000000000, "ended_at": null }, + "observations": [{ "id": 4418, "kind": "edit", "ts": 1714000000123 }, ...] +} +``` + +Progressive disclosure: `observations` carries IDs only — fetch full bodies via `get_observations(ids[])`. The `recall_observation_id` is the audit row written into your current session, with metadata: + +```json +{ + "recalled_session_id": "codex-abc-123", + "owner_ide": "codex", + "observation_ids": [4418, 4419, 4420, 4421, 4422], + "around_id": 4421, + "limit": 25 +} +``` + +Wire contract: `kind: 'recall'`, plus `metadata.recalled_session_id`, `metadata.owner_ide`, and `metadata.observation_ids`. UI surfaces and search filters can key off these fields. `owner_ide` falls back to `inferIdeFromSessionId(target_session_id)` when the sessions row lists `ide` as `unknown`. + +Errors include `{ "code": "SESSION_NOT_FOUND", "error": "..." }` when either `target_session_id` or `current_session_id` is missing — the tool refuses to silently materialise a phantom session via `MemoryStore.ensureSession`. + ## `attention_inbox` Compact list of what needs the caller's attention: pending handoffs, pending wakes, stalled lanes, and recent other-session file claims. Fetch full bodies via `get_observations`.