From 626bae6800d501ba7e4798e2667c9f2ea13dfcf4 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Sat, 25 Apr 2026 13:58:06 +0200 Subject: [PATCH] feat(mcp): auto-archive plan when last sub-task completes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds opt-in `auto_archive` flag to `task_plan_publish`. When set, the parent spec change three-way-merges and archives automatically after the last sub-task completes via `task_plan_complete_subtask`. Default off because silent state change after the final completion is risky if the merged spec has not been verified — opt in per plan once the lane lands cleanly. Conflicts on the three-way merge are non-fatal: the completion still returns `status: 'completed'`, the archive is skipped, and a `plan-archive-blocked` observation is recorded on the parent spec task. Other auto-archive failures (missing CHANGE.md, write errors) are recorded as `plan-archive-error` observations. The completion response now carries an `auto_archive: { status, reason?, archived_path?, merged_root_hash?, applied?, conflicts? }` field on every call. New observation kinds: `plan-config`, `plan-archived`, `plan-archive-blocked`, `plan-archive-error`. Also fixes a latent race in `@colony/core` `readSubtask`: when claim and complete observations share the same millisecond timestamp, SQLite's `ORDER BY ts DESC` had undefined tie-breaker behavior. Status is now resolved with terminal-state-wins precedence so a completion is authoritative once it exists. Co-Authored-By: Claude Opus 4.7 (1M context) --- .changeset/plan-auto-archive.md | 6 + apps/mcp-server/src/tools/plan.ts | 182 +++++++++++++++++++++++++++++- apps/mcp-server/test/plan.test.ts | 76 ++++++++++++- docs/mcp.md | 22 +++- packages/core/src/plan.ts | 30 +++-- 5 files changed, 302 insertions(+), 14 deletions(-) create mode 100644 .changeset/plan-auto-archive.md diff --git a/.changeset/plan-auto-archive.md b/.changeset/plan-auto-archive.md new file mode 100644 index 0000000..2251a81 --- /dev/null +++ b/.changeset/plan-auto-archive.md @@ -0,0 +1,6 @@ +--- +"@colony/core": patch +"@colony/mcp-server": minor +--- + +Add opt-in `auto_archive` flag to `task_plan_publish`. When set, the parent spec change three-way-merges and archives automatically after the last sub-task completes via `task_plan_complete_subtask`. Default is `false` because silent state change after the final completion is risky if the merged spec has not been verified — opt in per plan once the lane lands cleanly. Conflicts on the three-way merge are non-fatal: the completion still returns `status: 'completed'`, the archive is skipped, and a `plan-archive-blocked` observation is recorded on the parent spec task so resolution stays explicit. Other auto-archive failures (missing `CHANGE.md`, write errors) are likewise recorded as `plan-archive-error` observations and never propagate as tool errors. The completion response now carries an `auto_archive: { status, reason?, archived_path?, merged_root_hash?, applied?, conflicts? }` field that reports the outcome on every call. New observation kinds: `plan-config` (publish-time policy on the parent spec task), `plan-archived`, `plan-archive-blocked`, `plan-archive-error`. Also fixes a latent lifecycle race in `@colony/core` `readSubtask`: when a `claimed` and `completed` `plan-subtask-claim` observation share the same millisecond timestamp (back-to-back claim then complete in tests or fast-running flows), SQLite's `ORDER BY ts DESC` had undefined tie-breaker behavior and could surface the sub-task as `claimed`. Status is now resolved with terminal-state-wins precedence (`completed > blocked > claimed`) so a completion is authoritative once it exists. diff --git a/apps/mcp-server/src/tools/plan.ts b/apps/mcp-server/src/tools/plan.ts index b43daa1..5880ce7 100644 --- a/apps/mcp-server/src/tools/plan.ts +++ b/apps/mcp-server/src/tools/plan.ts @@ -1,11 +1,12 @@ import { + type MemoryStore, type SubtaskInfo, TaskThread, areDepsMet, listPlans, readSubtaskByBranch, } from '@colony/core'; -import { SpecRepository } from '@colony/spec'; +import { SpecRepository, SyncEngine, parseSpec, serializeSpec } from '@colony/spec'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; import type { ToolContext } from './context.js'; @@ -59,6 +60,12 @@ export function register(server: McpServer, ctx: ToolContext): void { .min(2) .max(20) .describe('At least 2 sub-tasks — if it is one task, use task_thread directly.'), + auto_archive: z + .boolean() + .optional() + .describe( + 'When true, the parent spec change auto-archives via three-way merge after the last sub-task completes. Defaults to false because silent state change after the final completion is risky if the merged spec has not been verified — opt in per plan once you trust the lane. Conflicts block auto-archive (surface as a plan-archive-blocked observation) instead of forcing.', + ), }, async (args) => { for (let i = 0; i < args.subtasks.length; i++) { @@ -91,6 +98,22 @@ export function register(server: McpServer, ctx: ToolContext): void { proposal, }); + // Stamp plan-level config on the parent spec task. Read back at + // completion time to decide whether to auto-archive. A separate + // observation (rather than encoding the flag on every sub-task) + // means lifecycle policy lives in one place and can grow more + // fields later without touching sub-task metadata. + store.addObservation({ + session_id: args.session_id, + task_id: opened.task_id, + kind: 'plan-config', + content: `plan ${args.slug} config: auto_archive=${args.auto_archive ?? false}`, + metadata: { + plan_slug: args.slug, + auto_archive: args.auto_archive ?? false, + }, + }); + const subtaskThreads = args.subtasks.map((subtask, index) => { const branch = `spec/${args.slug}/sub-${index}`; const thread = TaskThread.open(store, { @@ -317,13 +340,168 @@ export function register(server: McpServer, ctx: ToolContext): void { }); }); + // Auto-archive: when this completion was the last outstanding sub-task + // and the plan opted in at publish time, three-way-merge the change + // and archive it. Failures are non-fatal and surface as observations + // on the parent spec task rather than tearing down the completion. + const autoArchive = runAutoArchiveIfReady(store, { + plan_slug: args.plan_slug, + parent_spec_task_id: located.info.parent_spec_task_id, + session_id: args.session_id, + }); + return { - content: [{ type: 'text', text: JSON.stringify({ status: 'completed' }) }], + content: [ + { + type: 'text', + text: JSON.stringify({ + status: 'completed', + auto_archive: autoArchive, + }), + }, + ], }; }, ); } +interface AutoArchiveOutcome { + status: 'archived' | 'blocked' | 'error' | 'skipped'; + reason?: string; + archived_path?: string; + merged_root_hash?: string; + applied?: number; + conflicts?: number; +} + +function runAutoArchiveIfReady( + store: MemoryStore, + args: { + plan_slug: string; + parent_spec_task_id: number | null; + session_id: string; + }, +): AutoArchiveOutcome { + if (args.parent_spec_task_id == null) { + return { status: 'skipped', reason: 'no parent spec task linkage on sub-task' }; + } + + const config = readPlanConfig(store, args.parent_spec_task_id); + if (!config?.auto_archive) { + return { status: 'skipped', reason: 'auto_archive disabled' }; + } + + // Aggregate sibling sub-task statuses. The core `readSubtask` helper + // already resolves the claim/complete race with a terminal-state-wins + // rule, so a freshly-completed sub-task surfaces as `completed` here + // even when the prior `claimed` observation shares its millisecond + // timestamp. + const allTasks = store.storage.listTasks(2000); + const siblingBranchPrefix = `spec/${args.plan_slug}/sub-`; + const siblingTasks = allTasks.filter((t) => t.branch.startsWith(siblingBranchPrefix)); + const siblingInfos = siblingTasks + .map((t) => readSubtaskByBranch(store, t.branch)) + .filter((s): s is { task_id: number; info: SubtaskInfo } => s !== null) + .map((s) => s.info); + if (siblingInfos.length === 0) { + return { status: 'skipped', reason: 'no sub-tasks found' }; + } + const allDone = siblingInfos.every((s) => s.status === 'completed'); + if (!allDone) { + return { status: 'skipped', reason: 'sub-tasks still outstanding' }; + } + + const parentTask = allTasks.find((t) => t.id === args.parent_spec_task_id); + if (!parentTask) { + return { status: 'skipped', reason: 'parent spec task not found' }; + } + + try { + const repo = new SpecRepository({ repoRoot: parentTask.repo_root, store }); + const currentRoot = repo.readRoot(); + const change = repo.readChange(args.plan_slug); + const baseRoot = + currentRoot.rootHash === change.baseRootHash + ? currentRoot + : parseSpec(serializeSpec(currentRoot)); + const engine = new SyncEngine('three_way'); + const merge = engine.merge(currentRoot, baseRoot, change); + + if (!merge.clean) { + store.addObservation({ + session_id: args.session_id, + task_id: args.parent_spec_task_id, + kind: 'plan-archive-blocked', + content: `plan ${args.plan_slug} ready to archive but ${merge.conflicts.length} conflict(s) block the merge`, + metadata: { + plan_slug: args.plan_slug, + conflicts: merge.conflicts, + applied: merge.applied, + }, + }); + return { + status: 'blocked', + reason: 'three-way merge conflicts', + conflicts: merge.conflicts.length, + applied: merge.applied, + }; + } + + repo.writeRoot(merge.spec, { + session_id: args.session_id, + agent: 'plan-system', + reason: `Auto-archive ${args.plan_slug}: all ${siblingInfos.length} sub-tasks completed`, + }); + const archivedPath = repo.archiveChange(args.plan_slug); + + store.addObservation({ + session_id: args.session_id, + task_id: args.parent_spec_task_id, + kind: 'plan-archived', + content: `plan ${args.plan_slug} auto-archived after all sub-tasks completed`, + metadata: { + plan_slug: args.plan_slug, + archived_path: archivedPath, + merged_root_hash: merge.spec.rootHash, + applied: merge.applied, + }, + }); + return { + status: 'archived', + archived_path: archivedPath, + merged_root_hash: merge.spec.rootHash, + applied: merge.applied, + conflicts: 0, + }; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + store.addObservation({ + session_id: args.session_id, + task_id: args.parent_spec_task_id, + kind: 'plan-archive-error', + content: `plan ${args.plan_slug} auto-archive failed: ${message}`, + metadata: { plan_slug: args.plan_slug, error: message }, + }); + return { status: 'error', reason: message }; + } +} + +function readPlanConfig( + store: MemoryStore, + parent_task_id: number, +): { auto_archive: boolean } | null { + const rows = store.storage.taskObservationsByKind(parent_task_id, 'plan-config', 100); + // taskObservationsByKind returns DESC by ts; latest config wins. + const latest = rows[0]; + if (!latest?.metadata) return null; + try { + const parsed = JSON.parse(latest.metadata) as { auto_archive?: unknown }; + return { auto_archive: Boolean(parsed.auto_archive) }; + } catch { + return null; + } +} + function detectScopeOverlap( subtasks: SubtaskInput[], ): { a: number; b: number; shared: string[] } | null { diff --git a/apps/mcp-server/test/plan.test.ts b/apps/mcp-server/test/plan.test.ts index 5a08be0..af36704 100644 --- a/apps/mcp-server/test/plan.test.ts +++ b/apps/mcp-server/test/plan.test.ts @@ -1,4 +1,4 @@ -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { existsSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { defaultSettings } from '@colony/config'; @@ -329,3 +329,77 @@ describe('task_plan_complete_subtask', () => { expect(err.code).toBe('PLAN_SUBTASK_NOT_CLAIMED'); }); }); + +describe('task_plan auto-archive', () => { + async function claimAndComplete(slug: string, index: number, sessionId: string, agent: string) { + await call('task_plan_claim_subtask', { + plan_slug: slug, + subtask_index: index, + session_id: sessionId, + agent, + }); + return call<{ status: string; auto_archive: { status: string; reason?: string } }>( + 'task_plan_complete_subtask', + { + plan_slug: slug, + subtask_index: index, + session_id: sessionId, + summary: `sub-${index} done`, + }, + ); + } + + it('archives the change when the last sub-task completes and auto_archive is true', async () => { + await call( + 'task_plan_publish', + basicPublishArgs({ slug: 'auto-archive-on', auto_archive: true }), + ); + + // First completion → still has outstanding work, no archive yet. + const first = await claimAndComplete('auto-archive-on', 0, 'B', 'codex'); + expect(first.status).toBe('completed'); + expect(first.auto_archive.status).toBe('skipped'); + expect(first.auto_archive.reason).toMatch(/outstanding/); + expect(existsSync(join(repoRoot, 'openspec/changes/auto-archive-on/CHANGE.md'))).toBe(true); + + // Last completion → archive runs. + const second = await claimAndComplete('auto-archive-on', 1, 'C', 'claude'); + expect(second.status).toBe('completed'); + expect(second.auto_archive.status).toBe('archived'); + // Change moved to openspec/archive/-/CHANGE.md + expect(existsSync(join(repoRoot, 'openspec/changes/auto-archive-on/CHANGE.md'))).toBe(false); + }); + + it('does not archive when auto_archive is omitted (default off)', async () => { + await call( + 'task_plan_publish', + basicPublishArgs({ slug: 'auto-archive-default-off' }), + ); + await claimAndComplete('auto-archive-default-off', 0, 'B', 'codex'); + const last = await claimAndComplete('auto-archive-default-off', 1, 'C', 'claude'); + expect(last.auto_archive.status).toBe('skipped'); + expect(last.auto_archive.reason).toMatch(/disabled/); + // CHANGE.md stays in openspec/changes/. + expect(existsSync(join(repoRoot, 'openspec/changes/auto-archive-default-off/CHANGE.md'))).toBe( + true, + ); + }); + + it('records a plan-archive-error observation when archive throws', async () => { + // Force a failure path: publish, complete sub-0, then delete the + // CHANGE.md file before the last completion. readChange will throw. + await call( + 'task_plan_publish', + basicPublishArgs({ slug: 'auto-archive-broken', auto_archive: true }), + ); + await claimAndComplete('auto-archive-broken', 0, 'B', 'codex'); + rmSync(join(repoRoot, 'openspec/changes/auto-archive-broken/CHANGE.md'), { + force: true, + }); + const last = await claimAndComplete('auto-archive-broken', 1, 'C', 'claude'); + // Completion still succeeds — the failure is recorded as an observation, + // not propagated as a tool error. + expect(last.status).toBe('completed'); + expect(last.auto_archive.status).toBe('error'); + }); +}); diff --git a/docs/mcp.md b/docs/mcp.md index c2ece35..5e4be94 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -751,7 +751,8 @@ Publish a multi-task plan as a spec change with one task thread per sub-task. Su "depends_on": [0], "capability_hint": "ui_work" } - ] + ], + "auto_archive": false } } ``` @@ -762,6 +763,10 @@ Validation: - `depends_on` indices are zero-based and must point to **earlier** indices (cycle prevention). - Independent sub-tasks (no `depends_on` chain between them) cannot share `file_scope` entries. To overlap files, sequence the work via `depends_on`. +Optional inputs: + +- `auto_archive` (default `false`): when `true`, the parent spec change three-way-merges and archives automatically after the last sub-task completes. Conflicts block the auto-archive (recorded as a `plan-archive-blocked` observation on the parent spec task) instead of forcing — the change stays open so the merge can be resolved by hand. Leave `auto_archive` off until you trust the lane to land cleanly; opt in per plan. + Returns `{ plan_slug, spec_task_id, spec_change_path, subtasks: [{ subtask_index, branch, task_id, title }] }`. Errors: `PLAN_INVALID_DEPENDENCY`, `PLAN_SCOPE_OVERLAP`. ## `task_plan_list` @@ -816,14 +821,25 @@ Mark your claimed sub-task complete. Releases the sub-task file claims and stamp } ``` -Returns `{ status: 'completed' }`. Errors: `PLAN_SUBTASK_NOT_FOUND`, `PLAN_SUBTASK_NOT_CLAIMED`, `PLAN_SUBTASK_NOT_YOURS`. +Returns `{ status: 'completed', auto_archive: { status, reason?, archived_path?, merged_root_hash?, applied?, conflicts? } }`. The `auto_archive` field is always present and reports what happened on this completion: + +- `status: 'skipped'` — auto-archive disabled, sub-tasks still outstanding, or no parent linkage. `reason` carries the specific cause. +- `status: 'archived'` — the merge was clean and the change is now under `openspec/archive/-/`. `archived_path` and `merged_root_hash` describe the result. +- `status: 'blocked'` — auto-archive opted in and the plan is fully completed, but the three-way merge has conflicts. The change stays open; resolve by hand and call `spec_archive` directly. +- `status: 'error'` — auto-archive opted in but the archive flow threw (for example a missing `CHANGE.md`). The completion still succeeded; the failure is recorded as a `plan-archive-error` observation on the parent spec task. + +Errors on `task_plan_complete_subtask` itself: `PLAN_SUBTASK_NOT_FOUND`, `PLAN_SUBTASK_NOT_CLAIMED`, `PLAN_SUBTASK_NOT_YOURS`. ## Plan observation kinds -The lane introduces two observation kinds on the sub-task threads. They are written through `MemoryStore.addObservation`, so content is compressed and `metadata` carries the structured payload. +The lane introduces several observation kinds on the parent spec task and on the sub-task threads. They are written through `MemoryStore.addObservation`, so content is compressed and `metadata` carries the structured payload. - `plan-subtask` — the initial advertisement, one per sub-task at publish time. `metadata` carries `parent_plan_slug`, `parent_plan_title`, `parent_spec_task_id`, `subtask_index`, `file_scope`, `depends_on`, `capability_hint`, and an initial `status: 'available'`. - `plan-subtask-claim` — every lifecycle transition (claim, complete). `metadata.status` is the new state; `metadata.session_id` and `metadata.agent` identify the actor. The latest `plan-subtask-claim` observation by timestamp is authoritative. +- `plan-config` — written on the parent spec task at publish time. Carries plan-level lifecycle policy. Today: `metadata.auto_archive`. +- `plan-archived` — written on the parent spec task when auto-archive succeeds. Carries `archived_path`, `merged_root_hash`, `applied`. +- `plan-archive-blocked` — written when auto-archive is ready but the three-way merge has conflicts. Carries `conflicts` (the conflict set) and `applied` (the deltas that did merge cleanly). +- `plan-archive-error` — written when auto-archive throws. Carries the error message in `metadata.error`. The sub-task completion still succeeded; auto-archive errors never tear down completion. ## Contract stability diff --git a/packages/core/src/plan.ts b/packages/core/src/plan.ts index 4f62ebe..9377ad6 100644 --- a/packages/core/src/plan.ts +++ b/packages/core/src/plan.ts @@ -54,14 +54,28 @@ function readSubtask(store: MemoryStore, task_id: number, plan_slug: string): Su const initial = rows.find((r) => r.kind === 'plan-subtask'); if (!initial) return null; const meta = parseMeta(initial.metadata); - // taskTimeline returns DESC by ts, so the first claim row is the latest. - const latestClaim = rows.find((r) => r.kind === 'plan-subtask-claim'); - const claimMeta = latestClaim ? parseMeta(latestClaim.metadata) : {}; - - const status = - (claimMeta.status as SubtaskStatus | undefined) ?? - (meta.status as SubtaskStatus | undefined) ?? - 'available'; + + // Lifecycle resolution. taskTimeline orders by `ts DESC`, but two + // observations stamped within the same millisecond have undefined + // tie-breaker order in SQLite, so the latest-by-ts row can flicker + // between `claimed` and `completed` for a sub-task that was just + // finished. Resolve with terminal-state-wins precedence so a + // `completed` row is authoritative once it exists; the attribution + // metadata (session_id, agent) is read from the same row that + // decided the status. + const claimRows = rows.filter((r) => r.kind === 'plan-subtask-claim'); + let claimMeta: Record = {}; + let resolvedStatus: SubtaskStatus | undefined; + for (const precedence of ['completed', 'blocked', 'claimed'] as const) { + const match = claimRows.find((r) => parseMeta(r.metadata).status === precedence); + if (match) { + claimMeta = parseMeta(match.metadata); + resolvedStatus = precedence; + break; + } + } + + const status = resolvedStatus ?? (meta.status as SubtaskStatus | undefined) ?? 'available'; const [titleLine, ...rest] = initial.content.split('\n\n'); return {