From 626bae6800d501ba7e4798e2667c9f2ea13dfcf4 Mon Sep 17 00:00:00 2001
From: NagyVikt <nagy.viktordp@gmail.com>
Date: Sat, 25 Apr 2026 13:58:06 +0200
Subject: [PATCH] feat(mcp): auto-archive plan when last sub-task completes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds opt-in `auto_archive` flag to `task_plan_publish`. When set, the
parent spec change three-way-merges and archives automatically after the
last sub-task completes via `task_plan_complete_subtask`. Default off
because silent state change after the final completion is risky if the
merged spec has not been verified — opt in per plan once the lane lands
cleanly. Conflicts on the three-way merge are non-fatal: the completion
still returns `status: 'completed'`, the archive is skipped, and a
`plan-archive-blocked` observation is recorded on the parent spec task.
Other auto-archive failures (missing CHANGE.md, write errors) are
recorded as `plan-archive-error` observations. The completion response
now carries an `auto_archive: { status, reason?, archived_path?,
merged_root_hash?, applied?, conflicts? }` field on every call. New
observation kinds: `plan-config`, `plan-archived`, `plan-archive-blocked`,
`plan-archive-error`.

Also fixes a latent race in `@colony/core` `readSubtask`: when claim and
complete observations share the same millisecond timestamp, SQLite's
`ORDER BY ts DESC` had undefined tie-breaker behavior. Status is now
resolved with terminal-state-wins precedence so a completion is
authoritative once it exists.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .changeset/plan-auto-archive.md   |   6 +
 apps/mcp-server/src/tools/plan.ts | 182 +++++++++++++++++++++++++++++-
 apps/mcp-server/test/plan.test.ts |  76 ++++++++++++-
 docs/mcp.md                       |  22 +++-
 packages/core/src/plan.ts         |  30 +++--
 5 files changed, 302 insertions(+), 14 deletions(-)
 create mode 100644 .changeset/plan-auto-archive.md

diff --git a/.changeset/plan-auto-archive.md b/.changeset/plan-auto-archive.md
new file mode 100644
index 0000000..2251a81
--- /dev/null
+++ b/.changeset/plan-auto-archive.md
@@ -0,0 +1,6 @@
+---
+"@colony/core": patch
+"@colony/mcp-server": minor
+---
+
+Add opt-in `auto_archive` flag to `task_plan_publish`. When set, the parent spec change three-way-merges and archives automatically after the last sub-task completes via `task_plan_complete_subtask`. Default is `false` because silent state change after the final completion is risky if the merged spec has not been verified — opt in per plan once the lane lands cleanly. Conflicts on the three-way merge are non-fatal: the completion still returns `status: 'completed'`, the archive is skipped, and a `plan-archive-blocked` observation is recorded on the parent spec task so resolution stays explicit. Other auto-archive failures (missing `CHANGE.md`, write errors) are likewise recorded as `plan-archive-error` observations and never propagate as tool errors. The completion response now carries an `auto_archive: { status, reason?, archived_path?, merged_root_hash?, applied?, conflicts? }` field that reports the outcome on every call. New observation kinds: `plan-config` (publish-time policy on the parent spec task), `plan-archived`, `plan-archive-blocked`, `plan-archive-error`. Also fixes a latent lifecycle race in `@colony/core` `readSubtask`: when a `claimed` and `completed` `plan-subtask-claim` observation share the same millisecond timestamp (back-to-back claim then complete in tests or fast-running flows), SQLite's `ORDER BY ts DESC` had undefined tie-breaker behavior and could surface the sub-task as `claimed`. Status is now resolved with terminal-state-wins precedence (`completed > blocked > claimed`) so a completion is authoritative once it exists.
diff --git a/apps/mcp-server/src/tools/plan.ts b/apps/mcp-server/src/tools/plan.ts
index b43daa1..5880ce7 100644
--- a/apps/mcp-server/src/tools/plan.ts
+++ b/apps/mcp-server/src/tools/plan.ts
@@ -1,11 +1,12 @@
 import {
+  type MemoryStore,
   type SubtaskInfo,
   TaskThread,
   areDepsMet,
   listPlans,
   readSubtaskByBranch,
 } from '@colony/core';
-import { SpecRepository } from '@colony/spec';
+import { SpecRepository, SyncEngine, parseSpec, serializeSpec } from '@colony/spec';
 import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { z } from 'zod';
 import type { ToolContext } from './context.js';
@@ -59,6 +60,12 @@ export function register(server: McpServer, ctx: ToolContext): void {
         .min(2)
         .max(20)
         .describe('At least 2 sub-tasks — if it is one task, use task_thread directly.'),
+      auto_archive: z
+        .boolean()
+        .optional()
+        .describe(
+          'When true, the parent spec change auto-archives via three-way merge after the last sub-task completes. Defaults to false because silent state change after the final completion is risky if the merged spec has not been verified — opt in per plan once you trust the lane. Conflicts block auto-archive (surface as a plan-archive-blocked observation) instead of forcing.',
+        ),
     },
     async (args) => {
       for (let i = 0; i < args.subtasks.length; i++) {
@@ -91,6 +98,22 @@ export function register(server: McpServer, ctx: ToolContext): void {
         proposal,
       });
 
+      // Stamp plan-level config on the parent spec task. Read back at
+      // completion time to decide whether to auto-archive. A separate
+      // observation (rather than encoding the flag on every sub-task)
+      // means lifecycle policy lives in one place and can grow more
+      // fields later without touching sub-task metadata.
+      store.addObservation({
+        session_id: args.session_id,
+        task_id: opened.task_id,
+        kind: 'plan-config',
+        content: `plan ${args.slug} config: auto_archive=${args.auto_archive ?? false}`,
+        metadata: {
+          plan_slug: args.slug,
+          auto_archive: args.auto_archive ?? false,
+        },
+      });
+
       const subtaskThreads = args.subtasks.map((subtask, index) => {
         const branch = `spec/${args.slug}/sub-${index}`;
         const thread = TaskThread.open(store, {
@@ -317,13 +340,168 @@ export function register(server: McpServer, ctx: ToolContext): void {
         });
       });
 
+      // Auto-archive: when this completion was the last outstanding sub-task
+      // and the plan opted in at publish time, three-way-merge the change
+      // and archive it. Failures are non-fatal and surface as observations
+      // on the parent spec task rather than tearing down the completion.
+      const autoArchive = runAutoArchiveIfReady(store, {
+        plan_slug: args.plan_slug,
+        parent_spec_task_id: located.info.parent_spec_task_id,
+        session_id: args.session_id,
+      });
+
       return {
-        content: [{ type: 'text', text: JSON.stringify({ status: 'completed' }) }],
+        content: [
+          {
+            type: 'text',
+            text: JSON.stringify({
+              status: 'completed',
+              auto_archive: autoArchive,
+            }),
+          },
+        ],
       };
     },
   );
 }
 
+interface AutoArchiveOutcome {
+  status: 'archived' | 'blocked' | 'error' | 'skipped';
+  reason?: string;
+  archived_path?: string;
+  merged_root_hash?: string;
+  applied?: number;
+  conflicts?: number;
+}
+
+function runAutoArchiveIfReady(
+  store: MemoryStore,
+  args: {
+    plan_slug: string;
+    parent_spec_task_id: number | null;
+    session_id: string;
+  },
+): AutoArchiveOutcome {
+  if (args.parent_spec_task_id == null) {
+    return { status: 'skipped', reason: 'no parent spec task linkage on sub-task' };
+  }
+
+  const config = readPlanConfig(store, args.parent_spec_task_id);
+  if (!config?.auto_archive) {
+    return { status: 'skipped', reason: 'auto_archive disabled' };
+  }
+
+  // Aggregate sibling sub-task statuses. The core `readSubtask` helper
+  // already resolves the claim/complete race with a terminal-state-wins
+  // rule, so a freshly-completed sub-task surfaces as `completed` here
+  // even when the prior `claimed` observation shares its millisecond
+  // timestamp.
+  const allTasks = store.storage.listTasks(2000);
+  const siblingBranchPrefix = `spec/${args.plan_slug}/sub-`;
+  const siblingTasks = allTasks.filter((t) => t.branch.startsWith(siblingBranchPrefix));
+  const siblingInfos = siblingTasks
+    .map((t) => readSubtaskByBranch(store, t.branch))
+    .filter((s): s is { task_id: number; info: SubtaskInfo } => s !== null)
+    .map((s) => s.info);
+  if (siblingInfos.length === 0) {
+    return { status: 'skipped', reason: 'no sub-tasks found' };
+  }
+  const allDone = siblingInfos.every((s) => s.status === 'completed');
+  if (!allDone) {
+    return { status: 'skipped', reason: 'sub-tasks still outstanding' };
+  }
+
+  const parentTask = allTasks.find((t) => t.id === args.parent_spec_task_id);
+  if (!parentTask) {
+    return { status: 'skipped', reason: 'parent spec task not found' };
+  }
+
+  try {
+    const repo = new SpecRepository({ repoRoot: parentTask.repo_root, store });
+    const currentRoot = repo.readRoot();
+    const change = repo.readChange(args.plan_slug);
+    const baseRoot =
+      currentRoot.rootHash === change.baseRootHash
+        ? currentRoot
+        : parseSpec(serializeSpec(currentRoot));
+    const engine = new SyncEngine('three_way');
+    const merge = engine.merge(currentRoot, baseRoot, change);
+
+    if (!merge.clean) {
+      store.addObservation({
+        session_id: args.session_id,
+        task_id: args.parent_spec_task_id,
+        kind: 'plan-archive-blocked',
+        content: `plan ${args.plan_slug} ready to archive but ${merge.conflicts.length} conflict(s) block the merge`,
+        metadata: {
+          plan_slug: args.plan_slug,
+          conflicts: merge.conflicts,
+          applied: merge.applied,
+        },
+      });
+      return {
+        status: 'blocked',
+        reason: 'three-way merge conflicts',
+        conflicts: merge.conflicts.length,
+        applied: merge.applied,
+      };
+    }
+
+    repo.writeRoot(merge.spec, {
+      session_id: args.session_id,
+      agent: 'plan-system',
+      reason: `Auto-archive ${args.plan_slug}: all ${siblingInfos.length} sub-tasks completed`,
+    });
+    const archivedPath = repo.archiveChange(args.plan_slug);
+
+    store.addObservation({
+      session_id: args.session_id,
+      task_id: args.parent_spec_task_id,
+      kind: 'plan-archived',
+      content: `plan ${args.plan_slug} auto-archived after all sub-tasks completed`,
+      metadata: {
+        plan_slug: args.plan_slug,
+        archived_path: archivedPath,
+        merged_root_hash: merge.spec.rootHash,
+        applied: merge.applied,
+      },
+    });
+    return {
+      status: 'archived',
+      archived_path: archivedPath,
+      merged_root_hash: merge.spec.rootHash,
+      applied: merge.applied,
+      conflicts: 0,
+    };
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    store.addObservation({
+      session_id: args.session_id,
+      task_id: args.parent_spec_task_id,
+      kind: 'plan-archive-error',
+      content: `plan ${args.plan_slug} auto-archive failed: ${message}`,
+      metadata: { plan_slug: args.plan_slug, error: message },
+    });
+    return { status: 'error', reason: message };
+  }
+}
+
+function readPlanConfig(
+  store: MemoryStore,
+  parent_task_id: number,
+): { auto_archive: boolean } | null {
+  const rows = store.storage.taskObservationsByKind(parent_task_id, 'plan-config', 100);
+  // taskObservationsByKind returns DESC by ts; latest config wins.
+  const latest = rows[0];
+  if (!latest?.metadata) return null;
+  try {
+    const parsed = JSON.parse(latest.metadata) as { auto_archive?: unknown };
+    return { auto_archive: Boolean(parsed.auto_archive) };
+  } catch {
+    return null;
+  }
+}
+
 function detectScopeOverlap(
   subtasks: SubtaskInput[],
 ): { a: number; b: number; shared: string[] } | null {
diff --git a/apps/mcp-server/test/plan.test.ts b/apps/mcp-server/test/plan.test.ts
index 5a08be0..af36704 100644
--- a/apps/mcp-server/test/plan.test.ts
+++ b/apps/mcp-server/test/plan.test.ts
@@ -1,4 +1,4 @@
-import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
+import { existsSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import { defaultSettings } from '@colony/config';
@@ -329,3 +329,77 @@ describe('task_plan_complete_subtask', () => {
     expect(err.code).toBe('PLAN_SUBTASK_NOT_CLAIMED');
   });
 });
+
+describe('task_plan auto-archive', () => {
+  async function claimAndComplete(slug: string, index: number, sessionId: string, agent: string) {
+    await call<ClaimResult>('task_plan_claim_subtask', {
+      plan_slug: slug,
+      subtask_index: index,
+      session_id: sessionId,
+      agent,
+    });
+    return call<{ status: string; auto_archive: { status: string; reason?: string } }>(
+      'task_plan_complete_subtask',
+      {
+        plan_slug: slug,
+        subtask_index: index,
+        session_id: sessionId,
+        summary: `sub-${index} done`,
+      },
+    );
+  }
+
+  it('archives the change when the last sub-task completes and auto_archive is true', async () => {
+    await call<PublishResult>(
+      'task_plan_publish',
+      basicPublishArgs({ slug: 'auto-archive-on', auto_archive: true }),
+    );
+
+    // First completion → still has outstanding work, no archive yet.
+    const first = await claimAndComplete('auto-archive-on', 0, 'B', 'codex');
+    expect(first.status).toBe('completed');
+    expect(first.auto_archive.status).toBe('skipped');
+    expect(first.auto_archive.reason).toMatch(/outstanding/);
+    expect(existsSync(join(repoRoot, 'openspec/changes/auto-archive-on/CHANGE.md'))).toBe(true);
+
+    // Last completion → archive runs.
+    const second = await claimAndComplete('auto-archive-on', 1, 'C', 'claude');
+    expect(second.status).toBe('completed');
+    expect(second.auto_archive.status).toBe('archived');
+    // Change moved to openspec/archive/<date>-<slug>/CHANGE.md
+    expect(existsSync(join(repoRoot, 'openspec/changes/auto-archive-on/CHANGE.md'))).toBe(false);
+  });
+
+  it('does not archive when auto_archive is omitted (default off)', async () => {
+    await call<PublishResult>(
+      'task_plan_publish',
+      basicPublishArgs({ slug: 'auto-archive-default-off' }),
+    );
+    await claimAndComplete('auto-archive-default-off', 0, 'B', 'codex');
+    const last = await claimAndComplete('auto-archive-default-off', 1, 'C', 'claude');
+    expect(last.auto_archive.status).toBe('skipped');
+    expect(last.auto_archive.reason).toMatch(/disabled/);
+    // CHANGE.md stays in openspec/changes/.
+    expect(existsSync(join(repoRoot, 'openspec/changes/auto-archive-default-off/CHANGE.md'))).toBe(
+      true,
+    );
+  });
+
+  it('records a plan-archive-error observation when archive throws', async () => {
+    // Force a failure path: publish, complete sub-0, then delete the
+    // CHANGE.md file before the last completion. readChange will throw.
+    await call<PublishResult>(
+      'task_plan_publish',
+      basicPublishArgs({ slug: 'auto-archive-broken', auto_archive: true }),
+    );
+    await claimAndComplete('auto-archive-broken', 0, 'B', 'codex');
+    rmSync(join(repoRoot, 'openspec/changes/auto-archive-broken/CHANGE.md'), {
+      force: true,
+    });
+    const last = await claimAndComplete('auto-archive-broken', 1, 'C', 'claude');
+    // Completion still succeeds — the failure is recorded as an observation,
+    // not propagated as a tool error.
+    expect(last.status).toBe('completed');
+    expect(last.auto_archive.status).toBe('error');
+  });
+});
diff --git a/docs/mcp.md b/docs/mcp.md
index c2ece35..5e4be94 100644
--- a/docs/mcp.md
+++ b/docs/mcp.md
@@ -751,7 +751,8 @@ Publish a multi-task plan as a spec change with one task thread per sub-task. Su
         "depends_on": [0],
         "capability_hint": "ui_work"
       }
-    ]
+    ],
+    "auto_archive": false
   }
 }
 ```
@@ -762,6 +763,10 @@ Validation:
 - `depends_on` indices are zero-based and must point to **earlier** indices (cycle prevention).
 - Independent sub-tasks (no `depends_on` chain between them) cannot share `file_scope` entries. To overlap files, sequence the work via `depends_on`.
 
+Optional inputs:
+
+- `auto_archive` (default `false`): when `true`, the parent spec change three-way-merges and archives automatically after the last sub-task completes. Conflicts block the auto-archive (recorded as a `plan-archive-blocked` observation on the parent spec task) instead of forcing — the change stays open so the merge can be resolved by hand. Leave `auto_archive` off until you trust the lane to land cleanly; opt in per plan.
+
 Returns `{ plan_slug, spec_task_id, spec_change_path, subtasks: [{ subtask_index, branch, task_id, title }] }`. Errors: `PLAN_INVALID_DEPENDENCY`, `PLAN_SCOPE_OVERLAP`.
 
 ## `task_plan_list`
@@ -816,14 +821,25 @@ Mark your claimed sub-task complete. Releases the sub-task file claims and stamp
 }
 ```
 
-Returns `{ status: 'completed' }`. Errors: `PLAN_SUBTASK_NOT_FOUND`, `PLAN_SUBTASK_NOT_CLAIMED`, `PLAN_SUBTASK_NOT_YOURS`.
+Returns `{ status: 'completed', auto_archive: { status, reason?, archived_path?, merged_root_hash?, applied?, conflicts? } }`. The `auto_archive` field is always present and reports what happened on this completion:
+
+- `status: 'skipped'` — auto-archive disabled, sub-tasks still outstanding, or no parent linkage. `reason` carries the specific cause.
+- `status: 'archived'` — the merge was clean and the change is now under `openspec/archive/<date>-<slug>/`. `archived_path` and `merged_root_hash` describe the result.
+- `status: 'blocked'` — auto-archive opted in and the plan is fully completed, but the three-way merge has conflicts. The change stays open; resolve by hand and call `spec_archive` directly.
+- `status: 'error'` — auto-archive opted in but the archive flow threw (for example a missing `CHANGE.md`). The completion still succeeded; the failure is recorded as a `plan-archive-error` observation on the parent spec task.
+
+Errors on `task_plan_complete_subtask` itself: `PLAN_SUBTASK_NOT_FOUND`, `PLAN_SUBTASK_NOT_CLAIMED`, `PLAN_SUBTASK_NOT_YOURS`.
 
 ## Plan observation kinds
 
-The lane introduces two observation kinds on the sub-task threads. They are written through `MemoryStore.addObservation`, so content is compressed and `metadata` carries the structured payload.
+The lane introduces several observation kinds on the parent spec task and on the sub-task threads. They are written through `MemoryStore.addObservation`, so content is compressed and `metadata` carries the structured payload.
 
 - `plan-subtask` — the initial advertisement, one per sub-task at publish time. `metadata` carries `parent_plan_slug`, `parent_plan_title`, `parent_spec_task_id`, `subtask_index`, `file_scope`, `depends_on`, `capability_hint`, and an initial `status: 'available'`.
 - `plan-subtask-claim` — every lifecycle transition (claim, complete). `metadata.status` is the new state; `metadata.session_id` and `metadata.agent` identify the actor. The latest `plan-subtask-claim` observation by timestamp is authoritative.
+- `plan-config` — written on the parent spec task at publish time. Carries plan-level lifecycle policy. Today: `metadata.auto_archive`.
+- `plan-archived` — written on the parent spec task when auto-archive succeeds. Carries `archived_path`, `merged_root_hash`, `applied`.
+- `plan-archive-blocked` — written when auto-archive is ready but the three-way merge has conflicts. Carries `conflicts` (the conflict set) and `applied` (the deltas that did merge cleanly).
+- `plan-archive-error` — written when auto-archive throws. Carries the error message in `metadata.error`. The sub-task completion still succeeded; auto-archive errors never tear down completion.
 
 ## Contract stability
 
diff --git a/packages/core/src/plan.ts b/packages/core/src/plan.ts
index 4f62ebe..9377ad6 100644
--- a/packages/core/src/plan.ts
+++ b/packages/core/src/plan.ts
@@ -54,14 +54,28 @@ function readSubtask(store: MemoryStore, task_id: number, plan_slug: string): Su
   const initial = rows.find((r) => r.kind === 'plan-subtask');
   if (!initial) return null;
   const meta = parseMeta(initial.metadata);
-  // taskTimeline returns DESC by ts, so the first claim row is the latest.
-  const latestClaim = rows.find((r) => r.kind === 'plan-subtask-claim');
-  const claimMeta = latestClaim ? parseMeta(latestClaim.metadata) : {};
-
-  const status =
-    (claimMeta.status as SubtaskStatus | undefined) ??
-    (meta.status as SubtaskStatus | undefined) ??
-    'available';
+
+  // Lifecycle resolution. taskTimeline orders by `ts DESC`, but two
+  // observations stamped within the same millisecond have undefined
+  // tie-breaker order in SQLite, so the latest-by-ts row can flicker
+  // between `claimed` and `completed` for a sub-task that was just
+  // finished. Resolve with terminal-state-wins precedence so a
+  // `completed` row is authoritative once it exists; the attribution
+  // metadata (session_id, agent) is read from the same row that
+  // decided the status.
+  const claimRows = rows.filter((r) => r.kind === 'plan-subtask-claim');
+  let claimMeta: Record<string, unknown> = {};
+  let resolvedStatus: SubtaskStatus | undefined;
+  for (const precedence of ['completed', 'blocked', 'claimed'] as const) {
+    const match = claimRows.find((r) => parseMeta(r.metadata).status === precedence);
+    if (match) {
+      claimMeta = parseMeta(match.metadata);
+      resolvedStatus = precedence;
+      break;
+    }
+  }
+
+  const status = resolvedStatus ?? (meta.status as SubtaskStatus | undefined) ?? 'available';
 
   const [titleLine, ...rest] = initial.content.split('\n\n');
   return {