diff --git a/examples/real-autoresearch/grove.md b/examples/real-autoresearch/grove.md
index 4a449a71..9ace58d9 100644
--- a/examples/real-autoresearch/grove.md
+++ b/examples/real-autoresearch/grove.md
@@ -16,25 +16,23 @@ metrics:
     unit: GB
     description: Peak VRAM usage during training
 outcome_policy:
-  auto_evaluate: true
-  accept_if:
-    metric: val_bpb
-    condition: improved_over_parent
+  auto_accept:
+    metric_improves: val_bpb
 stop_conditions:
-  no_improvement_rounds: 5
-  max_rounds: 20
+  max_rounds_without_improvement: 5
   target_metric:
     metric: val_bpb
-    threshold: 0.85
-  wall_clock_budget: "3h"
-enforcement:
-  claim_policy:
-    max_concurrent: 3
-    lease_duration: "10m"
+    value: 0.85
+  budget:
+    max_wall_clock_seconds: 10800
+  deliberation_limit:
+    max_rounds: 20
 concurrency:
   max_active_claims: 3
   max_claims_per_agent: 1
   max_claims_per_target: 1
+execution:
+  default_lease_seconds: 600
 rate_limits:
   max_contributions_per_agent_per_hour: 100
   max_contributions_per_grove_per_hour: 300
diff --git a/src/core/event-bus.test.ts b/src/core/event-bus.test.ts
index ab8f7f34..cc4a8e7d 100644
--- a/src/core/event-bus.test.ts
+++ b/src/core/event-bus.test.ts
@@ -213,17 +213,17 @@ describe("TopologyRouter", () => {
     bus.close();
   });
 
-  test("targetsFor returns correct targets", () => {
+  test("targetsFor returns RoleEdge objects for each outgoing edge", () => {
     const bus = new LocalEventBus();
     const router = new TopologyRouter(reviewLoopTopology, bus);
-    expect(router.targetsFor("coder")).toEqual(["reviewer"]);
-    expect(router.targetsFor("reviewer")).toEqual(["coder"]);
+    expect(router.targetsFor("coder")).toEqual([{ target: "reviewer", edgeType: "delegates" }]);
+    expect(router.targetsFor("reviewer")).toEqual([{ target: "coder", edgeType: "feedback" }]);
     expect(router.targetsFor("unknown")).toEqual([]);
     bus.close();
   });
 
-  test("duplicate edges are deduplicated", () => {
-    const duped: AgentTopology = {
+  test("targetsFor returns multiple RoleEdges when different edge types point to same target", () => {
+    const multiEdge: AgentTopology = {
       structure: "graph",
       roles: [
         {
@@ -237,15 +237,60 @@ describe("TopologyRouter", () => {
       ],
     };
     const bus = new LocalEventBus();
-    const router = new TopologyRouter(duped, bus);
+    const router = new TopologyRouter(multiEdge, bus);
+    const edges = router.targetsFor("coder");
+    // Both edges preserved — distinct (target, edgeType) pairs
+    expect(edges).toHaveLength(2);
+    expect(edges).toContainEqual({ target: "reviewer", edgeType: "delegates" });
+    expect(edges).toContainEqual({ target: "reviewer", edgeType: "feeds" });
+    bus.close();
+  });
+
+  test("route() publishes one event per target even when multiple edge types point to same target", () => {
+    const multiEdge: AgentTopology = {
+      structure: "graph",
+      roles: [
+        {
+          name: "coder",
+          edges: [
+            { target: "reviewer", edgeType: "delegates" },
+            { target: "reviewer", edgeType: "feeds" },
+          ],
+        },
+        { name: "reviewer" },
+      ],
+    };
+    const bus = new LocalEventBus();
+    const router = new TopologyRouter(multiEdge, bus);
     const received: GroveEvent[] = [];
     bus.subscribe("reviewer", (e) => received.push(e));
 
     const targets = router.route("coder", {});
 
-    // Should only route once to reviewer despite two edges
+    // route() deduplicates by target: one event despite two distinct edges
     expect(targets).toEqual(["reviewer"]);
     expect(received).toHaveLength(1);
     bus.close();
   });
+
+  test("targetsFor deduplicates exact (target, edgeType) duplicate pairs", () => {
+    const exactDupes: AgentTopology = {
+      structure: "graph",
+      roles: [
+        {
+          name: "coder",
+          edges: [
+            { target: "reviewer", edgeType: "delegates" },
+            { target: "reviewer", edgeType: "delegates" }, // exact duplicate
+          ],
+        },
+        { name: "reviewer" },
+      ],
+    };
+    const bus = new LocalEventBus();
+    const router = new TopologyRouter(exactDupes, bus);
+    // Exact (target, edgeType) duplicate is collapsed to one entry
+    expect(router.targetsFor("coder")).toEqual([{ target: "reviewer", edgeType: "delegates" }]);
+    bus.close();
+  });
 });
diff --git a/src/core/examples.test.ts b/src/core/examples.test.ts
new file mode 100644
index 00000000..c22fecf5
--- /dev/null
+++ b/src/core/examples.test.ts
@@ -0,0 +1,56 @@
+/**
+ * CI guard: parse all examples/\*\*\/grove.md files against parseGroveContract.
+ *
+ * This test ensures that example GROVE.md files always use valid field names
+ * and schema-conformant values. Without this guard, field-name drift goes
+ * undetected until an agent or user copy-pastes the example and gets a parse
+ * error from parseGroveContract.
+ *
+ * New examples are automatically covered — no manual registration required.
+ */
+
+import { describe, expect, test } from "bun:test";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+import { parseGroveContract } from "./contract.js";
+
+describe("example grove.md files", () => {
+  test("all examples/**/grove.md files parse against the canonical contract schema", async () => {
+    const { Glob } = await import("bun");
+
+    // The glob is resolved against process.cwd(), which is the repo root when running
+    // `bun test` from the project root.
+    const repoRoot = process.cwd();
+    const pattern = "examples/**/grove.md";
+
+    // Keep the repo-relative paths from the scan: they are exactly what the failure
+    // report should print, so nothing has to be stripped back off an absolute path.
+    const files = [...new Glob(pattern).scanSync({ cwd: repoRoot, absolute: false })];
+
+    // Guard against a vacuous pass when the scan finds nothing (e.g. wrong cwd).
+    expect(files.length).toBeGreaterThan(0);
+
+    const failures: Array<{ file: string; error: string }> = [];
+
+    for (const rel of files) {
+      try {
+        const content = readFileSync(join(repoRoot, rel), "utf8");
+        parseGroveContract(content);
+      } catch (err) {
+        failures.push({
+          file: rel,
+          error: err instanceof Error ? err.message : String(err),
+        });
+      }
+    }
+
+    if (failures.length > 0) {
+      const report = failures.map((f) => `  ${f.file}:\n    ${f.error}`).join("\n");
+      throw new Error(
+        `${failures.length} example grove.md file(s) failed to parse:\n${report}\n\n` +
+          "Fix the field names to match the canonical contract schema in src/core/contract.ts",
+      );
+    }
+  });
+});
diff --git a/src/core/operations/contribute.test.ts b/src/core/operations/contribute.test.ts
index eda13d15..66cfbbb1 100644
--- a/src/core/operations/contribute.test.ts
+++ b/src/core/operations/contribute.test.ts
@@ -939,6 +939,7 @@ describe("writeSerial: best-effort handoff failure paths", () => {
   });
 
   test("emits console.warn when handoffStore.createMany throws", async () => {
+    // biome-ignore lint/suspicious/noEmptyBlockStatements: spy suppresses output intentionally
     const warnSpy = spyOn(console, "warn").mockImplementation(() => {});
 
     const faultyHandoffStore: OperationDeps["handoffStore"] = {
@@ -978,6 +979,7 @@ describe("writeSerial: best-effort handoff failure paths", () => {
     // Promise, the throw must still be caught — otherwise the already-committed
     // contribution would bubble out as an operation error and the idempotency
     // slot would be released, allowing duplicate contributions on retry.
+    // biome-ignore lint/suspicious/noEmptyBlockStatements: spy suppresses output intentionally
     const warnSpy = spyOn(console, "warn").mockImplementation(() => {});
 
     // Non-async function so the throw happens synchronously, before any
diff --git a/src/core/operations/contribute.ts b/src/core/operations/contribute.ts
index 20ea77ef..e8aa8788 100644
--- a/src/core/operations/contribute.ts
+++ b/src/core/operations/contribute.ts
@@ -851,8 +851,12 @@ export async function contributeOperation(
           `[grove] Warning: topology router is active but agent '${contribution.agent.agentId}' has no role — routing skipped. Set agent.role to enable topology routing.\n`,
         );
       } else {
-        const targets = deps.topologyRouter.targetsFor(contribution.agent.role);
-        if (targets.length > 0) routedTo = [...targets];
+        const edges = deps.topologyRouter.targetsFor(contribution.agent.role);
+        // Deduplicate by target: a role may have multiple edge types (e.g.
+        // delegates + feeds) pointing at the same downstream role. Creating
+        // one handoff per (source, target) pair is correct; creating one per
+        // edge type would produce duplicate pending handoffs for the same work.
+        if (edges.length > 0) routedTo = [...new Set(edges.map((e) => e.target))];
       }
     }
 
diff --git a/src/core/operations/eval.ts b/src/core/operations/eval.ts
new file mode 100644
index 00000000..a86979b5
--- /dev/null
+++ b/src/core/operations/eval.ts
@@ -0,0 +1,231 @@
+/**
+ * Eval operation.
+ *
+ * evalOperation — Run the contract's eval harness against a target CID and
+ * return structured metric scores.
+ *
+ * The operation spawns the evalCommand as a subprocess (via `sh -c`),
+ * streams stdout/stderr line-by-line looking for GROVE_SCORE lines, and
+ * returns the parsed scores along with exit metadata.
+ *
+ * Score line format (stdout or stderr):
+ *   GROVE_SCORE <metric_name>=<numeric_value>
+ *
+ * Example:
+ *   GROVE_SCORE val_bpb=0.92
+ *   GROVE_SCORE peak_vram_gb=14.3
+ *
+ * The target CID is passed to the subprocess via the GROVE_TARGET_CID
+ * environment variable so eval scripts can locate the artifact.
+ *
+ * Output is streamed and capped at MAX_OUTPUT_BYTES (16 MB). The last 4 KB
+ * of combined stdout+stderr is returned as rawTail for diagnostics.
+ */
+
+import { spawn } from "node:child_process";
+
+import type { OperationDeps } from "./deps.js";
+import type { OperationResult } from "./result.js";
+import { err, OperationErrorCode, ok, validationErr } from "./result.js";
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/** Maximum combined stdout+stderr bytes buffered in memory. */
+const MAX_OUTPUT_BYTES = 16 * 1024 * 1024; // 16 MB
+
+/** Tail of combined output returned for diagnostics (bytes). */
+const TAIL_BYTES = 4096;
+
+/** Default timeout when neither input nor contract specifies one (ms). */
+const DEFAULT_TIMEOUT_MS = 300_000; // 5 minutes
+
+/** Pattern for score lines emitted by the eval subprocess. */
+const SCORE_LINE_RE =
+  /^GROVE_SCORE\s+([a-z][a-z0-9_]*)=([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)$/i;
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/** A single metric score returned by the eval harness. */
+export interface EvalScore {
+  readonly metric: string;
+  readonly value: number;
+}
+
+/** Input for evalOperation. */
+export interface EvalInput {
+  /** CID of the contribution artifact to evaluate. Passed as GROVE_TARGET_CID env var. */
+  readonly targetCid: string;
+  /**
+   * Shell command to execute as the eval harness.
+   * Typed as optional so a contract-level default can be introduced in a
+   * future protocol version. No such default is consulted today: evalOperation
+   * returns a validation error whenever this is omitted or empty.
+   */
+  readonly evalCommand?: string | undefined;
+  /**
+   * Timeout in milliseconds before the subprocess is killed.
+   * Defaults to DEFAULT_TIMEOUT_MS (5 minutes). A contract-level evaluation
+   * timeout is planned as an intermediate default but is not consulted yet.
+   */
+  readonly timeoutMs?: number | undefined;
+}
+
+/** Result of evalOperation on success. */
+export interface EvalResult {
+  /** Parsed metric scores from GROVE_SCORE lines in the subprocess output. */
+  readonly scores: readonly EvalScore[];
+  /** Exit code of the eval subprocess (0 = success). */
+  readonly exitCode: number;
+  /** True when the subprocess was killed due to timeout. */
+  readonly timedOut: boolean;
+  /** Last ~4 KB of combined stdout+stderr for diagnostics. */
+  readonly rawTail: string;
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/** Run evalCommand via sh -c, stream and parse its output; rejects on a child "error" event. */
+async function runEvalSubprocess(
+  command: string,
+  targetCid: string,
+  timeoutMs: number,
+): Promise<{ scores: EvalScore[]; exitCode: number; timedOut: boolean; rawTail: string }> {
+  return new Promise((resolve, reject) => {
+    const scores: EvalScore[] = [];
+    let timedOut = false;
+    // Combined stdout+stderr, trimmed to its tail whenever it outgrows MAX_OUTPUT_BYTES.
+    let rawOutput = "";
+    let killTimer: ReturnType<typeof setTimeout> | undefined; // pending SIGKILL escalation
+
+    // detached: true → own process group, so a timeout can kill sh and everything it forked.
+    const child = spawn("sh", ["-c", command], {
+      env: { ...process.env, GROVE_TARGET_CID: targetCid },
+      stdio: ["ignore", "pipe", "pipe"],
+      detached: true,
+    });
+
+    /** Kill the full process group; fall back to the direct PID on error. */
+    const killGroup = (signal: NodeJS.Signals): void => {
+      try {
+        if (child.pid !== undefined) process.kill(-child.pid, signal);
+      } catch {
+        try {
+          child.kill(signal);
+        } catch {
+          // Already exited — ignore.
+        }
+      }
+    };
+
+    const timer = setTimeout(() => {
+      timedOut = true;
+      killGroup("SIGTERM");
+      // Give the process group a moment to exit before SIGKILL.
+      killTimer = setTimeout(() => killGroup("SIGKILL"), 5000);
+    }, timeoutMs);
+
+    /** Buffer output; past the cap (counted in UTF-16 units) keep only the diagnostic tail. */
+    const appendOutput = (chunk: string): void => {
+      rawOutput += chunk;
+      if (rawOutput.length > MAX_OUTPUT_BYTES) rawOutput = rawOutput.slice(-TAIL_BYTES);
+    };
+
+    /** Parse a single line for a GROVE_SCORE entry. */
+    const parseLine = (line: string): void => {
+      const match = line.trim().match(SCORE_LINE_RE);
+      if (!match) return;
+      scores.push({ metric: match[1]?.toLowerCase() ?? "", value: parseFloat(match[2] ?? "0") });
+    };
+
+    /** Stream a data chunk through a line buffer, calling parseLine per line. */
+    const makeLineHandler = (): { handler: (chunk: Buffer) => void; flush: () => void } => {
+      let buf = "";
+      return {
+        handler: (chunk: Buffer) => {
+          const text = chunk.toString("utf8");
+          appendOutput(text);
+          const lines = (buf + text).split("\n");
+          buf = lines.pop() ?? "";
+          for (const line of lines) parseLine(line);
+        },
+        flush: () => {
+          if (buf) parseLine(buf);
+          buf = "";
+        },
+      };
+    };
+
+    const stdoutHandler = makeLineHandler();
+    const stderrHandler = makeLineHandler();
+    child.stdout?.on("data", stdoutHandler.handler);
+    child.stderr?.on("data", stderrHandler.handler);
+
+    // An unhandled "error" event (e.g. failed spawn) would crash the process; reject instead.
+    child.on("error", (error) => {
+      clearTimeout(timer);
+      if (killTimer !== undefined) clearTimeout(killTimer);
+      reject(error);
+    });
+
+    child.on("close", (code) => {
+      // Flush any partial line that didn't end with a newline (e.g. final GROVE_SCORE line).
+      stdoutHandler.flush();
+      stderrHandler.flush();
+      // Cancel both timers so a late SIGKILL cannot hit a recycled PID or delay process exit.
+      clearTimeout(timer);
+      if (killTimer !== undefined) clearTimeout(killTimer);
+      const exitCode = code ?? (timedOut ? 124 : 1);
+      resolve({ scores, exitCode, timedOut, rawTail: rawOutput.slice(-TAIL_BYTES) });
+    });
+  });
+}
+
+// ---------------------------------------------------------------------------
+// Operation
+// ---------------------------------------------------------------------------
+
+/**
+ * Run the eval harness against a target CID and return structured scores.
+ *
+ * Blank evalCommand/targetCid yield a validation error; errors thrown while running yield InternalError.
+ */
+export async function evalOperation(
+  _deps: OperationDeps,
+  input: EvalInput,
+): Promise<OperationResult<EvalResult>> {
+  const { targetCid, evalCommand, timeoutMs } = input;
+
+  // Resolve the command to run; a whitespace-only command is as unusable as a missing one.
+  const command = evalCommand;
+  if (!command || command.trim().length === 0) {
+    return validationErr(
+      "evalCommand is required: provide it as input or configure evaluation.eval_command in GROVE.md",
+    );
+  }
+
+  // Validate targetCid is non-empty (format validation; existence check is out of scope
+  // since eval may run before the contribution is written).
+  if (!targetCid || targetCid.trim().length === 0) {
+    return validationErr("targetCid must be a non-empty string");
+  }
+
+  // Resolve timeout: input > contract (future) > default. Clamp to the largest delay
+  // setTimeout supports (2^31 - 1 ms); larger values would otherwise fire almost immediately.
+  const resolvedTimeout = Math.min(timeoutMs ?? DEFAULT_TIMEOUT_MS, 2 ** 31 - 1);
+
+  try {
+    // runEvalSubprocess already returns exactly the EvalResult fields.
+    return ok(await runEvalSubprocess(command, targetCid.trim(), resolvedTimeout));
+  } catch (error) {
+    return err({
+      code: OperationErrorCode.InternalError,
+      message: error instanceof Error ? error.message : String(error),
+    });
+  }
+}
diff --git a/src/core/operations/index.ts b/src/core/operations/index.ts
index bda06dde..e14697c9 100644
--- a/src/core/operations/index.ts
+++ b/src/core/operations/index.ts
@@ -61,6 +61,9 @@ export {
 } from "./contribute.js";
 // Foundation
 export type { OperationDeps } from "./deps.js";
+// Eval operation
+export type { EvalInput, EvalResult, EvalScore } from "./eval.js";
+export { evalOperation } from "./eval.js";
 // Lifecycle operation
 export type { CheckStopResult, StopConditionStatus } from "./lifecycle.js";
 export { checkStopOperation } from "./lifecycle.js";
diff --git a/src/core/operations/plan.test.ts b/src/core/operations/plan.test.ts
index 3ae74071..0609b888 100644
--- a/src/core/operations/plan.test.ts
+++ b/src/core/operations/plan.test.ts
@@ -444,7 +444,8 @@ describe("plan routing semantics (Issues 1A + 13A)", () => {
     // Build a topology router that would route 'planner' -> 'coder' for any
     // contribution. Plans should still skip handoff creation.
     const topologyRouter = {
-      targetsFor: (role: string) => (role === "planner" ? ["coder"] : []),
+      targetsFor: (role: string) =>
+        role === "planner" ? [{ target: "coder", edgeType: "delegates" as const }] : [],
       route: async () => {
         /* fire-and-forget event */
       },
diff --git a/src/core/topology-router.ts b/src/core/topology-router.ts
index a88684b5..abd51d54 100644
--- a/src/core/topology-router.ts
+++ b/src/core/topology-router.ts
@@ -1,32 +1,51 @@
 import type { EventBus, GroveEvent } from "./event-bus.js";
-import type { AgentTopology } from "./topology.js";
+import type { AgentTopology, RoleEdge } from "./topology.js";
 
 /**
  * Routes contribution events through topology edges.
  *
  * Given a contribution from a source role, finds all outgoing edges
  * from that role and publishes events to the target roles.
+ *
+ * Edge types are structurally preserved in the edge map (deduplicated by
+ * (target, edgeType) pair). Behavioral semantics for edge types are
+ * informational in the current protocol version — routing behavior is flat
+ * (all edges produce the same handoff pattern). Behavioral routing is planned
+ * for a future protocol version.
+ *
+ * Memory bound: max 50 roles × 50 edges = 2500 RoleEdge objects (~100KB).
+ * All caps are enforced by the contract Zod schema.
  */
 export class TopologyRouter {
   private readonly topology: AgentTopology;
   private readonly eventBus: EventBus;
-  private readonly edgeMap: ReadonlyMap<string, readonly string[]>;
+  // source role → outgoing edges, deduplicated by (target, edgeType) pair
+  private readonly edgeMap: ReadonlyMap<string, readonly RoleEdge[]>;
 
   constructor(topology: AgentTopology, eventBus: EventBus) {
     this.topology = topology;
     this.eventBus = eventBus;
-    // Pre-compute: source role -> target roles
-    const map = new Map<string, string[]>();
+    // Pre-compute: source role -> outgoing RoleEdge[], deduped by (target, edgeType).
+    // Use a Set<string> keyed by "target:edgeType" for O(1) dedup instead of O(n) Array.includes.
+    const map = new Map<string, RoleEdge[]>();
+    const seen = new Map<string, Set<string>>();
     for (const role of topology.roles) {
       if (role.edges) {
         for (const edge of role.edges) {
-          let targets = map.get(role.name);
-          if (!targets) {
-            targets = [];
-            map.set(role.name, targets);
+          let edges = map.get(role.name);
+          if (!edges) {
+            edges = [];
+            map.set(role.name, edges);
           }
-          if (!targets.includes(edge.target)) {
-            targets.push(edge.target);
+          let seenForRole = seen.get(role.name);
+          if (!seenForRole) {
+            seenForRole = new Set<string>();
+            seen.set(role.name, seenForRole);
+          }
+          const key = `${edge.target}:${edge.edgeType}`;
+          if (!seenForRole.has(key)) {
+            seenForRole.add(key);
+            edges.push({ target: edge.target, edgeType: edge.edgeType });
           }
         }
       }
@@ -36,25 +55,31 @@ export class TopologyRouter {
 
   /**
    * Route an event from a source role to all downstream targets.
-   * Returns the list of target roles that received the event.
+   * Publishes one event per unique target role (deduplicates by target when
+   * multiple edge types point to the same target). Returns the list of unique
+   * target roles that received the event.
    */
   route(sourceRole: string, payload: Record<string, unknown>): readonly string[] {
-    const targets = this.edgeMap.get(sourceRole);
-    if (!targets || targets.length === 0) return [];
+    const edges = this.edgeMap.get(sourceRole);
+    if (!edges || edges.length === 0) return [];
 
     const timestamp = new Date().toISOString();
     const routedTo: string[] = [];
+    const publishedTargets = new Set<string>();
 
-    for (const targetRole of targets) {
-      const event: GroveEvent = {
-        type: "contribution",
-        sourceRole,
-        targetRole,
-        payload,
-        timestamp,
-      };
-      this.eventBus.publish(event);
-      routedTo.push(targetRole);
+    for (const edge of edges) {
+      if (!publishedTargets.has(edge.target)) {
+        publishedTargets.add(edge.target);
+        const event: GroveEvent = {
+          type: "contribution",
+          sourceRole,
+          targetRole: edge.target,
+          payload,
+          timestamp,
+        };
+        this.eventBus.publish(event);
+        routedTo.push(edge.target);
+      }
     }
 
     return routedTo;
@@ -77,8 +102,14 @@ export class TopologyRouter {
     }
   }
 
-  /** Get the target roles for a given source role. */
-  targetsFor(sourceRole: string): readonly string[] {
+  /**
+   * Get all outgoing edges for a given source role.
+   *
+   * Returns all distinct (target, edgeType) pairs. Multiple edges to the
+   * same target with different edge types are preserved as separate entries.
+   * Returns an empty array for unknown roles.
+   */
+  targetsFor(sourceRole: string): readonly RoleEdge[] {
     return this.edgeMap.get(sourceRole) ?? [];
   }
 }
diff --git a/src/mcp/serve-http.ts b/src/mcp/serve-http.ts
index 5698f089..4ab000cc 100644
--- a/src/mcp/serve-http.ts
+++ b/src/mcp/serve-http.ts
@@ -56,6 +56,7 @@ let nexusUrl: string | undefined;
 let nexusApiKey: string | undefined;
 let zoneId = "default";
 let nexusClient: import("../nexus/nexus-http-client.js").NexusHttpClient | undefined;
+// biome-ignore lint/suspicious/noEmptyBlockStatements: default no-op replaced in try block
 let closeStores: () => void = () => {};
 
 try {
@@ -305,6 +306,7 @@ async function buildScopedDeps(sessionId: string | undefined): Promise<ScopedDep
     bountyStore,
     cas,
     frontier: runtime.frontier,
+    // biome-ignore lint/style/noNonNullAssertion: checked above (workspace guard throws if undefined)
     workspace: runtime.workspace!,
     contract: loadedContract,
     onContributionWrite: runtime.onContributionWrite,
@@ -554,7 +556,12 @@ async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise
         });
       },
     });
-    const server = await createMcpServer(scopedDeps);
+    // grove_eval executes arbitrary shell commands. Disable it on the HTTP
+    // transport unless the operator has explicitly set auth (AUTH_TOKEN) AND
+    // opted in via GROVE_MCP_EVAL_ENABLED=true. Unauthenticated HTTP exposure
+    // of shell execution is a remote-code-execution risk.
+    const evalEnabled = AUTH_TOKEN !== undefined && process.env.GROVE_MCP_EVAL_ENABLED === "true";
+    const server = await createMcpServer(scopedDeps, { eval: evalEnabled });
 
     transport.onclose = () => {
       const sid = transport.sessionId;
diff --git a/src/mcp/serve.ts b/src/mcp/serve.ts
index 53a1ab7f..206e3037 100644
--- a/src/mcp/serve.ts
+++ b/src/mcp/serve.ts
@@ -276,6 +276,7 @@ try {
   const onContributionWritten =
     envSessionId && !nexusClient
       ? (cid: string) => {
+          // biome-ignore lint/suspicious/noEmptyBlockStatements: fire-and-forget, errors intentionally swallowed
           void runtime.goalSessionStore.addContributionToSession(envSessionId, cid).catch(() => {});
         }
       : undefined;
@@ -302,6 +303,10 @@ try {
   const hasMetrics =
     loadedContract?.metrics !== undefined && Object.keys(loadedContract.metrics).length > 0;
 
+  // grove_eval executes arbitrary sh -c. Disabled by default on all transports;
+  // enable with GROVE_MCP_EVAL_ENABLED=true (stdio) or AUTH_TOKEN +
+  // GROVE_MCP_EVAL_ENABLED=true (HTTP — enforced in serve-http.ts).
+  const evalEnabled = process.env.GROVE_MCP_EVAL_ENABLED === "true";
   preset =
     contractMode === "evaluation"
       ? {
@@ -315,6 +320,7 @@ try {
           messaging: false,
           plans: true,
           goals: true,
+          eval: evalEnabled,
         }
       : {
           queries: true,
@@ -327,6 +333,7 @@ try {
           messaging: false,
           plans: false,
           goals: true,
+          eval: evalEnabled,
         };
 
   close = () => {
diff --git a/src/mcp/server.integration.test.ts b/src/mcp/server.integration.test.ts
index 5e3ac284..31892b0d 100644
--- a/src/mcp/server.integration.test.ts
+++ b/src/mcp/server.integration.test.ts
@@ -34,7 +34,7 @@ describe("MCP server integration", () => {
     testDeps = await createTestMcpDeps();
     deps = testDeps.deps;
 
-    const server = await createMcpServer(deps);
+    const server = await createMcpServer(deps, { eval: true });
     const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
 
     client = new Client({ name: "test-client", version: "0.0.1" });
@@ -53,7 +53,7 @@ describe("MCP server integration", () => {
     await testDeps.cleanup();
   });
 
-  test("lists all 37 tools", async () => {
+  test("lists all 38 tools", async () => {
     const tools = await client.listTools();
     const toolNames = tools.tools.map((t) => t.name).sort();
     expect(toolNames).toEqual([
@@ -70,6 +70,7 @@ describe("MCP server integration", () => {
       "grove_create_session",
       "grove_discuss",
       "grove_done",
+      "grove_eval",
       "grove_frontier",
       "grove_get_outcome",
       "grove_goal",
diff --git a/src/mcp/server.test.ts b/src/mcp/server.test.ts
index 9d1c412e..a6b746bb 100644
--- a/src/mcp/server.test.ts
+++ b/src/mcp/server.test.ts
@@ -55,7 +55,7 @@ describe("createMcpServer preset scoping", () => {
     "grove_submit_work",
   ];
 
-  // --- Full tool list (matches integration test expectation) ---------------
+  // --- Full tool list (no preset — grove_eval excluded, it is opt-in via eval:true) ---
 
   const allTools = [
     "ask_user",
@@ -99,16 +99,24 @@ describe("createMcpServer preset scoping", () => {
 
   // -----------------------------------------------------------------------
 
-  test("no preset registers all tools (backwards compatible)", async () => {
+  test("no preset registers all tools except grove_eval (eval is opt-in)", async () => {
     const server = await createMcpServer(deps);
     const names = getRegisteredToolNames(server);
     expect(names).toEqual(allTools);
+    expect(names).not.toContain("grove_eval");
   });
 
-  test("empty preset object registers all tools (defaults are true)", async () => {
+  test("empty preset object registers all tools except grove_eval", async () => {
     const server = await createMcpServer(deps, {});
     const names = getRegisteredToolNames(server);
     expect(names).toEqual(allTools);
+    expect(names).not.toContain("grove_eval");
+  });
+
+  test("eval: true enables grove_eval", async () => {
+    const server = await createMcpServer(deps, { eval: true });
+    const names = getRegisteredToolNames(server);
+    expect(names).toContain("grove_eval");
   });
 
   test("claims: false excludes claim tools but keeps others", async () => {
@@ -230,6 +238,16 @@ describe("createMcpServer preset scoping", () => {
     }
   });
 
+  test("eval: false excludes eval tool", async () => {
+    const server = await createMcpServer(deps, { eval: false });
+    const names = getRegisteredToolNames(server);
+    expect(names).not.toContain("grove_eval");
+    // Contribution tools still present
+    for (const t of contributionTools) {
+      expect(names).toContain(t);
+    }
+  });
+
   test("contribution tools are always registered even when everything is disabled", async () => {
     const allDisabled: McpPresetConfig = {
       queries: false,
@@ -242,6 +260,7 @@ describe("createMcpServer preset scoping", () => {
       messaging: false,
       plans: false,
       goals: false,
+      eval: false,
     };
 
     const server = await createMcpServer(deps, allDisabled);
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index d9976003..2493d290 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -17,6 +17,7 @@ import { registerBountyTools } from "./tools/bounties.js";
 import { registerClaimTools } from "./tools/claims.js";
 import { registerContributionTools } from "./tools/contributions.js";
 import { registerDoneTools } from "./tools/done.js";
+import { registerEvalTools } from "./tools/eval.js";
 import { registerGoalTools } from "./tools/goal.js";
 import { registerHandoffTools } from "./tools/handoffs.js";
 import { registerIngestTools } from "./tools/ingest.js";
@@ -54,6 +55,8 @@ export interface McpPresetConfig {
   readonly plans?: boolean;
   /** Register goal/session tools. Default: true. */
   readonly goals?: boolean;
+  /** Register eval harness tool (grove_eval). Default: false (opt-in via GROVE_MCP_EVAL_ENABLED). */
+  readonly eval?: boolean;
 }
 
 // ---------------------------------------------------------------------------
@@ -99,6 +102,7 @@ export async function createMcpServer(deps: McpDeps, preset?: McpPresetConfig):
     registerGoalTools(server, deps);
     registerSessionTools(server, deps);
   }
+  if (preset?.eval === true) registerEvalTools(server, deps);
 
   // ask_user is always registered (core functionality).
   await registerAskUserTools(server);
diff --git a/src/mcp/tools/eval.test.ts b/src/mcp/tools/eval.test.ts
new file mode 100644
index 00000000..213f0d6d
--- /dev/null
+++ b/src/mcp/tools/eval.test.ts
@@ -0,0 +1,150 @@
+/**
+ * Tests for grove_eval MCP tool and evalOperation.
+ *
+ * Covers:
+ * 1. Success path: valid command emitting GROVE_SCORE lines
+ * 2. GROVE_TARGET_CID is passed to the subprocess as an env var
+ * 3. Empty (whitespace-only) targetCid → VALIDATION_ERROR
+ * 4. Missing evalCommand → VALIDATION_ERROR
+ * 5. Subprocess timeout → timedOut: true
+ * 6. Non-zero exit code preserved in result
+ */
+
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import type { McpDeps } from "../deps.js";
+import type { TestMcpDeps } from "../test-helpers.js";
+import { createTestMcpDeps } from "../test-helpers.js";
+import { registerEvalTools } from "./eval.js";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+async function callTool(
+  server: McpServer,
+  name: string,
+  args: Record<string, unknown>,
+): Promise<{ isError: boolean | undefined; text: string }> {
+  const registeredTools = (
+    server as unknown as {
+      _registeredTools: Record<string, { handler: (args: unknown) => Promise<unknown> }>;
+    }
+  )._registeredTools;
+  const tool = registeredTools[name];
+  if (!tool) throw new Error(`Tool ${name} not registered`);
+  const result = (await tool.handler(args)) as {
+    isError?: boolean;
+    content: Array<{ type: string; text: string }>;
+  };
+  return {
+    isError: result.isError,
+    text: result.content[0]?.text ?? "",
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe("grove_eval", () => {
+  let testDeps: TestMcpDeps;
+  let deps: McpDeps;
+  let server: McpServer;
+
+  beforeEach(async () => {
+    testDeps = await createTestMcpDeps();
+    deps = testDeps.deps;
+    server = new McpServer({ name: "test", version: "0.0.1" }, { capabilities: { tools: {} } });
+    registerEvalTools(server, deps);
+  });
+
+  afterEach(async () => {
+    await testDeps.cleanup();
+  });
+
+  // 1. Success path: command emits GROVE_SCORE lines
+  test("returns parsed scores for valid GROVE_SCORE output", async () => {
+    const result = await callTool(server, "grove_eval", {
+      targetCid: "blake3:abc123",
+      evalCommand: "echo 'GROVE_SCORE val_bpb=0.92' && echo 'GROVE_SCORE peak_vram_gb=14.3'",
+    });
+
+    expect(result.isError).toBeUndefined();
+    const data = JSON.parse(result.text);
+    expect(data.timedOut).toBe(false);
+    expect(data.exitCode).toBe(0);
+    expect(data.scores).toHaveLength(2);
+    expect(data.scores).toContainEqual({ metric: "val_bpb", value: 0.92 });
+    expect(data.scores).toContainEqual({ metric: "peak_vram_gb", value: 14.3 });
+  });
+
+  // 2. targetCid is exposed to the subprocess via the GROVE_TARGET_CID env var
+  test("passes GROVE_TARGET_CID as env var to the subprocess", async () => {
+    const result = await callTool(server, "grove_eval", {
+      targetCid: "blake3:deadbeef",
+      // Output a score only if GROVE_TARGET_CID is set and non-empty
+      evalCommand: 'test -n "$GROVE_TARGET_CID" && echo "GROVE_SCORE env_set=1"',
+    });
+
+    expect(result.isError).toBeUndefined();
+    const data = JSON.parse(result.text);
+    expect(data.exitCode).toBe(0);
+    expect(data.scores).toContainEqual({ metric: "env_set", value: 1 });
+  });
+
+  // 3. Empty (whitespace-only) targetCid → VALIDATION_ERROR
+  test("returns VALIDATION_ERROR for empty targetCid", async () => {
+    const result = await callTool(server, "grove_eval", {
+      targetCid: "   ",
+      evalCommand: "echo ok",
+    });
+
+    expect(result.isError).toBe(true);
+    expect(result.text).toContain("VALIDATION_ERROR");
+    expect(result.text).toContain("targetCid");
+  });
+
+  // 4. Missing evalCommand → VALIDATION_ERROR
+  test("returns VALIDATION_ERROR when evalCommand is absent", async () => {
+    const result = await callTool(server, "grove_eval", {
+      targetCid: "blake3:abc123",
+      // evalCommand intentionally omitted
+    });
+
+    expect(result.isError).toBe(true);
+    expect(result.text).toContain("VALIDATION_ERROR");
+    expect(result.text).toContain("evalCommand");
+  });
+
+  // 5. Subprocess timeout → timedOut: true
+  test("returns timedOut=true when subprocess exceeds timeoutMs", async () => {
+    const result = await callTool(server, "grove_eval", {
+      targetCid: "blake3:abc123",
+      evalCommand: "sleep 60",
+      timeoutMs: 500, // very short timeout
+    });
+
+    expect(result.isError).toBeUndefined();
+    const data = JSON.parse(result.text);
+    expect(data.timedOut).toBe(true);
+    // exit code 124 is conventional for timeout-killed processes
+    expect(data.exitCode).toBe(124);
+  }, 10_000); // generous wall-clock timeout for this test
+
+  // 6. Non-zero exit code preserved (scores still returned if any were emitted)
+  test("preserves non-zero exit code in result", async () => {
+    const result = await callTool(server, "grove_eval", {
+      targetCid: "blake3:abc123",
+      evalCommand: "echo 'GROVE_SCORE accuracy=0.75' && exit 2",
+    });
+
+    expect(result.isError).toBeUndefined();
+    const data = JSON.parse(result.text);
+    expect(data.exitCode).toBe(2);
+    expect(data.timedOut).toBe(false);
+    // Score emitted before exit still captured
+    expect(data.scores).toContainEqual({ metric: "accuracy", value: 0.75 });
+  });
+});
diff --git a/src/mcp/tools/eval.ts b/src/mcp/tools/eval.ts
new file mode 100644
index 00000000..f1bec448
--- /dev/null
+++ b/src/mcp/tools/eval.ts
@@ -0,0 +1,67 @@
+/**
+ * MCP tool for the eval harness.
+ *
+ * grove_eval — Run the contract's eval harness against a target CID and
+ * return structured metric scores.
+ *
+ * All business logic is delegated to the shared operations layer.
+ */
+
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { z } from "zod";
+
+import { evalOperation } from "../../core/operations/index.js";
+import type { McpDeps } from "../deps.js";
+import { toMcpResult, toOperationDeps } from "../operation-adapter.js";
+
+// ---------------------------------------------------------------------------
+// Tool registration
+// ---------------------------------------------------------------------------
+
+export function registerEvalTools(server: McpServer, deps: McpDeps): void {
+  const opDeps = toOperationDeps(deps);
+
+  server.registerTool(
+    "grove_eval",
+    {
+      description:
+        "Run the grove eval harness against a target contribution CID and return structured " +
+        "metric scores. The eval command is spawned as a subprocess (via sh -c) with " +
+        "GROVE_TARGET_CID set to targetCid. Score lines must follow the format: " +
+        "'GROVE_SCORE <metric_name>=<value>' on stdout or stderr. " +
+        "Call grove_contribute separately to record results as a reproduction contribution.",
+      inputSchema: {
+        targetCid: z
+          .string()
+          .min(1)
+          .describe(
+            "CID of the contribution artifact to evaluate (passed as GROVE_TARGET_CID env var)",
+          ),
+        evalCommand: z
+          .string()
+          .min(1)
+          .describe(
+            "Shell command to execute as the eval harness (e.g. 'python eval.py'). " +
+              "Required in the current protocol version.",
+          ),
+        timeoutMs: z
+          .number()
+          .int()
+          .min(1000)
+          .max(3_600_000)
+          .optional()
+          .describe(
+            "Timeout in milliseconds before the subprocess is killed (default: 300000 = 5 min).",
+          ),
+      },
+    },
+    async (args) => {
+      const result = await evalOperation(opDeps, {
+        targetCid: args.targetCid,
+        evalCommand: args.evalCommand,
+        timeoutMs: args.timeoutMs,
+      });
+      return toMcpResult(result);
+    },
+  );
+}
diff --git a/src/tui/app.tsx b/src/tui/app.tsx
index c793f5e1..a75a40a1 100644
--- a/src/tui/app.tsx
+++ b/src/tui/app.tsx
@@ -40,6 +40,7 @@ import {
   type TuiDataProvider,
 } from "./provider.js";
 import { useSpawnManager } from "./spawn-manager-context.js";
+import { theme } from "./theme.js";
 
 /** Props for the root App component. */
 export interface AppProps {
@@ -972,20 +973,32 @@ export function App({
           focusedPanel={panels.state.focused}
           keybindingOverrides={keybindingOverrides}
         />
-        <CommandPalette
-          visible={paletteVisible}
-          tmux={tmux}
-          onClose={handleCommandPaletteClose}
-          onSpawn={handleSpawn}
-          onKill={handleKill}
-          topology={topology}
-          activeClaims={activeClaims ?? undefined}
-          selectedIndex={ks.paletteIndex}
-          sessions={paletteSessions ?? undefined}
-          parentAgentId={paletteParentId}
-          items={paletteItems}
-          query={ks.paletteQuery}
-        />
+        {paletteVisible && (
+          <box
+            position="absolute"
+            top={2}
+            left={2}
+            right={2}
+            bottom={2}
+            zIndex={10}
+            backgroundColor={theme.headerBg}
+          >
+            <CommandPalette
+              visible={paletteVisible}
+              tmux={tmux}
+              onClose={handleCommandPaletteClose}
+              onSpawn={handleSpawn}
+              onKill={handleKill}
+              topology={topology}
+              activeClaims={activeClaims ?? undefined}
+              selectedIndex={ks.paletteIndex}
+              sessions={paletteSessions ?? undefined}
+              parentAgentId={paletteParentId}
+              items={paletteItems}
+              query={ks.paletteQuery}
+            />
+          </box>
+        )}
         <InputBar
           visible={
             panels.state.mode === InputMode.TerminalInput ||
diff --git a/src/tui/components/command-palette.tsx b/src/tui/components/command-palette.tsx
index e05f7f25..f656ffe0 100644
--- a/src/tui/components/command-palette.tsx
+++ b/src/tui/components/command-palette.tsx
@@ -193,12 +193,15 @@ export function buildPaletteItems(
           ? String(check.maxInstances)
           : "\u221E";
       const suffix = !check.allowed ? " (at capacity)" : "";
+      const roleEdges = topology?.roles.find((r) => r.name === profile.role)?.edges;
+      const edgeSuffix =
+        roleEdges && roleEdges.length > 0 ? ` → ${roleEdges.map((e) => e.target).join(", ")}` : "";
       items.push({
         kind: "spawn",
         id: profile.role,
         label: `spawn: ${profile.name} [${profile.platform}]`,
         enabled: check.allowed,
-        detail: `${check.currentInstances}/${max}${suffix}`,
+        detail: `${check.currentInstances}/${max}${suffix}${edgeSuffix}`,
       });
     }
   }
@@ -210,12 +213,16 @@ export function buildPaletteItems(
       const check = checkSpawn(topology, role.name, activeClaims, parentAgentId, activeSpawnCounts);
       const max = check.maxInstances !== undefined ? String(check.maxInstances) : "\u221E";
       const suffix = !check.allowed ? " (at capacity)" : "";
+      const edgeSuffix =
+        role.edges && role.edges.length > 0
+          ? ` → ${role.edges.map((e) => e.target).join(", ")}`
+          : "";
       items.push({
         kind: "spawn",
         id: role.name,
         label: `spawn: ${role.name}`,
         enabled: check.allowed,
-        detail: `${check.currentInstances}/${max}${suffix}`,
+        detail: `${check.currentInstances}/${max}${suffix}${edgeSuffix}`,
       });
     }
   }
diff --git a/src/tui/screens/screen-manager.tsx b/src/tui/screens/screen-manager.tsx
index a942aae4..118ec294 100644
--- a/src/tui/screens/screen-manager.tsx
+++ b/src/tui/screens/screen-manager.tsx
@@ -156,6 +156,7 @@ export const ScreenManager: React.NamedExoticComponent<ScreenManagerProps> = Rea
             try {
               const { writeFileSync, renameSync } = await import("node:fs");
               const { join } = await import("node:path");
+              // biome-ignore lint/style/noNonNullAssertion: groveDir is set at startup before any session writes
               const finalPath = join(appProps.groveDir!, "current-session.json");
               const tmpPath = `${finalPath}.${process.pid}.${Date.now()}.tmp`;
               writeFileSync(tmpPath, JSON.stringify({ sessionId: id }, null, 2), "utf-8");
diff --git a/src/tui/spawn-manager.ts b/src/tui/spawn-manager.ts
index 57f4da1a..7bd1a0a1 100644
--- a/src/tui/spawn-manager.ts
+++ b/src/tui/spawn-manager.ts
@@ -993,9 +993,11 @@ export class SpawnManager {
                 .getHandoffs({ sourceCid: c.cid, status: "pending_pickup" })
                 .then((hs) => {
                   for (const h of hs) {
+                    // biome-ignore lint/suspicious/noEmptyBlockStatements: delivery errors silently swallowed per fire-and-forget pattern
                     void hp.markHandoffDelivered(h.handoffId).catch(() => {});
                   }
                 })
+                // biome-ignore lint/suspicious/noEmptyBlockStatements: getHandoffs errors silently swallowed
                 .catch(() => {});
             }
           }