diff --git a/README.md b/README.md
index ce60a9b..993c0a2 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,26 @@ bun run codex-harness/index.ts "Build a personal task manager with a REST API, i
 
 Both harnesses write their output to `workspace/claude/` and `workspace/codex/` respectively. The built application lives in `workspace/{sdk}/app/`.
 
+### Resume an Existing Harness Run
+
+You can resume from an existing `workspace/{sdk}/progress.json` state:
+
+```bash
+# strict resume (default when no value is provided)
+bun run claude-harness/index.ts --resume
+
+# resume current sprint with retry counter reset
+bun run claude-harness/index.ts --resume=reset-retries
+
+# resume current sprint with a newly negotiated contract
+bun run claude-harness/index.ts --resume=reset-contract
+
+# opt into strict retry behavior (re-evaluate every regression immediately)
+bun run claude-harness/index.ts --resume --retry-strategy=strict
+```
+
+The same flags are supported by `codex-harness/index.ts`.
+
 ## Configuration
 
 Defaults are in `shared/config.ts`:
@@ -63,7 +83,10 @@ Defaults are in `shared/config.ts`:
 | `maxSprints` | 10 | Maximum number of sprints |
 | `maxRetriesPerSprint` | 3 | Max evaluation retries before failing a sprint |
 | `passThreshold` | 7 | Minimum score (out of 10) for each criterion |
+| `retryStrategy` | `stabilized` | Retry behavior: `stabilized` keeps previously verified criteria locked unless regressions persist |
+| `hardFailUnlockStreak` | 2 | Number of consecutive hard fails required to unlock a previously passed criterion |
 | `CLAUDE_MODEL` | `claude-sonnet-4-6` | Model for Claude harness |
+| `CLAUDE_MAX_TURNS` | 80 | Max Claude turns per agent run (higher values make long evaluations more likely to run to completion) |
 | `CODEX_MODEL` | `gpt-5.4` | Model for Codex harness |
 
 ## How It Works
@@ -82,8 +105,10 @@ The generator reads the spec and contract, then implements features one at a tim
 ### 4. Evaluation Phase (per sprint)
 The evaluator reads the contract criteria, examines the code, **runs the application**, and tries to break it. It scores each criterion on a 1-10 scale. If all criteria pass (score >= 7/10), the sprint survives. If any fail, detailed feedback goes back to the generator -- with file paths, line numbers, and exact failure descriptions.
 
+Evaluator parsing is hardened: if the first evaluator response cannot be parsed as JSON, the harness automatically retries the evaluator once with a strict JSON-only instruction before recording the evaluation as failed.
+
 ### 5. Retry Loop
-The generator reads the adversarial feedback, decides whether to refine or pivot, and rebuilds. This cycles up to 3 times per sprint. If a sprint can't survive the evaluator after all retries, the harness stops.
+The generator reads the adversarial feedback, decides whether to refine or pivot, and rebuilds. This cycles up to 3 times per sprint. In `stabilized` retry mode, criteria that have already passed are "locked" and only unlocked after repeated hard regressions, which reduces flaky fail/pass oscillations in long sprints.
 
 ### 6. Completion
 Once all sprints pass, you have a working application built incrementally with quality gates at every step -- every feature tested by an agent whose job was to break it.
@@ -126,6 +151,7 @@ Agents communicate through files, not shared conversation history. This keeps ea
 - `spec.md` -- Product specification from the planner
 - `contracts/sprint-{n}.json` -- Sprint contracts
 - `feedback/sprint-{n}-round-{m}.json` -- Evaluator feedback per attempt
+- `feedback/sprint-{n}-stability.json` -- Locked-pass stability state for retry stabilization
 - `progress.json` -- Harness state tracking
 
 ## The GAN Connection
diff --git a/claude-harness/evaluator.ts b/claude-harness/evaluator.ts
index 77730b2..492d84f 100644
--- a/claude-harness/evaluator.ts
+++ b/claude-harness/evaluator.ts
@@ -2,6 +2,7 @@ import { query, type Options } from "@anthropic-ai/claude-agent-sdk";
 import { EVALUATOR_SYSTEM_PROMPT } from "../shared/prompts.ts";
 import { CLAUDE_MODEL, CLAUDE_MAX_TURNS } from "../shared/config.ts";
 import { log, logError } from "../shared/logger.ts";
+import { getCriterionThreshold } from "../shared/evaluation.ts";
 import type { SprintContract, EvalResult } from "../shared/types.ts";
 
 export async function runEvaluator(
@@ -20,7 +21,8 @@ ${JSON.stringify(contract, null, 2)}
 
 ## Pass Threshold
 
-Each criterion must score at least ${passThreshold}/10 to pass.
+Each criterion must satisfy its own \
+\`threshold\` from the sprint contract. If a criterion has no threshold, use ${passThreshold}/10.
 
 ## Instructions
 
@@ -37,43 +39,50 @@ Examine the application in the \`app/\` directory. Read the code, run it if poss
     persistSession: false,
   };
 
-  let fullResponse = "";
+  const fullResponse = await runEvaluationTurn(prompt, options, sprint);
 
-  for await (const msg of query({ prompt, options })) {
-    if (msg.type === "assistant") {
-      const message = msg as { message: { content: Array<{ type: string; text?: string; name?: string }> } };
-      for (const block of message.message.content) {
-        if (block.type === "text" && block.text) {
-          fullResponse += block.text;
-        } else if (block.type === "tool_use" && block.name) {
-          log("EVALUATOR", `  Tool: ${block.name}`);
-        }
-      }
-    } else if (msg.type === "result") {
-      log("EVALUATOR", `Evaluation complete for sprint ${sprint}`);
-    }
+  const invalidThresholds = contract.criteria
+    .filter((criterion) => !Number.isInteger(criterion.threshold) || criterion.threshold < 1 || criterion.threshold > 10)
+    .map((criterion) => `${criterion.name}=${criterion.threshold}`);
+
+  if (invalidThresholds.length > 0) {
+    log(
+      "EVALUATOR",
+      `Ignoring ${invalidThresholds.length} invalid contract thresholds (expected integer 1-10): ${invalidThresholds.join(", ")}`,
+    );
+  }
+
+  let evalResult = tryParseEvalResult(fullResponse, contract, passThreshold);
+  if (!evalResult) {
+    logError("EVALUATOR", "Failed to parse evaluation JSON from first attempt; retrying evaluator once...");
+    const recoveryPrompt = `${prompt}\n\nCRITICAL RETRY INSTRUCTION: Your previous response was not valid JSON. Re-run any checks you need, then output ONLY a valid JSON object matching the required schema.`;
+    const recoveryResponse = await runEvaluationTurn(recoveryPrompt, { ...options, maxTurns: Math.max(CLAUDE_MAX_TURNS, 80) }, sprint);
+    evalResult = tryParseEvalResult(recoveryResponse, contract, passThreshold);
   }
 
-  const evalResult = parseEvalResult(fullResponse, contract, passThreshold);
+  if (!evalResult) {
+    evalResult = buildParseFailureEvalResult(contract, fullResponse);
+  }
 
-  const passedCount = evalResult.feedback.filter((f) => f.score >= passThreshold).length;
+  const passedCount = evalResult.feedback.filter((f) => f.score >= getCriterionThreshold(contract, f.criterion, passThreshold)).length;
   const totalCount = evalResult.feedback.length;
   const verdict = evalResult.passed ? "PASSED" : "FAILED";
   log("EVALUATOR", `Sprint ${sprint}: ${verdict} (${passedCount}/${totalCount} criteria passed)`);
 
   for (const item of evalResult.feedback) {
-    const status = item.score >= passThreshold ? "\x1b[32mPASS\x1b[0m" : "\x1b[31mFAIL\x1b[0m";
-    log("EVALUATOR", `  [${status}] ${item.criterion}: ${item.score}/10 - ${item.details.slice(0, 100)}`);
+    const threshold = getCriterionThreshold(contract, item.criterion, passThreshold);
+    const status = item.score >= threshold ? "\x1b[32mPASS\x1b[0m" : "\x1b[31mFAIL\x1b[0m";
+    log("EVALUATOR", `  [${status}] ${item.criterion}: ${item.score}/10 (threshold ${threshold}) - ${item.details.slice(0, 100)}`);
   }
 
   return evalResult;
 }
 
-function parseEvalResult(
+function tryParseEvalResult(
   response: string,
   contract: SprintContract,
   passThreshold: number,
-): EvalResult {
+): EvalResult | null {
   // Try multiple strategies to extract JSON from the response
   const candidates: string[] = [];
 
@@ -94,8 +103,7 @@ function parseEvalResult(
     try {
       const parsed = JSON.parse(candidate) as EvalResult;
       if (parsed.feedback && Array.isArray(parsed.feedback)) {
-        // Recalculate passed based on threshold
-        parsed.passed = parsed.feedback.every((f) => f.score >= passThreshold);
+        parsed.passed = parsed.feedback.every((f) => f.score >= getCriterionThreshold(contract, f.criterion, passThreshold));
         return parsed;
       }
     } catch {
@@ -103,6 +111,10 @@ function parseEvalResult(
     }
   }
 
+  return null;
+}
+
+function buildParseFailureEvalResult(contract: SprintContract, response: string): EvalResult {
   logError("EVALUATOR", "Failed to parse evaluation JSON from any extraction strategy");
   return {
     passed: false,
@@ -115,3 +127,59 @@ function parseEvalResult(
     overallSummary: "Evaluation parsing failed. Raw response: " + response.slice(0, 500),
   };
 }
+
+async function runEvaluationTurn(prompt: string, options: Options, sprint: number): Promise<string> {
+  let fullResponse = "";
+  // Runs one evaluator query and returns the text it produced: every assistant text block, plus any JSON-looking strings found in the result message.
+  for await (const msg of query({ prompt, options })) {
+    if (msg.type === "assistant") {
+      const message = msg as { message: { content: Array<{ type: string; text?: string; name?: string }> } };
+      for (const block of message.message.content) {
+        if (block.type === "text" && block.text) {
+          fullResponse += block.text + "\n"; // each text block is appended on its own line
+        } else if (block.type === "tool_use" && block.name) {
+          log("EVALUATOR", `  Tool: ${block.name}`); // tool calls are only logged, never added to the response text
+        }
+      }
+    } else if (msg.type === "result") {
+      const resultText = extractResultText(msg); // strings inside the result message that start with "{" or a code fence
+      if (resultText) {
+        fullResponse += resultText + "\n";
+      }
+      log("EVALUATOR", `Evaluation complete for sprint ${sprint}`);
+    }
+  }
+
+  return fullResponse.trim(); // drops the trailing newline added by the last append
+}
+
+function extractResultText(resultMsg: unknown): string {
+  const chunks: string[] = [];
+  // Walks resultMsg and collects every nested string that, once trimmed, starts with "{" or a triple backtick; returns them joined by newlines.
+  const visit = (value: unknown, depth: number): void => {
+    if (depth > 3 || value === null || value === undefined) return; // nothing nested deeper than depth 3 is searched
+
+    if (typeof value === "string") {
+      const trimmed = value.trim();
+      if (trimmed.startsWith("{") || trimmed.startsWith("```")) { // keep only strings that look like a JSON object or a fenced block
+        chunks.push(trimmed);
+      }
+      return;
+    }
+
+    if (Array.isArray(value)) {
+      for (const item of value) visit(item, depth + 1);
+      return;
+    }
+
+    if (typeof value === "object") {
+      for (const [key, child] of Object.entries(value as Record<string, unknown>)) {
+        if (key === "type") continue; // values stored under a "type" key are never collected
+        visit(child, depth + 1);
+      }
+    }
+  };
+
+  visit(resultMsg, 0);
+  return chunks.join("\n"); // empty string when nothing matched
+}
diff --git a/claude-harness/generator.ts b/claude-harness/generator.ts
index 38ea8e4..9d45ffa 100644
--- a/claude-harness/generator.ts
+++ b/claude-harness/generator.ts
@@ -9,6 +9,7 @@ export async function runGenerator(
   spec: string,
   contract: SprintContract,
   previousFeedback?: EvalResult,
+  retryFocusCriteria: string[] = [],
 ): Promise<{ response: string; sessionId?: string }> {
   const sprint = contract.sprintNumber;
   const attempt = previousFeedback ? "retry" : "initial";
@@ -18,6 +19,10 @@ export async function runGenerator(
 
   if (previousFeedback) {
     prompt += `\n\n## Evaluation Feedback (MUST ADDRESS)\n\n${JSON.stringify(previousFeedback, null, 2)}`;
+    if (retryFocusCriteria.length > 0) {
+      prompt += `\n\n## Retry Focus (Scope Control)\n\nOnly these criteria are still failing and must be fixed now:\n${retryFocusCriteria.map((name) => `- ${name}`).join("\n")}`;
+      prompt += "\n\nMinimize changes outside the failing criteria. Preserve behavior for criteria that already pass unless a dependency forces a shared fix.";
+    }
     prompt += `\n\nThe previous attempt failed evaluation. Address every issue in the feedback above.`;
   } else {
     prompt += `\n\nImplement the features listed in this sprint contract. Work in the \`app/\` directory.`;
diff --git a/claude-harness/harness.ts b/claude-harness/harness.ts
index d947913..b6cd060 100644
--- a/claude-harness/harness.ts
+++ b/claude-harness/harness.ts
@@ -11,16 +11,24 @@ import {
   readSpec,
   writeContract,
   readContract,
+  readFeedback,
   writeFeedback,
+  readProgress,
   writeProgress,
+  findLatestFeedbackRound,
+  readSprintStabilityState,
+  writeSprintStabilityState,
 } from "../shared/files.ts";
+import { stabilizeEvaluation, buildStabilityStateFromEval, getFailedCriteria } from "../shared/evaluation.ts";
 import type {
   HarnessConfig,
+  ResumeMode,
   SprintContract,
   EvalResult,
   HarnessProgress,
   HarnessResult,
   SprintResult,
+  SprintStabilityState,
 } from "../shared/types.ts";
 
 import { runPlanner } from "./planner.ts";
@@ -30,52 +38,108 @@ import { runEvaluator } from "./evaluator.ts";
 export async function runHarness(config: HarnessConfig): Promise<HarnessResult> {
   const startTime = Date.now();
   const results: SprintResult[] = [];
+  const isResume = config.resumeMode !== undefined;
+  const resumeMode: ResumeMode = config.resumeMode ?? "strict";
 
   log("HARNESS", "Initializing Claude Agent SDK harness");
   log("HARNESS", `Work directory: ${config.workDir}`);
   log("HARNESS", `Max sprints: ${config.maxSprints} | Max retries: ${config.maxRetriesPerSprint} | Threshold: ${config.passThreshold}/10`);
+  log("HARNESS", `Retry strategy: ${config.retryStrategy} (unlock streak: ${config.hardFailUnlockStreak})`);
+  if (isResume) {
+    log("HARNESS", `Resume mode: ${resumeMode}`);
+  }
 
-  await initWorkspace(config.workDir);
+  await initWorkspace(config.workDir, { clean: !isResume });
 
-  // Phase 1: Planning
-  logDivider();
-  log("HARNESS", "PHASE 1: PLANNING");
-  logDivider();
+  let spec: string;
+  let totalSprints = 0;
+  let startSprint = 1;
+  let initialRetryForSprint = 0;
+  let reuseExistingContractOnStartSprint = false;
+  let lastEvalForStartSprint: EvalResult | undefined;
+  let stabilityStateForStartSprint: SprintStabilityState | undefined;
+
+  const progress: HarnessProgress = isResume
+    ? await readProgress(config.workDir)
+    : {
+      status: "planning",
+      currentSprint: 0,
+      totalSprints: 0,
+      completedSprints: 0,
+      retryCount: 0,
+    };
 
-  const progress: HarnessProgress = {
-    status: "planning",
-    currentSprint: 0,
-    totalSprints: 0,
-    completedSprints: 0,
-    retryCount: 0,
-  };
-  await writeProgress(config.workDir, progress);
+  if (!isResume) {
+    // Phase 1: Planning
+    logDivider();
+    log("HARNESS", "PHASE 1: PLANNING");
+    logDivider();
 
-  const plannerResponse = await runPlanner(config.userPrompt, config.workDir);
+    await writeProgress(config.workDir, progress);
 
-  // Planner may have written spec.md via Write tool, or returned it as text
-  let spec: string;
-  try {
+    const plannerResponse = await runPlanner(config.userPrompt, config.workDir);
+
+    // Planner may have written spec.md via Write tool, or returned it as text
+    try {
+      spec = await readSpec(config.workDir);
+    } catch {
+      log("HARNESS", "Planner returned spec as text, writing to spec.md");
+      await writeSpec(config.workDir, plannerResponse);
+      spec = plannerResponse;
+    }
+
+    // Parse sprint count from spec - look for "Sprint N" patterns
+    totalSprints = deriveTotalSprints(spec, config.maxSprints);
+    progress.totalSprints = totalSprints;
+    log("HARNESS", `Planner produced ${totalSprints} sprints`);
+  } else {
     spec = await readSpec(config.workDir);
-  } catch {
-    log("HARNESS", "Planner returned spec as text, writing to spec.md");
-    await writeSpec(config.workDir, plannerResponse);
-    spec = plannerResponse;
-  }
+    totalSprints = progress.totalSprints > 0 ? progress.totalSprints : deriveTotalSprints(spec, config.maxSprints);
+    progress.totalSprints = totalSprints;
 
-  // Parse sprint count from spec - look for "Sprint N" patterns
-  const sprintNumbers = Array.from(spec.matchAll(/sprint\s+(\d+)/gi))
-    .map((m) => parseInt(m[1]!, 10))
-    .filter((n) => n > 0 && n <= config.maxSprints);
-  const totalSprints = sprintNumbers.length > 0
-    ? Math.min(Math.max(...sprintNumbers), config.maxSprints)
-    : 3; // Default to 3 if no sprint numbers found
+    if (progress.status === "complete") {
+      log("HARNESS", "Resume requested but harness is already complete.");
+      return { success: true, sprints: [], totalDurationMs: Date.now() - startTime };
+    }
+
+    if (progress.currentSprint <= 0) {
+      throw new Error("Cannot resume: progress.json does not contain a valid currentSprint");
+    }
 
-  progress.totalSprints = totalSprints;
-  log("HARNESS", `Planner produced ${totalSprints} sprints`);
+    startSprint = progress.currentSprint;
+    const latestRound = await findLatestFeedbackRound(config.workDir, startSprint);
+    if (latestRound !== null) {
+      lastEvalForStartSprint = await readFeedback(config.workDir, startSprint, latestRound);
+      try {
+        stabilityStateForStartSprint = await readSprintStabilityState(config.workDir, startSprint);
+      } catch {
+        // Backward compatibility: older runs do not have stability snapshots
+      }
+    }
+
+    if (resumeMode === "strict") {
+      if (progress.status === "failed" && latestRound !== null && latestRound >= config.maxRetriesPerSprint) {
+        throw new Error(
+          `Cannot strictly resume sprint ${startSprint}: retry budget exhausted (last round ${latestRound})`,
+        );
+      }
+      initialRetryForSprint = latestRound === null ? 0 : latestRound + 1;
+      reuseExistingContractOnStartSprint = true;
+    } else if (resumeMode === "reset-retries") {
+      initialRetryForSprint = 0;
+      reuseExistingContractOnStartSprint = true;
+    } else {
+      initialRetryForSprint = 0;
+      reuseExistingContractOnStartSprint = false;
+      lastEvalForStartSprint = undefined;
+      stabilityStateForStartSprint = undefined;
+    }
+
+    log("HARNESS", `Resuming at sprint ${startSprint}/${totalSprints} from retry ${initialRetryForSprint}`);
+  }
 
   // Phase 2-4: Sprint Loop
-  for (let sprint = 1; sprint <= totalSprints; sprint++) {
+  for (let sprint = startSprint; sprint <= totalSprints; sprint++) {
     logDivider();
     log("HARNESS", `SPRINT ${sprint}/${totalSprints}`);
     logDivider();
@@ -83,20 +147,30 @@ export async function runHarness(config: HarnessConfig): Promise<HarnessResult>
     // Phase 2: Contract Negotiation
     progress.status = "negotiating";
     progress.currentSprint = sprint;
-    progress.retryCount = 0;
+    progress.retryCount = sprint === startSprint ? initialRetryForSprint : 0;
     await writeProgress(config.workDir, progress);
 
-    log("HARNESS", "Negotiating sprint contract...");
-    const contract = await negotiateContract(config.workDir, spec, sprint);
-    await writeContract(config.workDir, contract);
+    let contract: SprintContract;
+    const shouldReuseContract = sprint === startSprint && reuseExistingContractOnStartSprint;
+    if (shouldReuseContract) {
+      log("HARNESS", "Reusing existing sprint contract...");
+      contract = await readContract(config.workDir, sprint);
+    } else {
+      log("HARNESS", "Negotiating sprint contract...");
+      contract = await negotiateContract(config.workDir, spec, sprint);
+      await writeContract(config.workDir, contract);
+    }
     log("HARNESS", `Contract agreed: ${contract.criteria.length} criteria for ${contract.features.length} features`);
 
     // Phase 3-4: Build-Evaluate Loop
     let passed = false;
-    let lastEval: EvalResult | undefined;
+    let lastEval: EvalResult | undefined = sprint === startSprint ? lastEvalForStartSprint : undefined;
+    let sprintStabilityState: SprintStabilityState | undefined = sprint === startSprint ? stabilityStateForStartSprint : undefined;
     let attempts = 0;
 
-    for (let retry = 0; retry <= config.maxRetriesPerSprint; retry++) {
+    const retryStart = sprint === startSprint ? initialRetryForSprint : 0;
+
+    for (let retry = retryStart; retry <= config.maxRetriesPerSprint; retry++) {
       attempts = retry + 1;
 
       // Build
@@ -104,13 +178,36 @@ export async function runHarness(config: HarnessConfig): Promise<HarnessResult>
       progress.retryCount = retry;
       await writeProgress(config.workDir, progress);
 
-      await runGenerator(config.workDir, spec, contract, lastEval);
+      if (!sprintStabilityState && lastEval) {
+        sprintStabilityState = buildStabilityStateFromEval(contract, lastEval, config.passThreshold);
+      }
+
+      const retryFocusCriteria = lastEval
+        ? getFailedCriteria(contract, lastEval, config.passThreshold)
+        : [];
+
+      await runGenerator(config.workDir, spec, contract, lastEval, retryFocusCriteria);
 
       // Evaluate
       progress.status = "evaluating";
       await writeProgress(config.workDir, progress);
 
-      lastEval = await runEvaluator(config.workDir, contract, config.passThreshold);
+      const rawEval = await runEvaluator(config.workDir, contract, config.passThreshold);
+      const stabilized = stabilizeEvaluation(contract, rawEval, config, sprintStabilityState);
+      lastEval = stabilized.result;
+      sprintStabilityState = stabilized.state;
+
+      if (config.retryStrategy === "stabilized") {
+        await writeSprintStabilityState(config.workDir, sprint, sprintStabilityState);
+        const { lockedPassRetained, unlockedRegressions, inconclusiveRetained } = stabilized.summary;
+        if (lockedPassRetained > 0 || unlockedRegressions > 0) {
+          log(
+            "HARNESS",
+            `Stabilized retry: retained ${lockedPassRetained} locked pass(es) (${inconclusiveRetained} inconclusive), unlocked ${unlockedRegressions} regression(s)`,
+          );
+        }
+      }
+
       await writeFeedback(config.workDir, sprint, retry, lastEval);
 
       if (lastEval.passed) {
@@ -156,6 +253,16 @@ export async function runHarness(config: HarnessConfig): Promise<HarnessResult>
   return { success: allPassed, sprints: results, totalDurationMs: totalDuration };
 }
 
+function deriveTotalSprints(spec: string, maxSprints: number): number {
+  const sprintNumbers = Array.from(spec.matchAll(/sprint\s+(\d+)/gi))
+    .map((m) => parseInt(m[1]!, 10))
+    .filter((n) => n > 0 && n <= maxSprints);
+  // Result: the largest "sprint N" number (matched case-insensitively) in spec that lies within 1..maxSprints.
+  return sprintNumbers.length > 0
+    ? Math.min(Math.max(...sprintNumbers), maxSprints)
+    : 3; // default to 3 when no in-range sprint numbers are found
+}
+
 async function negotiateContract(
   workDir: string,
   spec: string,
diff --git a/claude-harness/index.ts b/claude-harness/index.ts
index 78c7fec..2519c64 100644
--- a/claude-harness/index.ts
+++ b/claude-harness/index.ts
@@ -3,38 +3,100 @@ import { readFile } from "fs/promises";
 import { runHarness } from "./harness.ts";
 import { DEFAULT_CONFIG } from "../shared/config.ts";
 import { log, logError, logDivider } from "../shared/logger.ts";
-import type { HarnessConfig } from "../shared/types.ts";
+import type { HarnessConfig, ResumeMode, RetryStrategy } from "../shared/types.ts";
 
 let userPrompt: string | undefined;
+let promptFilePath: string | undefined;
+let resumeMode: ResumeMode | undefined;
+let retryStrategy: RetryStrategy | undefined;
+let hardFailUnlockStreak: number | undefined;
 
-const arg = process.argv[2];
-if (arg === "--file" || arg === "-f") {
-  const filePath = process.argv[3];
-  if (!filePath) {
-    console.error("Error: --file requires a path argument");
+const args = process.argv.slice(2);
+
+for (let i = 0; i < args.length; i++) {
+  const arg = args[i]!;
+
+  if (arg === "--file" || arg === "-f") {
+    promptFilePath = args[i + 1];
+    if (!promptFilePath) {
+      console.error("Error: --file requires a path argument");
+      process.exit(1);
+    }
+    i += 1;
+    continue;
+  }
+
+  if (arg === "--resume") {
+    resumeMode = "strict";
+    continue;
+  }
+
+  if (arg.startsWith("--resume=")) {
+    const mode = arg.split("=")[1];
+    if (mode === "strict" || mode === "reset-retries" || mode === "reset-contract") {
+      resumeMode = mode;
+      continue;
+    }
+    console.error(`Error: invalid resume mode '${mode}'. Expected strict, reset-retries, or reset-contract.`);
+    process.exit(1);
+  }
+
+  if (arg.startsWith("--retry-strategy=")) {
+    const mode = arg.split("=")[1];
+    if (mode === "strict" || mode === "stabilized") {
+      retryStrategy = mode;
+      continue;
+    }
+    console.error(`Error: invalid retry strategy '${mode}'. Expected strict or stabilized.`);
+    process.exit(1);
+  }
+
+  if (arg.startsWith("--hard-fail-unlock-streak=")) {
+    const raw = arg.split("=")[1];
+    const parsed = raw ? parseInt(raw, 10) : NaN;
+    if (Number.isInteger(parsed) && parsed >= 1) {
+      hardFailUnlockStreak = parsed;
+      continue;
+    }
+    console.error(`Error: invalid hard fail unlock streak '${raw}'. Expected integer >= 1.`);
     process.exit(1);
   }
-  userPrompt = await readFile(resolve(filePath), "utf-8");
-} else {
-  userPrompt = arg;
+
+  userPrompt = userPrompt ? `${userPrompt} ${arg}` : arg;
+}
+
+if (promptFilePath) {
+  userPrompt = await readFile(resolve(promptFilePath), "utf-8");
 }
 
-if (!userPrompt) {
+if (!userPrompt && !resumeMode) {
   console.error("Usage: bun run claude-harness/index.ts <prompt>");
   console.error('       bun run claude-harness/index.ts --file <path-to-prompt.md>');
+  console.error('       bun run claude-harness/index.ts --resume[=strict|reset-retries|reset-contract]');
+  console.error('       bun run claude-harness/index.ts --retry-strategy=strict|stabilized <prompt>');
+  console.error('       bun run claude-harness/index.ts --hard-fail-unlock-streak=2 <prompt>');
+  console.error('       bun run claude-harness/index.ts --resume=reset-retries "optional prompt"');
   console.error('Example: bun run claude-harness/index.ts "Build a task manager with REST API and dashboard"');
   process.exit(1);
 }
 
 const config: HarnessConfig = {
   ...DEFAULT_CONFIG,
-  userPrompt,
+  userPrompt: userPrompt ?? "RESUME",
   workDir: resolve("workspace/claude"),
+  resumeMode,
+  retryStrategy: retryStrategy ?? DEFAULT_CONFIG.retryStrategy,
+  hardFailUnlockStreak: hardFailUnlockStreak ?? DEFAULT_CONFIG.hardFailUnlockStreak,
 };
 
 logDivider();
 log("HARNESS", "ADVERSARIAL DEV - Claude Agent SDK Harness");
-log("HARNESS", `Prompt: "${userPrompt}"`);
+log("HARNESS", `Prompt: "${config.userPrompt}"`);
+if (resumeMode) {
+  log("HARNESS", `Resume: ${resumeMode}`);
+}
+log("HARNESS", `Retry strategy: ${config.retryStrategy}`);
+log("HARNESS", `Hard fail unlock streak: ${config.hardFailUnlockStreak}`);
 logDivider();
 
 try {
diff --git a/codex-harness/evaluator.ts b/codex-harness/evaluator.ts
index 4ae40fb..777a91a 100644
--- a/codex-harness/evaluator.ts
+++ b/codex-harness/evaluator.ts
@@ -2,6 +2,7 @@ import { Codex } from "@openai/codex-sdk";
 import { EVALUATOR_SYSTEM_PROMPT } from "../shared/prompts.ts";
 import { CODEX_MODEL, CODEX_NETWORK_ACCESS } from "../shared/config.ts";
 import { log, logError } from "../shared/logger.ts";
+import { getCriterionThreshold } from "../shared/evaluation.ts";
 import type { SprintContract, EvalResult } from "../shared/types.ts";
 
 export async function runEvaluator(
@@ -18,7 +19,8 @@ ${JSON.stringify(contract, null, 2)}
 
 ## Pass Threshold
 
-Each criterion must score at least ${passThreshold}/10 to pass.
+Each criterion must satisfy its own \
+\`threshold\` from the sprint contract. If a criterion has no threshold, use ${passThreshold}/10.
 
 ## Instructions
 
@@ -40,26 +42,49 @@ Examine the application in the \`app/\` directory. Read the code, run it if poss
 
   log("EVALUATOR", `Evaluation complete for sprint ${sprint}`);
 
-  const evalResult = parseEvalResult(response, contract, passThreshold);
+  const invalidThresholds = contract.criteria
+    .filter((criterion) => !Number.isInteger(criterion.threshold) || criterion.threshold < 1 || criterion.threshold > 10)
+    .map((criterion) => `${criterion.name}=${criterion.threshold}`);
 
-  const passedCount = evalResult.feedback.filter((f) => f.score >= passThreshold).length;
+  if (invalidThresholds.length > 0) {
+    log(
+      "EVALUATOR",
+      `Ignoring ${invalidThresholds.length} invalid contract thresholds (expected integer 1-10): ${invalidThresholds.join(", ")}`,
+    );
+  }
+
+  let evalResult = tryParseEvalResult(response, contract, passThreshold);
+  if (!evalResult) {
+    logError("EVALUATOR", "Failed to parse evaluation JSON from first attempt; retrying evaluator once...");
+    const recoveryPrompt = `${fullPrompt}\n\nCRITICAL RETRY INSTRUCTION: Your previous response was not valid JSON. Re-run any checks you need, then output ONLY a valid JSON object matching the required schema.`;
+    const recoveryTurn = await thread.run(recoveryPrompt);
+    const recoveryResponse = recoveryTurn.finalResponse ?? "";
+    evalResult = tryParseEvalResult(recoveryResponse, contract, passThreshold);
+  }
+
+  if (!evalResult) {
+    evalResult = buildParseFailureEvalResult(contract, response);
+  }
+
+  const passedCount = evalResult.feedback.filter((f) => f.score >= getCriterionThreshold(contract, f.criterion, passThreshold)).length;
   const totalCount = evalResult.feedback.length;
   const verdict = evalResult.passed ? "PASSED" : "FAILED";
   log("EVALUATOR", `Sprint ${sprint}: ${verdict} (${passedCount}/${totalCount} criteria passed)`);
 
   for (const item of evalResult.feedback) {
-    const status = item.score >= passThreshold ? "\x1b[32mPASS\x1b[0m" : "\x1b[31mFAIL\x1b[0m";
-    log("EVALUATOR", `  [${status}] ${item.criterion}: ${item.score}/10 - ${item.details.slice(0, 100)}`);
+    const threshold = getCriterionThreshold(contract, item.criterion, passThreshold);
+    const status = item.score >= threshold ? "\x1b[32mPASS\x1b[0m" : "\x1b[31mFAIL\x1b[0m";
+    log("EVALUATOR", `  [${status}] ${item.criterion}: ${item.score}/10 (threshold ${threshold}) - ${item.details.slice(0, 100)}`);
   }
 
   return evalResult;
 }
 
-function parseEvalResult(
+function tryParseEvalResult(
   response: string,
   contract: SprintContract,
   passThreshold: number,
-): EvalResult {
+): EvalResult | null {
   // Try multiple strategies to extract JSON from the response
   const candidates: string[] = [];
 
@@ -80,7 +105,7 @@ function parseEvalResult(
     try {
       const parsed = JSON.parse(candidate) as EvalResult;
       if (parsed.feedback && Array.isArray(parsed.feedback)) {
-        parsed.passed = parsed.feedback.every((f) => f.score >= passThreshold);
+        parsed.passed = parsed.feedback.every((f) => f.score >= getCriterionThreshold(contract, f.criterion, passThreshold));
         return parsed;
       }
     } catch {
@@ -88,6 +113,10 @@ function parseEvalResult(
     }
   }
 
+  return null;
+}
+
+function buildParseFailureEvalResult(contract: SprintContract, response: string): EvalResult {
   logError("EVALUATOR", "Failed to parse evaluation JSON from any extraction strategy");
   return {
     passed: false,
diff --git a/codex-harness/generator.ts b/codex-harness/generator.ts
index d4622e8..86cea7a 100644
--- a/codex-harness/generator.ts
+++ b/codex-harness/generator.ts
@@ -9,6 +9,7 @@ export async function runGenerator(
   spec: string,
   contract: SprintContract,
   previousFeedback?: EvalResult,
+  retryFocusCriteria: string[] = [],
 ): Promise<{ response: string }> {
   const sprint = contract.sprintNumber;
   const attempt = previousFeedback ? "retry" : "initial";
@@ -18,6 +19,10 @@ export async function runGenerator(
 
   if (previousFeedback) {
     taskPrompt += `\n\n## Evaluation Feedback (MUST ADDRESS)\n\n${JSON.stringify(previousFeedback, null, 2)}`;
+    if (retryFocusCriteria.length > 0) {
+      taskPrompt += `\n\n## Retry Focus (Scope Control)\n\nOnly these criteria are still failing and must be fixed now:\n${retryFocusCriteria.map((name) => `- ${name}`).join("\n")}`;
+      taskPrompt += "\n\nMinimize changes outside the failing criteria. Preserve behavior for criteria that already pass unless a dependency forces a shared fix.";
+    }
     taskPrompt += `\n\nThe previous attempt failed evaluation. Address every issue in the feedback above.`;
   } else {
     taskPrompt += `\n\nImplement the features listed in this sprint contract. Work in the \`app/\` directory.`;
diff --git a/codex-harness/harness.ts b/codex-harness/harness.ts
index 105c64d..6365fdf 100644
--- a/codex-harness/harness.ts
+++ b/codex-harness/harness.ts
@@ -10,16 +10,25 @@ import {
   writeSpec,
   readSpec,
   writeContract,
+  readContract,
+  readFeedback,
   writeFeedback,
+  readProgress,
   writeProgress,
+  findLatestFeedbackRound,
+  readSprintStabilityState,
+  writeSprintStabilityState,
 } from "../shared/files.ts";
+import { stabilizeEvaluation, buildStabilityStateFromEval, getFailedCriteria } from "../shared/evaluation.ts";
 import type {
   HarnessConfig,
+  ResumeMode,
   SprintContract,
   EvalResult,
   HarnessProgress,
   HarnessResult,
   SprintResult,
+  SprintStabilityState,
 } from "../shared/types.ts";
 
 import { runPlanner } from "./planner.ts";
@@ -29,52 +38,108 @@ import { runEvaluator } from "./evaluator.ts";
 export async function runHarness(config: HarnessConfig): Promise<HarnessResult> {
   const startTime = Date.now();
   const results: SprintResult[] = [];
+  const isResume = config.resumeMode !== undefined;
+  const resumeMode: ResumeMode = config.resumeMode ?? "strict";
 
   log("HARNESS", "Initializing Codex SDK harness");
   log("HARNESS", `Work directory: ${config.workDir}`);
   log("HARNESS", `Max sprints: ${config.maxSprints} | Max retries: ${config.maxRetriesPerSprint} | Threshold: ${config.passThreshold}/10`);
+  log("HARNESS", `Retry strategy: ${config.retryStrategy} (unlock streak: ${config.hardFailUnlockStreak})`);
+  if (isResume) {
+    log("HARNESS", `Resume mode: ${resumeMode}`);
+  }
 
-  await initWorkspace(config.workDir);
+  await initWorkspace(config.workDir, { clean: !isResume });
 
-  // Phase 1: Planning
-  logDivider();
-  log("HARNESS", "PHASE 1: PLANNING");
-  logDivider();
+  let spec: string;
+  let totalSprints = 0;
+  let startSprint = 1;
+  let initialRetryForSprint = 0;
+  let reuseExistingContractOnStartSprint = false;
+  let lastEvalForStartSprint: EvalResult | undefined;
+  let stabilityStateForStartSprint: SprintStabilityState | undefined;
+
+  const progress: HarnessProgress = isResume
+    ? await readProgress(config.workDir)
+    : {
+      status: "planning",
+      currentSprint: 0,
+      totalSprints: 0,
+      completedSprints: 0,
+      retryCount: 0,
+    };
 
-  const progress: HarnessProgress = {
-    status: "planning",
-    currentSprint: 0,
-    totalSprints: 0,
-    completedSprints: 0,
-    retryCount: 0,
-  };
-  await writeProgress(config.workDir, progress);
+  if (!isResume) {
+    // Phase 1: Planning
+    logDivider();
+    log("HARNESS", "PHASE 1: PLANNING");
+    logDivider();
 
-  const plannerResponse = await runPlanner(config.userPrompt, config.workDir);
+    await writeProgress(config.workDir, progress);
 
-  // Planner may have written spec.md via its tools, or returned it as text
-  let spec: string;
-  try {
+    const plannerResponse = await runPlanner(config.userPrompt, config.workDir);
+
+    // Planner may have written spec.md via its tools, or returned it as text
+    try {
+      spec = await readSpec(config.workDir);
+    } catch {
+      log("HARNESS", "Planner returned spec as text, writing to spec.md");
+      await writeSpec(config.workDir, plannerResponse);
+      spec = plannerResponse;
+    }
+
+    // Parse sprint count from spec - look for "Sprint N" patterns
+    totalSprints = deriveTotalSprints(spec, config.maxSprints);
+    progress.totalSprints = totalSprints;
+    log("HARNESS", `Planner produced ${totalSprints} sprints`);
+  } else {
     spec = await readSpec(config.workDir);
-  } catch {
-    log("HARNESS", "Planner returned spec as text, writing to spec.md");
-    await writeSpec(config.workDir, plannerResponse);
-    spec = plannerResponse;
-  }
+    totalSprints = progress.totalSprints > 0 ? progress.totalSprints : deriveTotalSprints(spec, config.maxSprints);
+    progress.totalSprints = totalSprints;
 
-  // Parse sprint count from spec - look for "Sprint N" patterns
-  const sprintNumbers = Array.from(spec.matchAll(/sprint\s+(\d+)/gi))
-    .map((m) => parseInt(m[1]!, 10))
-    .filter((n) => n > 0 && n <= config.maxSprints);
-  const totalSprints = sprintNumbers.length > 0
-    ? Math.min(Math.max(...sprintNumbers), config.maxSprints)
-    : 3; // Default to 3 if no sprint numbers found
+    if (progress.status === "complete") {
+      log("HARNESS", "Resume requested but harness is already complete.");
+      return { success: true, sprints: [], totalDurationMs: Date.now() - startTime };
+    }
+
+    if (progress.currentSprint <= 0) {
+      throw new Error("Cannot resume: progress.json does not contain a valid currentSprint");
+    }
 
-  progress.totalSprints = totalSprints;
-  log("HARNESS", `Planner produced ${totalSprints} sprints`);
+    startSprint = progress.currentSprint;
+    const latestRound = await findLatestFeedbackRound(config.workDir, startSprint);
+    if (latestRound !== null) {
+      lastEvalForStartSprint = await readFeedback(config.workDir, startSprint, latestRound);
+      try {
+        stabilityStateForStartSprint = await readSprintStabilityState(config.workDir, startSprint);
+      } catch {
+        // Backward compatibility: older runs do not have stability snapshots
+      }
+    }
+
+    if (resumeMode === "strict") {
+      if (progress.status === "failed" && latestRound !== null && latestRound >= config.maxRetriesPerSprint) {
+        throw new Error(
+          `Cannot strictly resume sprint ${startSprint}: retry budget exhausted (last round ${latestRound})`,
+        );
+      }
+      initialRetryForSprint = latestRound === null ? 0 : latestRound + 1;
+      reuseExistingContractOnStartSprint = true;
+    } else if (resumeMode === "reset-retries") {
+      initialRetryForSprint = 0;
+      reuseExistingContractOnStartSprint = true;
+    } else {
+      initialRetryForSprint = 0;
+      reuseExistingContractOnStartSprint = false;
+      lastEvalForStartSprint = undefined;
+      stabilityStateForStartSprint = undefined;
+    }
+
+    log("HARNESS", `Resuming at sprint ${startSprint}/${totalSprints} from retry ${initialRetryForSprint}`);
+  }
 
   // Phase 2-4: Sprint Loop
-  for (let sprint = 1; sprint <= totalSprints; sprint++) {
+  for (let sprint = startSprint; sprint <= totalSprints; sprint++) {
     logDivider();
     log("HARNESS", `SPRINT ${sprint}/${totalSprints}`);
     logDivider();
@@ -82,20 +147,30 @@ export async function runHarness(config: HarnessConfig): Promise<HarnessResult>
     // Phase 2: Contract Negotiation
     progress.status = "negotiating";
     progress.currentSprint = sprint;
-    progress.retryCount = 0;
+    progress.retryCount = sprint === startSprint ? initialRetryForSprint : 0;
     await writeProgress(config.workDir, progress);
 
-    log("HARNESS", "Negotiating sprint contract...");
-    const contract = await negotiateContract(config.workDir, spec, sprint);
-    await writeContract(config.workDir, contract);
+    let contract: SprintContract;
+    const shouldReuseContract = sprint === startSprint && reuseExistingContractOnStartSprint;
+    if (shouldReuseContract) {
+      log("HARNESS", "Reusing existing sprint contract...");
+      contract = await readContract(config.workDir, sprint);
+    } else {
+      log("HARNESS", "Negotiating sprint contract...");
+      contract = await negotiateContract(config.workDir, spec, sprint);
+      await writeContract(config.workDir, contract);
+    }
     log("HARNESS", `Contract agreed: ${contract.criteria.length} criteria for ${contract.features.length} features`);
 
     // Phase 3-4: Build-Evaluate Loop
     let passed = false;
-    let lastEval: EvalResult | undefined;
+    let lastEval: EvalResult | undefined = sprint === startSprint ? lastEvalForStartSprint : undefined;
+    let sprintStabilityState: SprintStabilityState | undefined = sprint === startSprint ? stabilityStateForStartSprint : undefined;
     let attempts = 0;
 
-    for (let retry = 0; retry <= config.maxRetriesPerSprint; retry++) {
+    const retryStart = sprint === startSprint ? initialRetryForSprint : 0;
+
+    for (let retry = retryStart; retry <= config.maxRetriesPerSprint; retry++) {
       attempts = retry + 1;
 
       // Build
@@ -103,13 +178,36 @@ export async function runHarness(config: HarnessConfig): Promise<HarnessResult>
       progress.retryCount = retry;
       await writeProgress(config.workDir, progress);
 
-      await runGenerator(config.workDir, spec, contract, lastEval);
+      if (!sprintStabilityState && lastEval) {
+        sprintStabilityState = buildStabilityStateFromEval(contract, lastEval, config.passThreshold);
+      }
+
+      const retryFocusCriteria = lastEval
+        ? getFailedCriteria(contract, lastEval, config.passThreshold)
+        : [];
+
+      await runGenerator(config.workDir, spec, contract, lastEval, retryFocusCriteria);
 
       // Evaluate
       progress.status = "evaluating";
       await writeProgress(config.workDir, progress);
 
-      lastEval = await runEvaluator(config.workDir, contract, config.passThreshold);
+      const rawEval = await runEvaluator(config.workDir, contract, config.passThreshold);
+      const stabilized = stabilizeEvaluation(contract, rawEval, config, sprintStabilityState);
+      lastEval = stabilized.result;
+      sprintStabilityState = stabilized.state;
+
+      if (config.retryStrategy === "stabilized") {
+        await writeSprintStabilityState(config.workDir, sprint, sprintStabilityState);
+        const { lockedPassRetained, unlockedRegressions, inconclusiveRetained } = stabilized.summary;
+        if (lockedPassRetained > 0 || unlockedRegressions > 0) {
+          log(
+            "HARNESS",
+            `Stabilized retry: retained ${lockedPassRetained} locked pass(es) (${inconclusiveRetained} inconclusive), unlocked ${unlockedRegressions} regression(s)`,
+          );
+        }
+      }
+
       await writeFeedback(config.workDir, sprint, retry, lastEval);
 
       if (lastEval.passed) {
@@ -154,6 +252,16 @@ export async function runHarness(config: HarnessConfig): Promise<HarnessResult>
   return { success: allPassed, sprints: results, totalDurationMs: totalDuration };
 }
 
+/** Highest "Sprint N" number mentioned in `spec` (case-insensitive, limited to 1..maxSprints); min(3, maxSprints) if none. */
+function deriveTotalSprints(spec: string, maxSprints: number): number {
+  const sprintNumbers = Array.from(spec.matchAll(/sprint\s+(\d+)/gi))
+    .map((m) => parseInt(m[1]!, 10))
+    .filter((n) => n > 0 && n <= maxSprints);
+
+  // Every surviving number is already <= maxSprints; the no-match fallback has to respect the cap as well.
+  return sprintNumbers.length > 0 ? Math.max(...sprintNumbers) : Math.min(3, maxSprints);
+}
+
 async function negotiateContract(
   workDir: string,
   spec: string,
diff --git a/codex-harness/index.ts b/codex-harness/index.ts
index 16cb310..70f819c 100644
--- a/codex-harness/index.ts
+++ b/codex-harness/index.ts
@@ -3,38 +3,100 @@ import { readFile } from "fs/promises";
 import { runHarness } from "./harness.ts";
 import { DEFAULT_CONFIG } from "../shared/config.ts";
 import { log, logError, logDivider } from "../shared/logger.ts";
-import type { HarnessConfig } from "../shared/types.ts";
+import type { HarnessConfig, ResumeMode, RetryStrategy } from "../shared/types.ts";
 
 let userPrompt: string | undefined;
+let promptFilePath: string | undefined;
+let resumeMode: ResumeMode | undefined;
+let retryStrategy: RetryStrategy | undefined;
+let hardFailUnlockStreak: number | undefined;
 
-const arg = process.argv[2];
-if (arg === "--file" || arg === "-f") {
-  const filePath = process.argv[3];
-  if (!filePath) {
-    console.error("Error: --file requires a path argument");
+const args = process.argv.slice(2);
+
+for (let i = 0; i < args.length; i++) {
+  const arg = args[i]!;
+
+  if (arg === "--file" || arg === "-f") {
+    promptFilePath = args[i + 1];
+    if (!promptFilePath) {
+      console.error("Error: --file requires a path argument");
+      process.exit(1);
+    }
+    i += 1;
+    continue;
+  }
+
+  if (arg === "--resume") {
+    resumeMode = "strict";
+    continue;
+  }
+
+  if (arg.startsWith("--resume=")) {
+    const mode = arg.split("=")[1];
+    if (mode === "strict" || mode === "reset-retries" || mode === "reset-contract") {
+      resumeMode = mode;
+      continue;
+    }
+    console.error(`Error: invalid resume mode '${mode}'. Expected strict, reset-retries, or reset-contract.`);
+    process.exit(1);
+  }
+
+  if (arg.startsWith("--retry-strategy=")) {
+    const mode = arg.split("=")[1];
+    if (mode === "strict" || mode === "stabilized") {
+      retryStrategy = mode;
+      continue;
+    }
+    console.error(`Error: invalid retry strategy '${mode}'. Expected strict or stabilized.`);
+    process.exit(1);
+  }
+
+  if (arg.startsWith("--hard-fail-unlock-streak=")) {
+    const raw = arg.split("=")[1];
+    const parsed = raw ? parseInt(raw, 10) : NaN;
+    if (Number.isInteger(parsed) && parsed >= 1) {
+      hardFailUnlockStreak = parsed;
+      continue;
+    }
+    console.error(`Error: invalid hard fail unlock streak '${raw}'. Expected integer >= 1.`);
     process.exit(1);
   }
-  userPrompt = await readFile(resolve(filePath), "utf-8");
-} else {
-  userPrompt = arg;
+
+  userPrompt = userPrompt ? `${userPrompt} ${arg}` : arg;
+}
+
+if (promptFilePath) {
+  userPrompt = await readFile(resolve(promptFilePath), "utf-8");
 }
 
-if (!userPrompt) {
+if (!userPrompt && !resumeMode) {
   console.error("Usage: bun run codex-harness/index.ts <prompt>");
   console.error('       bun run codex-harness/index.ts --file <path-to-prompt.md>');
+  console.error('       bun run codex-harness/index.ts --resume[=strict|reset-retries|reset-contract]');
+  console.error('       bun run codex-harness/index.ts --retry-strategy=strict|stabilized <prompt>');
+  console.error('       bun run codex-harness/index.ts --hard-fail-unlock-streak=2 <prompt>');
+  console.error('       bun run codex-harness/index.ts --resume=reset-retries "optional prompt"');
   console.error('Example: bun run codex-harness/index.ts "Build a task manager with REST API and dashboard"');
   process.exit(1);
 }
 
 const config: HarnessConfig = {
   ...DEFAULT_CONFIG,
-  userPrompt,
+  userPrompt: userPrompt ?? "RESUME",
   workDir: resolve("workspace/codex"),
+  resumeMode,
+  retryStrategy: retryStrategy ?? DEFAULT_CONFIG.retryStrategy,
+  hardFailUnlockStreak: hardFailUnlockStreak ?? DEFAULT_CONFIG.hardFailUnlockStreak,
 };
 
 logDivider();
 log("HARNESS", "ADVERSARIAL DEV - Codex SDK Harness");
-log("HARNESS", `Prompt: "${userPrompt}"`);
+log("HARNESS", `Prompt: "${config.userPrompt}"`);
+if (resumeMode) {
+  log("HARNESS", `Resume: ${resumeMode}`);
+}
+log("HARNESS", `Retry strategy: ${config.retryStrategy}`);
+log("HARNESS", `Hard fail unlock streak: ${config.hardFailUnlockStreak}`);
 logDivider();
 
 try {
diff --git a/shared/config.ts b/shared/config.ts
index 821c963..1499621 100644
--- a/shared/config.ts
+++ b/shared/config.ts
@@ -4,10 +4,12 @@ export const DEFAULT_CONFIG: Omit<HarnessConfig, "userPrompt" | "workDir"> = {
   maxSprints: 10,
   maxRetriesPerSprint: 3,
   passThreshold: 7,
+  retryStrategy: "stabilized",
+  hardFailUnlockStreak: 2,
 };
 
 export const CLAUDE_MODEL = "claude-sonnet-4-6";
 export const CODEX_MODEL = "gpt-5.4";
 
-export const CLAUDE_MAX_TURNS = 50;
+export const CLAUDE_MAX_TURNS = 80;
 export const CODEX_NETWORK_ACCESS = true;
diff --git a/shared/evaluation.ts b/shared/evaluation.ts
new file mode 100644
index 0000000..a65438d
--- /dev/null
+++ b/shared/evaluation.ts
@@ -0,0 +1,175 @@
+import type {
+  SprintContract,
+  EvalResult,
+  SprintStabilityState,
+  StabilizationSummary,
+  CriterionOutcome,
+  HarnessConfig,
+} from "./types.ts";
+
+const INCONCLUSIVE_PATTERN = /(cannot|can't|unable|not available|unavailable|not possible|missing|not installed|environment|could not run|chrome not available|permission denied|tooling unavailable|sdk unavailable)/i; // case-insensitive; classifyOutcome tests evaluator details against this
+
+/** Pass threshold for `criterion` as declared in the contract, or `fallback` when it is not an integer in 1..10. */
+export function getCriterionThreshold(contract: SprintContract, criterion: string, fallback: number): number {
+  const entry = contract.criteria.find((candidate) => candidate.name === criterion);
+  const declared = entry?.threshold;
+
+  // Unknown criterion, non-numeric or fractional threshold: use the caller-supplied default.
+  if (typeof declared !== "number" || !Number.isInteger(declared)) return fallback;
+
+  return declared >= 1 && declared <= 10 ? declared : fallback;
+}
+
+/** "pass" at or above `threshold`; otherwise "inconclusive" when `details` matches INCONCLUSIVE_PATTERN, else "hard_fail". */
+function classifyOutcome(score: number, threshold: number, details: string): CriterionOutcome {
+  const meetsThreshold = score >= threshold;
+  if (meetsThreshold) return "pass";
+  // Below threshold: the wording of the evaluator's details decides between the two failure kinds.
+  return INCONCLUSIVE_PATTERN.test(details) ? "inconclusive" : "hard_fail";
+}
+
+/** Seed a stability snapshot from one evaluation; criteria without feedback count as score 0 with a placeholder detail. */
+export function buildStabilityStateFromEval(
+  contract: SprintContract,
+  evalResult: EvalResult,
+  passThreshold: number,
+): SprintStabilityState {
+  const snapshot: SprintStabilityState = { sprintNumber: contract.sprintNumber, criteria: {} };
+
+  for (const { name } of contract.criteria) {
+    const reported = evalResult.feedback.find((item) => item.criterion === name);
+    const observedScore = reported?.score ?? 0;
+    const outcome = classifyOutcome(
+      observedScore,
+      getCriterionThreshold(contract, name, passThreshold),
+      reported?.details ?? "No evaluator feedback",
+    );
+    const didPass = outcome === "pass";
+
+    snapshot.criteria[name] = {
+      locked: didPass,
+      bestScore: didPass ? observedScore : 0,
+      consecutiveHardFails: outcome === "hard_fail" ? 1 : 0,
+      lastObservedScore: observedScore,
+      lastObservedOutcome: outcome,
+    };
+  }
+  return snapshot;
+}
+
+/**
+ * Merge a raw evaluation with the sprint's stability snapshot and produce the effective result.
+ *
+ * With retryStrategy "stabilized", a criterion that passed before (locked) keeps a passing score while the
+ * current check is inconclusive, or while its consecutive hard fails stay below `hardFailUnlockStreak`.
+ * Criteria missing from the raw feedback are scored 0. Returns the result, the next snapshot and counters.
+ */
+export function stabilizeEvaluation(
+  contract: SprintContract,
+  rawEvalResult: EvalResult,
+  config: Pick<HarnessConfig, "passThreshold" | "retryStrategy" | "hardFailUnlockStreak">,
+  previousState?: SprintStabilityState,
+): { result: EvalResult; state: SprintStabilityState; summary: StabilizationSummary } {
+  const summary: StabilizationSummary = {
+    lockedPassRetained: 0,
+    unlockedRegressions: 0,
+    inconclusiveRetained: 0,
+  };
+
+  const stateCriteria: SprintStabilityState["criteria"] = {};
+  const scores: Record<string, number> = {};
+  const feedback = contract.criteria.map((criterion) => {
+    const threshold = getCriterionThreshold(contract, criterion.name, config.passThreshold);
+    const rawItem = rawEvalResult.feedback.find((f) => f.criterion === criterion.name) ?? {
+      criterion: criterion.name,
+      score: 0,
+      details: "No evaluator feedback returned for this criterion",
+    };
+
+    const rawOutcome = classifyOutcome(rawItem.score, threshold, rawItem.details);
+    const prev = previousState?.criteria[criterion.name];
+
+    let effectiveScore = rawItem.score;
+    let effectiveDetails = rawItem.details;
+    let locked = prev?.locked ?? false;
+    let bestScore = prev?.bestScore ?? 0;
+    let consecutiveHardFails = prev?.consecutiveHardFails ?? 0;
+
+    if (rawOutcome === "pass") {
+      locked = true;
+      bestScore = Math.max(bestScore, rawItem.score);
+      consecutiveHardFails = 0;
+    } else if (config.retryStrategy === "stabilized" && prev?.locked) {
+      if (rawOutcome === "inconclusive") {
+        effectiveScore = Math.max(bestScore, threshold);
+        effectiveDetails = `${rawItem.details} [stabilized: retained previous verified pass because this check was inconclusive in the current environment]`;
+        summary.lockedPassRetained += 1;
+        summary.inconclusiveRetained += 1;
+        consecutiveHardFails = 0;
+      } else {
+        consecutiveHardFails += 1;
+        if (consecutiveHardFails < config.hardFailUnlockStreak) {
+          effectiveScore = Math.max(bestScore, threshold);
+          effectiveDetails = `${rawItem.details} [stabilized: retained previous verified pass; hard fail ${consecutiveHardFails}/${config.hardFailUnlockStreak} before unlock]`;
+          summary.lockedPassRetained += 1;
+        } else {
+          locked = false;
+          summary.unlockedRegressions += 1;
+        }
+      }
+    } else if (rawOutcome === "hard_fail") {
+      consecutiveHardFails += 1;
+    }
+
+    // Deliberately no re-classification of the stabilized score here: a retained pass must not reset
+    // consecutiveHardFails, otherwise the streak never reaches hardFailUnlockStreak and a lock is permanent.
+    scores[criterion.name] = effectiveScore;
+    stateCriteria[criterion.name] = {
+      locked,
+      bestScore,
+      consecutiveHardFails,
+      lastObservedScore: rawItem.score,
+      lastObservedOutcome: rawOutcome,
+    };
+
+    return {
+      criterion: criterion.name,
+      score: effectiveScore,
+      details: effectiveDetails,
+    };
+  });
+
+  const passed = contract.criteria.every((criterion) => {
+    const threshold = getCriterionThreshold(contract, criterion.name, config.passThreshold);
+    const score = scores[criterion.name] ?? 0;
+    return score >= threshold;
+  });
+
+  return {
+    result: {
+      passed,
+      scores,
+      feedback,
+      overallSummary: rawEvalResult.overallSummary,
+    },
+    state: {
+      sprintNumber: contract.sprintNumber,
+      criteria: stateCriteria,
+    },
+    summary,
+  };
+}
+
+/** Names of contract criteria whose score in `evalResult` is below their threshold (missing feedback counts as 0). */
+export function getFailedCriteria(
+  contract: SprintContract,
+  evalResult: EvalResult,
+  passThreshold: number,
+): string[] {
+  const failing: string[] = [];
+  for (const { name } of contract.criteria) {
+    const reported = evalResult.feedback.find((item) => item.criterion === name);
+    if ((reported?.score ?? 0) < getCriterionThreshold(contract, name, passThreshold)) failing.push(name);
+  }
+  return failing;
+}
diff --git a/shared/files.ts b/shared/files.ts
index 23263af..a0c90b8 100644
--- a/shared/files.ts
+++ b/shared/files.ts
@@ -1,23 +1,29 @@
 import { mkdir, readFile, writeFile, access, rm, readdir, unlink } from "fs/promises";
 import { join } from "path";
 import { execSync } from "child_process";
-import type { SprintContract, EvalResult, HarnessProgress } from "./types.ts";
+import type { SprintContract, EvalResult, HarnessProgress, SprintStabilityState } from "./types.ts";
 
-export async function initWorkspace(workDir: string): Promise<void> {
+export async function initWorkspace(
+  workDir: string,
+  options: { clean?: boolean } = {},
+): Promise<void> {
+  const clean = options.clean ?? true;
   await mkdir(join(workDir, "contracts"), { recursive: true });
   await mkdir(join(workDir, "feedback"), { recursive: true });
   await mkdir(join(workDir, "app"), { recursive: true });
 
   // Clean stale artifacts from previous runs
-  try { await unlink(join(workDir, "spec.md")); } catch {}
-  try { await unlink(join(workDir, "progress.json")); } catch {}
-  for (const dir of ["contracts", "feedback"]) {
-    try {
-      const files = await readdir(join(workDir, dir));
-      for (const f of files) {
-        await unlink(join(workDir, dir, f));
-      }
-    } catch {}
+  if (clean) {
+    try { await unlink(join(workDir, "spec.md")); } catch {}
+    try { await unlink(join(workDir, "progress.json")); } catch {}
+    for (const dir of ["contracts", "feedback"]) {
+      try {
+        const files = await readdir(join(workDir, dir));
+        for (const f of files) {
+          await unlink(join(workDir, dir, f));
+        }
+      } catch {}
+    }
   }
 
   // Initialize app/ as its own git repo so agent commits stay isolated
@@ -37,6 +43,29 @@ export async function initWorkspace(workDir: string): Promise<void> {
   }
 }
 
+/** Highest N among `feedback/sprint-{sprintNumber}-round-N.json` files; null if none exist or the directory cannot be read. */
+export async function findLatestFeedbackRound(
+  workDir: string,
+  sprintNumber: number,
+): Promise<number | null> {
+  const roundFile = new RegExp(`^sprint-${sprintNumber}-round-(\\d+)\\.json$`);
+
+  let entries: string[];
+  try {
+    entries = await readdir(join(workDir, "feedback"));
+  } catch {
+    return null;
+  }
+  let latest: number | null = null;
+  for (const entry of entries) {
+    const hit = roundFile.exec(entry);
+    if (!hit) continue;
+    const round = parseInt(hit[1]!, 10);
+    if (latest === null || round > latest) latest = round;
+  }
+  return latest;
+}
+
 export async function writeSpec(workDir: string, spec: string): Promise<void> {
   await writeFile(join(workDir, "spec.md"), spec, "utf-8");
 }
@@ -84,6 +113,28 @@ export async function readFeedback(
   }
 }
 
+/** Persist a sprint's stability snapshot as pretty-printed JSON at `feedback/sprint-{sprintNumber}-stability.json`. */
+export async function writeSprintStabilityState(
+  workDir: string,
+  sprintNumber: number,
+  state: SprintStabilityState,
+): Promise<void> {
+  await writeFile(join(workDir, "feedback", `sprint-${sprintNumber}-stability.json`), JSON.stringify(state, null, 2), "utf-8");
+}
+
+/**
+ * Load a sprint's stability snapshot from `feedback/`. Read errors propagate unchanged; malformed JSON is rethrown naming the file.
+ */
+export async function readSprintStabilityState(workDir: string, sprintNumber: number): Promise<SprintStabilityState> {
+  const snapshotFile = join(workDir, "feedback", `sprint-${sprintNumber}-stability.json`);
+  const text = await readFile(snapshotFile, "utf-8");
+  try {
+    return JSON.parse(text) as SprintStabilityState;
+  } catch {
+    throw new Error(`Invalid JSON in stability state file: ${snapshotFile}`);
+  }
+}
+
 export async function writeProgress(workDir: string, progress: HarnessProgress): Promise<void> {
   await writeFile(join(workDir, "progress.json"), JSON.stringify(progress, null, 2), "utf-8");
 }
diff --git a/shared/prompts.ts b/shared/prompts.ts
index 138fb7c..70c1312 100644
--- a/shared/prompts.ts
+++ b/shared/prompts.ts
@@ -127,7 +127,7 @@ You MUST output your evaluation as a JSON object (and nothing else) with this ex
 }
 \`\`\`
 
-A sprint PASSES only if ALL criteria score at or above the threshold (default: 7).
+A sprint PASSES only if ALL criteria score at or above the criterion threshold (integer 1-10, default: 7).
 If ANY criterion falls below the threshold, the sprint FAILS and work goes back to the generator.`;
 
 export const CONTRACT_NEGOTIATION_GENERATOR_PROMPT = `You are proposing a sprint contract. Based on the product spec and the sprint number, propose what you will build and how success should be measured.
@@ -152,6 +152,7 @@ Rules:
 - Each criterion must be SPECIFIC and TESTABLE (not vague like "works well")
 - Include 5-15 criteria per sprint depending on complexity
 - Criteria should cover: functionality, error handling, code quality, and user experience
+- \`threshold\` MUST be an integer score threshold on a 1-10 scale (typically 6-9), not a raw metric target like milliseconds or bytes. Put raw targets in the description text.
 - Output ONLY the JSON, no other text`;
 
 export const CONTRACT_NEGOTIATION_EVALUATOR_PROMPT = `You are reviewing a proposed sprint contract. Evaluate whether the criteria are specific enough, testable, and comprehensive.
@@ -164,4 +165,5 @@ Rules:
 - Criteria must be testable by reading code and running the app
 - Vague criteria like "works well" or "looks good" must be made specific
 - Ensure coverage of error handling and edge cases, not just happy paths
+- Ensure every criterion uses an integer \`threshold\` on the 1-10 score scale; move raw targets (ms, bytes, ratios) into the description text
 - Output either "APPROVED" or the revised JSON contract, nothing else`;
diff --git a/shared/types.ts b/shared/types.ts
index 6e31c1e..504acfe 100644
--- a/shared/types.ts
+++ b/shared/types.ts
@@ -4,8 +4,14 @@ export interface HarnessConfig {
   maxSprints: number;
   maxRetriesPerSprint: number;
   passThreshold: number;
+  resumeMode?: ResumeMode;
+  retryStrategy: RetryStrategy;
+  hardFailUnlockStreak: number;
 }
 
+export type ResumeMode = "strict" | "reset-retries" | "reset-contract"; // strict: continue after the last feedback round; reset-retries: retry counter back to 0, contract reused; reset-contract: retry 0 and contract renegotiated
+export type RetryStrategy = "strict" | "stabilized"; // "stabilized" lets stabilizeEvaluation retain previously passed (locked) criteria; "strict" uses raw scores
+
 export interface SprintContract {
   sprintNumber: number;
   features: string[];
@@ -51,3 +57,24 @@ export interface HarnessResult {
   sprints: SprintResult[];
   totalDurationMs: number;
 }
+
+export type CriterionOutcome = "pass" | "inconclusive" | "hard_fail"; // pass: score >= threshold; below it: "inconclusive" if the details match INCONCLUSIVE_PATTERN, else "hard_fail"
+
+export interface CriterionStabilityState { // per-criterion record kept between evaluation rounds of one sprint
+  locked: boolean; // set once the criterion passes; cleared when a hard-fail streak unlocks it
+  bestScore: number; // best passing score seen so far (0 until the first pass)
+  consecutiveHardFails: number; // running hard-fail count; reset to 0 on a pass
+  lastObservedScore: number; // raw evaluator score from the latest round (before stabilization)
+  lastObservedOutcome: CriterionOutcome; // classification of that raw score
+}
+
+export interface SprintStabilityState { // snapshot written to feedback/sprint-{n}-stability.json
+  sprintNumber: number; // copied from the contract's sprintNumber
+  criteria: Record<string, CriterionStabilityState>; // keyed by criterion name
+}
+
+export interface StabilizationSummary { // counters produced by one stabilizeEvaluation call
+  lockedPassRetained: number; // locked criteria whose failing raw score was replaced by a retained pass
+  unlockedRegressions: number; // locked criteria unlocked because the hard-fail streak was reached
+  inconclusiveRetained: number; // subset of lockedPassRetained retained because the check was inconclusive
+}