diff --git a/.gitignore b/.gitignore
index b9c0f2b..7ec0419 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,11 @@ npm-debug.log*
 
 # CASCADE tooling metadata
 .cascade-progress-comment-id
+
+# Eval harness — per-run artifacts and judge cache (keep .gitkeep)
+evals/results/*
+!evals/results/.gitkeep
+evals/.judge-cache.json
+evals/fixtures/*/node_modules/
+evals/fixtures/*/.squint.db
+evals/fixtures/*/dist/
diff --git a/bin/dev.js b/bin/dev.js
index 2b5ae1d..e1939e1 100755
--- a/bin/dev.js
+++ b/bin/dev.js
@@ -1,5 +1,6 @@
 #!/usr/bin/env node
 
+import 'dotenv/config';
 import { execute } from '@oclif/core';
 
 await execute({ development: true, dir: import.meta.url });
diff --git a/bin/run.js b/bin/run.js
index c09e49a..59b8a7a 100755
--- a/bin/run.js
+++ b/bin/run.js
@@ -1,5 +1,6 @@
 #!/usr/bin/env node
 
+import 'dotenv/config';
 import { execute } from '@oclif/core';
 
 try {
diff --git a/evals/README.md b/evals/README.md
new file mode 100644
index 0000000..74960d6
--- /dev/null
+++ b/evals/README.md
@@ -0,0 +1,60 @@
+# Squint Evaluation Harness
+
+End-to-end evaluation of the squint ingestion pipeline against hand-authored ground truth.
+
+## How it works
+
+1. **Fixture**: a small, real, runnable TypeScript repo at `evals/fixtures/<name>/`
+2. **Ground truth**: typed declarative records at `evals/ground-truth/<name>/` describing what squint *should* produce
+3. **Harness**: shared code at `evals/harness/` that builds, runs, compares, and reports
+4. **Eval test**: `evals/<name>.eval.ts` — a Vitest test that wires it all together
+5. **Baseline**: a committed scoreboard at `evals/baselines/<name>.json` tracking progress per stage
+
+## Running
+
+```bash
+# Run all evals (costs LLM credits!)
+npm run eval
+
+# Run a specific eval
+npm run eval -- todo-api.eval.ts
+
+# Run a specific stage's tests within an eval
+npm run eval -- todo-api.eval.ts -t "parse stage"
+
+# Watch mode for harness development
+npm run eval:watch
+```
+
+## Cost guardrails
+
+- All LLM calls are scoped per-stage via `--from-stage`/`--to-stage` — never the full pipeline accidentally
+- Per-run cost budget enforced via `EVAL_COST_BUDGET_USD` (default `0.50`)
+- Prose-judge results cached at `evals/results/.judge-cache.json` (gitignored)
+
+## Environment variables
+
+| Var | Default | Purpose |
+|---|---|---|
+| `EVAL_JUDGE_MODEL` | `openrouter:anthropic/claude-haiku-4` | LLM used to score prose similarity |
+| `EVAL_COST_BUDGET_USD` | `0.50` | Hard fail if a single run exceeds this |
+| `EVAL_RUNS_PER_STAGE` | `1` | Re-run LLM stages N times to detect non-determinism |
+| `EVAL_KEEP_ALL` | unset | Keep all historical results instead of rotating |
+
+## Iteration plan
+
+The harness is built up one pipeline stage at a time. Each iteration adds exactly one
+LLM stage on top of a known-passing base, so when iteration N fails the bug is in stage N.
+
+See `/home/zbigniew/.claude/plans/validated-sprouting-mochi.md` for the full plan.
+
+| Iter | Stages | Cost/run |
+|---|---|---|
+| 1 | parse | $0 |
+| 2 | + symbols | ~$0.05 |
+| 3 | + relationships | ~$0.10 |
+| 4 | + modules | ~$0.15 |
+| 5 | + contracts | ~$0.20 |
+| 6 | + interactions | ~$0.25 |
+| 7 | + flows | ~$0.30 |
+| 8 | + features | ~$0.35 |
diff --git a/evals/baselines/bookstore-api.json b/evals/baselines/bookstore-api.json
new file mode 100644
index 0000000..c0e6df1
--- /dev/null
+++ b/evals/baselines/bookstore-api.json
@@ -0,0 +1,87 @@
+{
+  "fixture": "bookstore-api",
+  "lastRun": "2026-04-11T12:04:05.560Z",
+  "squintCommit": "b8e0f70",
+  "tableScores": {
+    "files": {
+      "passed": true,
+      "expected": 18,
+      "produced": 18,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "definitions": {
+      "passed": true,
+      "expected": 97,
+      "produced": 97,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "imports": {
+      "passed": true,
+      "expected": 15,
+      "produced": 15,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "definition_metadata": {
+      "passed": true,
+      "expected": 95,
+      "produced": 305,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "relationship_annotations": {
+      "passed": true,
+      "expected": 9,
+      "produced": 89,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "module_cohesion": {
+      "passed": true,
+      "expected": 11,
+      "produced": 97,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "contracts": {
+      "passed": true,
+      "expected": 11,
+      "produced": 11,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "interaction_rubric": {
+      "passed": true,
+      "expected": 5,
+      "produced": 24,
+      "critical": 0,
+      "major": 0,
+      "minor": 1
+    },
+    "flow_rubric": {
+      "passed": true,
+      "expected": 2,
+      "produced": 19,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "feature_cohesion": {
+      "passed": true,
+      "expected": 2,
+      "produced": 5,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    }
+  }
+}
diff --git a/evals/baselines/todo-api.json b/evals/baselines/todo-api.json
new file mode 100644
index 0000000..208cd44
--- /dev/null
+++ b/evals/baselines/todo-api.json
@@ -0,0 +1,87 @@
+{
+  "fixture": "todo-api",
+  "lastRun": "2026-04-10T17:44:42.211Z",
+  "squintCommit": "8b7ad46",
+  "tableScores": {
+    "files": {
+      "passed": true,
+      "expected": 14,
+      "produced": 14,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "definitions": {
+      "passed": true,
+      "expected": 50,
+      "produced": 50,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "imports": {
+      "passed": true,
+      "expected": 25,
+      "produced": 25,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "definition_metadata": {
+      "passed": true,
+      "expected": 122,
+      "produced": 161,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "relationship_annotations": {
+      "passed": true,
+      "expected": 35,
+      "produced": 69,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "module_cohesion": {
+      "passed": true,
+      "expected": 12,
+      "produced": 50,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "contracts": {
+      "passed": true,
+      "expected": 11,
+      "produced": 11,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "interaction_rubric": {
+      "passed": true,
+      "expected": 4,
+      "produced": 25,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "flow_rubric": {
+      "passed": true,
+      "expected": 2,
+      "produced": 14,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    },
+    "feature_cohesion": {
+      "passed": true,
+      "expected": 2,
+      "produced": 4,
+      "critical": 0,
+      "major": 0,
+      "minor": 0
+    }
+  }
+}
diff --git a/evals/bookstore-api.eval.ts b/evals/bookstore-api.eval.ts
new file mode 100644
index 0000000..eda483b
--- /dev/null
+++ b/evals/bookstore-api.eval.ts
@@ -0,0 +1,240 @@
+import { describe, it } from 'vitest';
+import { bookstoreApiGroundTruth } from './ground-truth/bookstore-api/index.js';
+import { makeLlmProseJudge } from './harness/comparator/llm-prose-judge.js';
+import { defineFixture } from './harness/fixture-config.js';
+import { runIterationStep } from './harness/iteration.js';
+
+const BOOKSTORE = defineFixture('bookstore-api');
+
+describe('bookstore-api eval', () => {
+  it('iteration 1: parse stage produces expected files, definitions, and imports', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'parse',
+      toStage: 'parse',
+      scope: ['files', 'definitions', 'imports'],
+      timeoutMs: 60_000,
+    });
+  }, 120_000);
+
+  it('iteration 2: symbols stage produces expected definition_metadata', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'symbols',
+      toStage: 'symbols',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata'],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 180_000,
+    });
+  }, 300_000);
+
+  it('iteration 3: relationships stage produces expected relationship_annotations', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'relationships',
+      toStage: 'relationships',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations'],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 240_000,
+    });
+  }, 360_000);
+
+  it('iteration 3.5: relationships-verify stage preserves relationship_annotations', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'relationships-verify',
+      toStage: 'relationships-verify',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations'],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 300_000,
+      costBudgetUsd: 0.2,
+    });
+  }, 420_000);
+
+  it('iteration 4: modules stage produces expected module cohesion', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'modules',
+      toStage: 'modules',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations', 'module_cohesion'],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 360_000,
+      costBudgetUsd: 0.2,
+    });
+  }, 480_000);
+
+  it('iteration 4.5: modules-verify stage preserves cohesion', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'modules-verify',
+      toStage: 'modules-verify',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations', 'module_cohesion'],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 420_000,
+      costBudgetUsd: 0.3,
+    });
+  }, 540_000);
+
+  it('iteration 5: contracts stage extracts expected HTTP routes', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'contracts',
+      toStage: 'contracts',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 420_000,
+      costBudgetUsd: 0.3,
+    });
+  }, 540_000);
+
+  it('iteration 6: interactions stage produces expected module-pair edges', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'interactions',
+      toStage: 'interactions',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 480_000,
+      costBudgetUsd: 0.4,
+    });
+  }, 600_000);
+
+  it('iteration 6.5: interactions-validate stage preserves the rubric', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'interactions-validate',
+      toStage: 'interactions-validate',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 480_000,
+      costBudgetUsd: 0.4,
+    });
+  }, 600_000);
+
+  it('iteration 6.6: interactions-verify stage preserves the rubric', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'interactions-verify',
+      toStage: 'interactions-verify',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 540_000,
+      costBudgetUsd: 0.4,
+    });
+  }, 660_000);
+
+  it('iteration 7: flows stage produces expected user journeys', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'flows',
+      toStage: 'flows',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+        'flow_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 600_000,
+      costBudgetUsd: 0.5,
+    });
+  }, 720_000);
+
+  it('iteration 7.5: flows-verify stage preserves the flow rubric', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'flows-verify',
+      toStage: 'flows-verify',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+        'flow_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 660_000,
+      costBudgetUsd: 0.5,
+    });
+  }, 780_000);
+
+  it('iteration 8: features stage groups flows into expected product features', async () => {
+    await runIterationStep({
+      fixture: BOOKSTORE,
+      groundTruth: bookstoreApiGroundTruth,
+      label: 'features',
+      toStage: 'features',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+        'flow_rubric',
+        'feature_cohesion',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: BOOKSTORE.judgeCachePath }),
+      timeoutMs: 720_000,
+      costBudgetUsd: 0.5,
+    });
+  }, 840_000);
+});
diff --git a/evals/fixtures/bookstore-api/Gemfile b/evals/fixtures/bookstore-api/Gemfile
new file mode 100644
index 0000000..1f616ad
--- /dev/null
+++ b/evals/fixtures/bookstore-api/Gemfile
@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+
+gem 'rails', '~> 7.1'
+gem 'bcrypt', '~> 3.1'
diff --git a/evals/fixtures/bookstore-api/app/controllers/api/base_controller.rb b/evals/fixtures/bookstore-api/app/controllers/api/base_controller.rb
new file mode 100644
index 0000000..710cd21
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/controllers/api/base_controller.rb
@@ -0,0 +1,25 @@
+module Api
+  class BaseController < ApplicationController
+    before_action :authenticate!
+
+    private
+
+    def render_success(data, status: :ok)
+      render json: { data: data }, status: status
+    end
+
+    def render_error(message, status: :unprocessable_entity)
+      render json: { error: message }, status: status
+    end
+
+    def render_not_found(resource = 'Resource')
+      render json: { error: "#{resource} not found" }, status: :not_found
+    end
+
+    def paginate(scope)
+      page = (params[:page] || 1).to_i
+      per_page = [(params[:per_page] || 25).to_i, 100].min
+      scope.offset((page - 1) * per_page).limit(per_page)
+    end
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/controllers/api/books_controller.rb b/evals/fixtures/bookstore-api/app/controllers/api/books_controller.rb
new file mode 100644
index 0000000..862b69c
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/controllers/api/books_controller.rb
@@ -0,0 +1,59 @@
+module Api
+  class BooksController < BaseController
+    skip_before_action :authenticate!, only: [:index, :show]
+    before_action :set_book, only: [:show, :update, :destroy, :restock]
+    before_action :require_admin!, only: [:create, :update, :destroy, :restock]
+
+    def index
+      books = paginate(Book.includes(:author).in_stock)
+      render_success(books.map { |b| BookSerializer.new(b).as_json })
+    end
+
+    def show
+      render_success(BookSerializer.new(@book).as_json)
+    end
+
+    def create
+      book = Book.new(book_params)
+      if book.save
+        render_success(BookSerializer.new(book).as_json, status: :created)
+      else
+        render_error(book.errors.full_messages.join(', '))
+      end
+    end
+
+    def update
+      if @book.update(book_params)
+        render_success(BookSerializer.new(@book).as_json)
+      else
+        render_error(@book.errors.full_messages.join(', '))
+      end
+    end
+
+    def destroy
+      @book.destroy!
+      head :no_content
+    end
+
+    def restock
+      quantity = params[:quantity].to_i
+      @book.update!(stock_count: @book.stock_count + quantity)
+      render_success(BookSerializer.new(@book).as_json)
+    end
+
+    private
+
+    def set_book
+      @book = Book.find_by(id: params[:id])
+      render_not_found('Book') unless @book
+    end
+
+    def book_params
+      params.require(:book).permit(:title, :isbn, :price_cents, :stock_count, :author_id, :published)
+    end
+
+    def require_admin!
+      render_error('Forbidden', status: :forbidden) unless current_user&.admin?
+    end
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/controllers/api/orders_controller.rb b/evals/fixtures/bookstore-api/app/controllers/api/orders_controller.rb
new file mode 100644
index 0000000..1bc315d
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/controllers/api/orders_controller.rb
@@ -0,0 +1,40 @@
+module Api
+  class OrdersController < BaseController
+    before_action :set_order, only: [:show]
+
+    def index
+      orders = paginate(current_user.orders.order(created_at: :desc))
+      render_success(orders.map { |o| OrderSerializer.new(o).as_json })
+    end
+
+    def show
+      render_success(OrderSerializer.new(@order).as_json)
+    end
+
+    def create
+      service = CheckoutService.new(
+        user: current_user,
+        items: order_params[:items]
+      )
+
+      result = service.call
+
+      if result.success?
+        render_success(OrderSerializer.new(result.order).as_json, status: :created)
+      else
+        render_error(result.error)
+      end
+    end
+
+    private
+
+    def set_order
+      @order = current_user.orders.find_by(id: params[:id])
+      render_not_found('Order') unless @order
+    end
+
+    def order_params
+      params.require(:order).permit(items: [:book_id, :quantity])
+    end
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/controllers/api/sessions_controller.rb b/evals/fixtures/bookstore-api/app/controllers/api/sessions_controller.rb
new file mode 100644
index 0000000..eb6c30c
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/controllers/api/sessions_controller.rb
@@ -0,0 +1,33 @@
+module Api
+  class SessionsController < BaseController
+    skip_before_action :authenticate!, only: [:create]
+
+    def create
+      user = User.authenticate(session_params[:email], session_params[:password])
+
+      if user
+        token = generate_auth_token(user)
+        render_success({ token: token, user: { id: user.id, email: user.email, name: user.name } })
+      else
+        render_error('Invalid email or password', status: :unauthorized)
+      end
+    end
+
+    def destroy
+      current_user.update!(auth_token: nil)
+      head :no_content
+    end
+
+    private
+
+    def session_params
+      params.require(:session).permit(:email, :password)
+    end
+
+    def generate_auth_token(user)
+      token = SecureRandom.hex(32)
+      user.update!(auth_token: token)
+      token
+    end
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/controllers/application_controller.rb b/evals/fixtures/bookstore-api/app/controllers/application_controller.rb
new file mode 100644
index 0000000..f6cf8d2
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/controllers/application_controller.rb
@@ -0,0 +1,20 @@
+class ApplicationController < ActionController::API
+  before_action :set_request_id
+
+  private
+
+  def current_user
+    return @current_user if defined?(@current_user)
+
+    token = request.headers['Authorization']&.split(' ')&.last
+    @current_user = token ? User.find_by(auth_token: token) : nil
+  end
+
+  def authenticate!
+    render json: { error: 'Unauthorized' }, status: :unauthorized unless current_user
+  end
+
+  def set_request_id
+    Thread.current[:request_id] = request.request_id
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/jobs/inventory_check_job.rb b/evals/fixtures/bookstore-api/app/jobs/inventory_check_job.rb
new file mode 100644
index 0000000..16f7711
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/jobs/inventory_check_job.rb
@@ -0,0 +1,22 @@
+class InventoryCheckJob < ApplicationJob
+  queue_as :default
+
+  def perform(order)
+    order.order_items.includes(:book).each do |item|
+      stock_info = InventoryService.check_stock(item.book)
+
+      if stock_info[:low_stock]
+        Rails.logger.warn(
+          "Low stock alert: #{stock_info[:title]} has #{stock_info[:stock_count]} remaining"
+        )
+        notify_admin(stock_info)
+      end
+    end
+  end
+
+  private
+
+  def notify_admin(stock_info)
+    AdminNotifier.low_stock(stock_info).deliver_later
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/mailers/order_mailer.rb b/evals/fixtures/bookstore-api/app/mailers/order_mailer.rb
new file mode 100644
index 0000000..01bb283
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/mailers/order_mailer.rb
@@ -0,0 +1,22 @@
+class OrderMailer < ApplicationMailer
+  def confirmation(order)
+    @order = order
+    @user = order.user
+    @items = order.order_items.includes(:book)
+
+    mail(
+      to: @user.email,
+      subject: "Order ##{order.id} confirmed"
+    )
+  end
+
+  def cancellation(order)
+    @order = order
+    @user = order.user
+
+    mail(
+      to: @user.email,
+      subject: "Order ##{order.id} cancelled"
+    )
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/models/application_record.rb b/evals/fixtures/bookstore-api/app/models/application_record.rb
new file mode 100644
index 0000000..86b6b38
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/models/application_record.rb
@@ -0,0 +1,7 @@
+class ApplicationRecord < ActiveRecord::Base
+  self.abstract_class = true
+
+  def self.recent(limit = 10)
+    order(created_at: :desc).limit(limit)
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/models/author.rb b/evals/fixtures/bookstore-api/app/models/author.rb
new file mode 100644
index 0000000..480f5f8
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/models/author.rb
@@ -0,0 +1,22 @@
+class Author < ApplicationRecord
+  has_many :books, dependent: :destroy
+
+  validates :name, presence: true, uniqueness: true
+  validates :bio, length: { maximum: 2000 }
+
+  scope :with_published_books, -> { joins(:books).where(books: { published: true }).distinct }
+
+  def book_count
+    books.count
+  end
+
+  def full_display_name
+    bio.present? ? "#{name} — #{bio.truncate(80)}" : name
+  end
+
+  private
+
+  def normalize_name
+    self.name = name.strip.titleize if name.present?
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/models/book.rb b/evals/fixtures/bookstore-api/app/models/book.rb
new file mode 100644
index 0000000..ed0bd82
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/models/book.rb
@@ -0,0 +1,37 @@
+class Book < ApplicationRecord
+  belongs_to :author
+  has_many :order_items, dependent: :restrict_with_error
+  has_many :orders, through: :order_items
+
+  validates :title, presence: true
+  validates :isbn, presence: true, uniqueness: true
+  validates :price_cents, numericality: { greater_than: 0 }
+  validates :stock_count, numericality: { greater_than_or_equal_to: 0 }
+
+  scope :in_stock, -> { where('stock_count > 0') }
+  scope :by_author, ->(author_id) { where(author_id: author_id) }
+
+  after_create :log_new_book
+
+  def price
+    price_cents / 100.0
+  end
+
+  def in_stock?
+    stock_count > 0
+  end
+
+  def reserve_stock!(quantity)
+    raise InsufficientStockError, "Only #{stock_count} available" if stock_count < quantity
+
+    update!(stock_count: stock_count - quantity)
+  end
+
+  private
+
+  def log_new_book
+    Rails.logger.info("New book added: #{title} by #{author&.name}")
+  end
+end
+
+class InsufficientStockError < StandardError; end
diff --git a/evals/fixtures/bookstore-api/app/models/order.rb b/evals/fixtures/bookstore-api/app/models/order.rb
new file mode 100644
index 0000000..0efe046
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/models/order.rb
@@ -0,0 +1,46 @@
+class Order < ApplicationRecord
+  STATUS_PENDING = 'pending'
+  STATUS_CONFIRMED = 'confirmed'
+  STATUS_CANCELLED = 'cancelled'
+
+  STATUSES = [STATUS_PENDING, STATUS_CONFIRMED, STATUS_CANCELLED].freeze
+
+  belongs_to :user
+  has_many :order_items, dependent: :destroy
+  has_many :books, through: :order_items
+
+  validates :status, inclusion: { in: STATUSES }
+  validates :total_cents, numericality: { greater_than_or_equal_to: 0 }
+
+  after_create :send_confirmation_email
+  after_create :enqueue_inventory_check
+
+  scope :confirmed, -> { where(status: STATUS_CONFIRMED) }
+  scope :for_user, ->(user_id) { where(user_id: user_id) }
+
+  def confirm!
+    update!(status: STATUS_CONFIRMED)
+  end
+
+  def cancel!
+    return false if status == STATUS_CANCELLED
+
+    update!(status: STATUS_CANCELLED)
+    order_items.each { |item| item.book.update!(stock_count: item.book.stock_count + item.quantity) }
+    true
+  end
+
+  def item_count
+    order_items.sum(:quantity)
+  end
+
+  private
+
+  def send_confirmation_email
+    OrderMailer.confirmation(self).deliver_later
+  end
+
+  def enqueue_inventory_check
+    InventoryCheckJob.perform_later(self)
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/models/order_item.rb b/evals/fixtures/bookstore-api/app/models/order_item.rb
new file mode 100644
index 0000000..ad3fcca
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/models/order_item.rb
@@ -0,0 +1,19 @@
+class OrderItem < ApplicationRecord
+  belongs_to :order
+  belongs_to :book
+
+  validates :quantity, numericality: { greater_than: 0 }
+  validates :unit_price_cents, numericality: { greater_than: 0 }
+
+  before_validation :set_unit_price, on: :create
+
+  def subtotal_cents
+    quantity * unit_price_cents
+  end
+
+  private
+
+  def set_unit_price
+    self.unit_price_cents = book&.price_cents if unit_price_cents.blank?
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/models/user.rb b/evals/fixtures/bookstore-api/app/models/user.rb
new file mode 100644
index 0000000..f6479f1
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/models/user.rb
@@ -0,0 +1,30 @@
+class User < ApplicationRecord
+  has_many :orders, dependent: :nullify
+  has_secure_password
+
+  validates :email, presence: true, uniqueness: true, format: { with: URI::MailTo::EMAIL_REGEXP }
+  validates :name, presence: true
+
+  before_save :downcase_email
+
+  def self.authenticate(email, password)
+    user = find_by(email: email.downcase)
+    return nil unless user&.authenticate(password)
+
+    user
+  end
+
+  def total_spent
+    orders.where(status: Order::STATUS_CONFIRMED).sum(:total_cents)
+  end
+
+  def admin?
+    role == 'admin'
+  end
+
+  private
+
+  def downcase_email
+    self.email = email.downcase if email.present?
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/serializers/book_serializer.rb b/evals/fixtures/bookstore-api/app/serializers/book_serializer.rb
new file mode 100644
index 0000000..53f861d
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/serializers/book_serializer.rb
@@ -0,0 +1,28 @@
+class BookSerializer
+  attr_reader :book
+
+  def initialize(book)
+    @book = book
+  end
+
+  def as_json
+    {
+      id: book.id,
+      title: book.title,
+      isbn: book.isbn,
+      price: book.price,
+      in_stock: book.in_stock?,
+      stock_count: book.stock_count,
+      author: author_summary,
+      published: book.published
+    }
+  end
+
+  private
+
+  def author_summary
+    return nil unless book.author
+
+    { id: book.author.id, name: book.author.name }
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/serializers/order_serializer.rb b/evals/fixtures/bookstore-api/app/serializers/order_serializer.rb
new file mode 100644
index 0000000..66aaffe
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/serializers/order_serializer.rb
@@ -0,0 +1,34 @@
+class OrderSerializer
+  attr_reader :order
+
+  def initialize(order)
+    @order = order
+  end
+
+  def as_json
+    {
+      id: order.id,
+      status: order.status,
+      total: format_price(order.total_cents),
+      item_count: order.item_count,
+      items: serialize_items,
+      created_at: order.created_at&.iso8601
+    }
+  end
+
+  private
+
+  def serialize_items
+    order.order_items.includes(:book).map do |item|
+      {
+        book: BookSerializer.new(item.book).as_json,
+        quantity: item.quantity,
+        unit_price: format_price(item.unit_price_cents)
+      }
+    end
+  end
+
+  def format_price(cents)
+    (cents / 100.0).round(2)
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/services/checkout_service.rb b/evals/fixtures/bookstore-api/app/services/checkout_service.rb
new file mode 100644
index 0000000..c5d34ea
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/services/checkout_service.rb
@@ -0,0 +1,68 @@
+class CheckoutService
+  attr_reader :user, :items, :order, :error
+
+  def initialize(user:, items:)
+    @user = user
+    @items = items
+    @order = nil
+    @error = nil
+  end
+
+  def call
+    return failure('No items provided') if items.blank?
+
+    books = load_and_validate_books
+    return self if error
+
+    ActiveRecord::Base.transaction do
+      @order = Order.create!(
+        user: user,
+        status: Order::STATUS_PENDING,
+        total_cents: 0
+      )
+
+      total = 0
+      books.each do |book, quantity|
+        InventoryService.reserve(book, quantity)
+        OrderItem.create!(
+          order: @order,
+          book: book,
+          quantity: quantity,
+          unit_price_cents: book.price_cents
+        )
+        total += book.price_cents * quantity
+      end
+
+      @order.update!(total_cents: total, status: Order::STATUS_CONFIRMED)
+    end
+
+    self
+  rescue InsufficientStockError => e
+    failure(e.message)
+  rescue ActiveRecord::RecordInvalid => e
+    failure(e.message)
+  end
+
+  def success?
+    error.nil? && order.present?
+  end
+
+  private
+
+  def load_and_validate_books
+    result = {}
+    items.each do |item|
+      book = Book.find_by(id: item[:book_id])
+      return failure("Book #{item[:book_id]} not found") unless book
+      return failure("#{book.title} is out of stock") unless book.in_stock?
+
+      result[book] = item[:quantity].to_i
+    end
+    result
+  end
+
+  def failure(message)
+    @error = message
+    self
+  end
+end
diff --git a/evals/fixtures/bookstore-api/app/services/inventory_service.rb b/evals/fixtures/bookstore-api/app/services/inventory_service.rb
new file mode 100644
index 0000000..2f315fc
--- /dev/null
+++ b/evals/fixtures/bookstore-api/app/services/inventory_service.rb
@@ -0,0 +1,25 @@
+class InventoryService
+  LOW_STOCK_THRESHOLD = 5
+
+  def self.check_stock(book)
+    {
+      book_id: book.id,
+      title: book.title,
+      stock_count: book.stock_count,
+      in_stock: book.in_stock?,
+      low_stock: book.stock_count <= LOW_STOCK_THRESHOLD
+    }
+  end
+
+  def self.reserve(book, quantity)
+    book.reserve_stock!(quantity)
+  end
+
+  def self.low_stock_books
+    Book.where('stock_count > 0 AND stock_count <= ?', LOW_STOCK_THRESHOLD)
+  end
+
+  def self.out_of_stock_books
+    Book.where(stock_count: 0)
+  end
+end
diff --git a/evals/fixtures/bookstore-api/config/routes.rb b/evals/fixtures/bookstore-api/config/routes.rb
new file mode 100644
index 0000000..664d587
--- /dev/null
+++ b/evals/fixtures/bookstore-api/config/routes.rb
@@ -0,0 +1,12 @@
+Rails.application.routes.draw do
+  namespace :api do
+    resources :books, only: [:index, :show, :create, :update, :destroy] do
+      member do
+        post :restock
+      end
+    end
+
+    resources :orders, only: [:index, :show, :create]
+    resources :sessions, only: [:create, :destroy]
+  end
+end
diff --git a/evals/fixtures/todo-api/client/tasks.client.ts b/evals/fixtures/todo-api/client/tasks.client.ts
new file mode 100644
index 0000000..d444106
--- /dev/null
+++ b/evals/fixtures/todo-api/client/tasks.client.ts
@@ -0,0 +1,66 @@
+// Frontend HTTP client. Calls the backend through an injected http function.
+// squint's contract matcher should pair these calls with the backend
+// controllers under the same paths.
+
+import type { NewTaskInput, Task } from '../src/types.js';
+
+const BASE_URL = 'http://localhost:3000';
+
+type HttpFn = (
+  input: string,
+  init?: { method?: string; headers?: Record<string, string>; body?: string }
+) => Promise<{ json(): Promise<unknown> }>;
+
+// Injected by the runtime — Node 18+ globalThis.fetch in production.
+const http: HttpFn = ((globalThis as { fetch?: HttpFn }).fetch ??
+  (() => {
+    throw new Error('no http');
+  })) as HttpFn;
+
+async function request<T>(method: string, path: string, token: string, body?: unknown): Promise<T> {
+  const res = await http(`${BASE_URL}${path}`, {
+    method,
+    headers: {
+      'content-type': 'application/json',
+      authorization: `Bearer ${token}`,
+    },
+    body: body ? JSON.stringify(body) : undefined,
+  });
+  return (await res.json()) as T;
+}
+
+export async function login(email: string, password: string): Promise<{ token: string }> {
+  return request<{ token: string }>('POST', '/api/auth/login', '', { email, password });
+}
+
+export async function register(email: string, password: string): Promise<{ token: string }> {
+  return request<{ token: string }>('POST', '/api/auth/register', '', { email, password });
+}
+
+export async function listTasks(token: string): Promise<Task[]> {
+  return request<Task[]>('GET', '/api/tasks', token);
+}
+
+export async function getTask(token: string, id: string): Promise<Task> {
+  return request<Task>('GET', `/api/tasks/${id}`, token);
+}
+
+export async function createTask(token: string, input: NewTaskInput): Promise<Task> {
+  return request<Task>('POST', '/api/tasks', token, input);
+}
+
+export async function updateTask(
+  token: string,
+  id: string,
+  patch: Partial<Pick<Task, 'title' | 'description'>>
+): Promise<Task> {
+  return request<Task>('PUT', `/api/tasks/${id}`, token, patch);
+}
+
+export async function completeTask(token: string, id: string): Promise<Task> {
+  return request<Task>('PATCH', `/api/tasks/${id}/complete`, token);
+}
+
+export async function deleteTask(token: string, id: string): Promise<{ deleted: boolean }> {
+  return request<{ deleted: boolean }>('DELETE', `/api/tasks/${id}`, token);
+}
diff --git a/evals/fixtures/todo-api/index.ts b/evals/fixtures/todo-api/index.ts
new file mode 100644
index 0000000..1f0e96b
--- /dev/null
+++ b/evals/fixtures/todo-api/index.ts
@@ -0,0 +1,9 @@
+// Public API barrel. Exercises squint's re-export resolver
+// (src/sync/reference-resolver.ts), which is currently dirty in git status —
+// strong hint that bugs may live there.
+
+export { TasksService, tasksService } from './src/services/tasks.service.js';
+export { AuthService, authService } from './src/services/auth.service.js';
+export { TasksRepository, tasksRepository } from './src/repositories/tasks.repository.js';
+export { eventBus, auditLogger } from './src/events/event-bus.js';
+export type { Task, User, NewTaskInput } from './src/types.js';
diff --git a/evals/fixtures/todo-api/package.json b/evals/fixtures/todo-api/package.json
new file mode 100644
index 0000000..245fa3e
--- /dev/null
+++ b/evals/fixtures/todo-api/package.json
@@ -0,0 +1,8 @@
+{
+  "name": "@squint-eval/todo-api",
+  "version": "0.0.0",
+  "private": true,
+  "type": "module",
+  "main": "index.ts",
+  "description": "Tiny todo API fixture for squint eval harness — exercises HTTP contracts, events, inheritance, and re-exports."
+}
diff --git a/evals/fixtures/todo-api/src/controllers/auth.controller.ts b/evals/fixtures/todo-api/src/controllers/auth.controller.ts
new file mode 100644
index 0000000..1d476dd
--- /dev/null
+++ b/evals/fixtures/todo-api/src/controllers/auth.controller.ts
@@ -0,0 +1,45 @@
+import { type Request, type Response, type Router, createRouter } from '../framework.js';
+import { authService } from '../services/auth.service.js';
+import { BaseController } from './base.controller.js';
+
+export class AuthController extends BaseController {
+  router: Router;
+
+  constructor() {
+    super();
+    this.router = createRouter();
+    this.router.post('/register', (req, res) => this.register(req, res));
+    this.router.post('/login', (req, res) => this.login(req, res));
+    this.router.get('/me', (req, res) => this.me(req, res));
+  }
+
+  async register(req: Request, res: Response): Promise<void> {
+    try {
+      const { email, password } = req.body as { email: string; password: string };
+      const result = await authService.register(email, password);
+      this.success(res, result, 201);
+    } catch (err) {
+      this.handleError(res, err);
+    }
+  }
+
+  async login(req: Request, res: Response): Promise<void> {
+    try {
+      const { email, password } = req.body as { email: string; password: string };
+      const result = await authService.login(email, password);
+      this.success(res, result);
+    } catch (err) {
+      this.handleError(res, err);
+    }
+  }
+
+  me(req: Request, res: Response): void {
+    if (!req.user) {
+      this.fail(res, 'unauthorized', 401);
+      return;
+    }
+    this.success(res, req.user);
+  }
+}
+
+export const authController = new AuthController();
diff --git a/evals/fixtures/todo-api/src/controllers/base.controller.ts b/evals/fixtures/todo-api/src/controllers/base.controller.ts
new file mode 100644
index 0000000..cf72085
--- /dev/null
+++ b/evals/fixtures/todo-api/src/controllers/base.controller.ts
@@ -0,0 +1,19 @@
+import type { Response } from '../framework.js';
+
+// BaseController is the inheritance root for all HTTP controllers.
+// squint should detect AuthController and TasksController as `extends BaseController`.
+
+export abstract class BaseController {
+  protected success<T>(res: Response, data: T, statusCode = 200): void {
+    res.status(statusCode).json({ ok: true, data });
+  }
+
+  protected fail(res: Response, message: string, statusCode = 400): void {
+    res.status(statusCode).json({ ok: false, error: message });
+  }
+
+  protected handleError(res: Response, err: unknown): void {
+    const message = err instanceof Error ? err.message : 'unknown error';
+    this.fail(res, message, 500);
+  }
+}
diff --git a/evals/fixtures/todo-api/src/controllers/tasks.controller.ts b/evals/fixtures/todo-api/src/controllers/tasks.controller.ts
new file mode 100644
index 0000000..7ee3964
--- /dev/null
+++ b/evals/fixtures/todo-api/src/controllers/tasks.controller.ts
@@ -0,0 +1,75 @@
+import { type Request, type Response, type Router, createRouter } from '../framework.js';
+import { requireAuth } from '../middleware/auth.middleware.js';
+import { tasksService } from '../services/tasks.service.js';
+import { BaseController } from './base.controller.js';
+
+export class TasksController extends BaseController {
+  router: Router;
+
+  constructor() {
+    super();
+    this.router = createRouter();
+    this.router.get('/', requireAuth, (req, res) => this.list(req, res));
+    this.router.get('/:id', requireAuth, (req, res) => this.get(req, res));
+    this.router.post('/', requireAuth, (req, res) => this.create(req, res));
+    this.router.put('/:id', requireAuth, (req, res) => this.update(req, res));
+    this.router.patch('/:id/complete', requireAuth, (req, res) => this.complete(req, res));
+    this.router.delete('/:id', requireAuth, (req, res) => this.delete(req, res));
+  }
+
+  list(req: Request, res: Response): void {
+    if (!req.user) {
+      this.fail(res, 'unauthorized', 401);
+      return;
+    }
+    this.success(res, tasksService.list(req.user.id));
+  }
+
+  get(req: Request, res: Response): void {
+    const task = tasksService.get(req.params.id);
+    if (!task) {
+      this.fail(res, 'not found', 404);
+      return;
+    }
+    this.success(res, task);
+  }
+
+  create(req: Request, res: Response): void {
+    if (!req.user) {
+      this.fail(res, 'unauthorized', 401);
+      return;
+    }
+    const { title, description } = req.body as { title: string; description: string };
+    const task = tasksService.create(req.user.id, { title, description });
+    this.success(res, task, 201);
+  }
+
+  update(req: Request, res: Response): void {
+    const task = tasksService.update(req.params.id, req.body as { title?: string; description?: string });
+    if (!task) {
+      this.fail(res, 'not found', 404);
+      return;
+    }
+    this.success(res, task);
+  }
+
+  complete(req: Request, res: Response): void {
+    const task = tasksService.complete(req.params.id);
+    if (!task) {
+      this.fail(res, 'not found', 404);
+      return;
+    }
+    this.success(res, task);
+  }
+
+  delete(req: Request, res: Response): void {
+    const ok = tasksService.delete(req.params.id);
+    if (!ok) {
+      this.fail(res, 'not found', 404);
+      return;
+    }
+    this.success(res, { deleted: true });
+  }
+}
+
+export const tasksController = new TasksController();
diff --git a/evals/fixtures/todo-api/src/events/event-bus.ts b/evals/fixtures/todo-api/src/events/event-bus.ts
new file mode 100644
index 0000000..a1b7f30
--- /dev/null
+++ b/evals/fixtures/todo-api/src/events/event-bus.ts
@@ -0,0 +1,35 @@
+// In-memory pub/sub. Exercises a SECOND contract protocol beyond HTTP:
+// squint should detect 'task.created' and 'task.completed' as events
+// with producer (TasksService) and consumer (auditLogger) roles.
+
+export type EventName = 'task.created' | 'task.completed';
+
+export type EventHandler = (payload: Record<string, unknown>) => void;
+
+export class EventBus {
+  private handlers = new Map<EventName, EventHandler[]>();
+
+  subscribe(event: EventName, handler: EventHandler): void {
+    const list = this.handlers.get(event) ?? [];
+    list.push(handler);
+    this.handlers.set(event, list);
+  }
+
+  emit(event: EventName, payload: Record<string, unknown>): void {
+    const list = this.handlers.get(event) ?? [];
+    for (const handler of list) {
+      handler(payload);
+    }
+  }
+}
+
+export const eventBus = new EventBus();
+
+// Audit subscriber. Listens for completion events and logs them. This
+// represents an admin/system stakeholder consuming the 'task.completed' event.
+export function auditLogger(payload: Record<string, unknown>): void {
+  // In a real app, this would write to an audit log table.
+  void payload;
+}
+
+eventBus.subscribe('task.completed', auditLogger);
diff --git a/evals/fixtures/todo-api/src/framework.ts b/evals/fixtures/todo-api/src/framework.ts
new file mode 100644
index 0000000..38c3d59
--- /dev/null
+++ b/evals/fixtures/todo-api/src/framework.ts
@@ -0,0 +1,88 @@
+// Minimal in-fixture HTTP framework so the todo-api compiles without
+// real Express. squint sees these calls as `router.METHOD(path, handler)`
+// patterns just like the real thing.
+
+export interface Request {
+  body: Record<string, unknown>;
+  params: Record<string, string>;
+  headers: Record<string, string>;
+  user?: { id: string; email: string };
+}
+
+export interface Response {
+  status(code: number): Response;
+  json(data: unknown): Response;
+}
+
+export type NextFunction = () => void;
+export type Handler = (req: Request, res: Response, next?: NextFunction) => unknown;
+
+export interface Router {
+  get(path: string, ...handlers: Handler[]): void;
+  post(path: string, ...handlers: Handler[]): void;
+  put(path: string, ...handlers: Handler[]): void;
+  patch(path: string, ...handlers: Handler[]): void;
+  delete(path: string, ...handlers: Handler[]): void;
+}
+
+export interface App {
+  use(pathOrRouter: string | Router, router?: Router): void;
+  listen(port: number, cb?: () => void): void;
+}
+
+/**
+ * Module-level registry of every router instance constructed at runtime.
+ * Used by the framework to track mounted routes for diagnostics.
+ *
+ * Mutated by createRouter() — this is what makes the function unambiguously
+ * impure (it has a side effect on module state, not just returning a value).
+ */
+const routerRegistry: Router[] = [];
+
+/**
+ * Module-level registry of every app instance constructed at runtime.
+ * Mutated by createApp(). Same purpose as routerRegistry above — keeps
+ * createApp's classification as impure unambiguous.
+ */
+const appRegistry: App[] = [];
+
+export function createRouter(): Router {
+  const handlers: Map<string, Handler[]> = new Map();
+  const register =
+    (method: string) =>
+    (path: string, ...hs: Handler[]) => {
+      handlers.set(`${method} ${path}`, hs);
+    };
+  const router: Router = {
+    get: register('GET'),
+    post: register('POST'),
+    put: register('PUT'),
+    patch: register('PATCH'),
+    delete: register('DELETE'),
+  };
+  // Side effect: append to module-level registry. Makes this function impure.
+  routerRegistry.push(router);
+  return router;
+}
+
+export function createApp(): App {
+  const mounted: Array<{ path: string; router: Router }> = [];
+  let started = false;
+  const app: App = {
+    use(pathOrRouter, router) {
+      if (typeof pathOrRouter === 'string' && router) {
+        mounted.push({ path: pathOrRouter, router });
+      }
+    },
+    listen(_port, cb) {
+      // Side effect: mutate the captured `started` flag.
+      started = true;
+      if (cb) cb();
+    },
+  };
+  // Side effect: append to module-level registry. Makes this function impure.
+  appRegistry.push(app);
+  // Reference `started` so the closure capture is observable to the LLM.
+  void started;
+  return app;
+}
diff --git a/evals/fixtures/todo-api/src/index.ts b/evals/fixtures/todo-api/src/index.ts
new file mode 100644
index 0000000..cd3ff8e
--- /dev/null
+++ b/evals/fixtures/todo-api/src/index.ts
@@ -0,0 +1,16 @@
+// Express-style bootstrap. Mounts the auth and tasks routers.
+// squint should detect the mounted routes and the entry point modules.
+
+import { authController } from './controllers/auth.controller.js';
+import { tasksController } from './controllers/tasks.controller.js';
+import { createApp } from './framework.js';
+
+const app = createApp();
+
+app.use('/api/auth', authController.router);
+app.use('/api/tasks', tasksController.router);
+
+const PORT = 3000;
+app.listen(PORT, () => {
+  // Server started
+});
diff --git a/evals/fixtures/todo-api/src/middleware/auth.middleware.ts b/evals/fixtures/todo-api/src/middleware/auth.middleware.ts
new file mode 100644
index 0000000..b6fc8fe
--- /dev/null
+++ b/evals/fixtures/todo-api/src/middleware/auth.middleware.ts
@@ -0,0 +1,14 @@
+import type { Handler } from '../framework.js';
+import { authService } from '../services/auth.service.js';
+
+export const requireAuth: Handler = (req, res, next) => {
+  const header = req.headers.authorization ?? '';
+  const token = header.startsWith('Bearer ') ? header.slice(7) : '';
+  const user = authService.verify(token);
+  if (!user) {
+    res.status(401).json({ error: 'unauthorized' });
+    return;
+  }
+  req.user = user;
+  next?.();
+};
diff --git a/evals/fixtures/todo-api/src/repositories/base.repository.ts b/evals/fixtures/todo-api/src/repositories/base.repository.ts
new file mode 100644
index 0000000..bcb227e
--- /dev/null
+++ b/evals/fixtures/todo-api/src/repositories/base.repository.ts
@@ -0,0 +1,24 @@
+// Generic abstract repository. Exercises the BaseRepository<T> sharp edge:
+// squint's extends_name extraction must produce 'BaseRepository' (not
+// 'BaseRepository<Task>') for subclasses.
+
+export abstract class BaseRepository<T extends { id: string }> {
+  protected items = new Map<string, T>();
+
+  findAll(): T[] {
+    return Array.from(this.items.values());
+  }
+
+  findById(id: string): T | null {
+    return this.items.get(id) ?? null;
+  }
+
+  save(item: T): T {
+    this.items.set(item.id, item);
+    return item;
+  }
+
+  delete(id: string): boolean {
+    return this.items.delete(id);
+  }
+}
diff --git a/evals/fixtures/todo-api/src/repositories/tasks.repository.ts b/evals/fixtures/todo-api/src/repositories/tasks.repository.ts
new file mode 100644
index 0000000..31b3350
--- /dev/null
+++ b/evals/fixtures/todo-api/src/repositories/tasks.repository.ts
@@ -0,0 +1,14 @@
+import type { Task } from '../types.js';
+import { BaseRepository } from './base.repository.js';
+
+export class TasksRepository extends BaseRepository<Task> {
+  findByOwner(ownerId: string): Task[] {
+    return this.findAll().filter((t) => t.ownerId === ownerId);
+  }
+
+  findCompleted(ownerId: string): Task[] {
+    return this.findByOwner(ownerId).filter((t) => t.completed);
+  }
+}
+
+export const tasksRepository = new TasksRepository();
diff --git a/evals/fixtures/todo-api/src/services/auth.service.ts b/evals/fixtures/todo-api/src/services/auth.service.ts
new file mode 100644
index 0000000..e72bc23
--- /dev/null
+++ b/evals/fixtures/todo-api/src/services/auth.service.ts
@@ -0,0 +1,56 @@
+import type { User } from '../types.js';
+
+// Minimal "JWT" — opaque token, not real crypto. Realistic enough for squint
+// to see signing and verification call sites.
+
+const usersByEmail = new Map<string, User>();
+
+function hashPassword(password: string): string {
+  return `hashed:${password}`;
+}
+
+function verifyPassword(password: string, hash: string): boolean {
+  return hash === `hashed:${password}`;
+}
+
+function signToken(user: User): string {
+  return `token:${user.id}`;
+}
+
+function decodeToken(token: string): { id: string; email: string } | null {
+  if (!token.startsWith('token:')) return null;
+  const id = token.slice('token:'.length);
+  for (const u of usersByEmail.values()) {
+    if (u.id === id) return { id: u.id, email: u.email };
+  }
+  return null;
+}
+
+export class AuthService {
+  async register(email: string, password: string): Promise<{ token: string; user: User }> {
+    if (usersByEmail.has(email)) {
+      throw new Error('user already exists');
+    }
+    const user: User = {
+      id: `u_${usersByEmail.size + 1}`,
+      email,
+      passwordHash: hashPassword(password),
+    };
+    usersByEmail.set(email, user);
+    return { token: signToken(user), user };
+  }
+
+  async login(email: string, password: string): Promise<{ token: string; user: User }> {
+    const user = usersByEmail.get(email);
+    if (!user || !verifyPassword(password, user.passwordHash)) {
+      throw new Error('invalid credentials');
+    }
+    return { token: signToken(user), user };
+  }
+
+  verify(token: string): { id: string; email: string } | null {
+    return decodeToken(token);
+  }
+}
+
+export const authService = new AuthService();
diff --git a/evals/fixtures/todo-api/src/services/tasks.service.ts b/evals/fixtures/todo-api/src/services/tasks.service.ts
new file mode 100644
index 0000000..60b2627
--- /dev/null
+++ b/evals/fixtures/todo-api/src/services/tasks.service.ts
@@ -0,0 +1,51 @@
+import { eventBus } from '../events/event-bus.js';
+import { tasksRepository } from '../repositories/tasks.repository.js';
+import type { NewTaskInput, Task } from '../types.js';
+
+export class TasksService {
+  list(ownerId: string): Task[] {
+    return tasksRepository.findByOwner(ownerId);
+  }
+
+  get(id: string): Task | null {
+    return tasksRepository.findById(id);
+  }
+
+  create(ownerId: string, input: NewTaskInput): Task {
+    const task: Task = {
+      id: `t_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
+      title: input.title,
+      description: input.description,
+      ownerId,
+      completed: false,
+      createdAt: new Date().toISOString(),
+      completedAt: null,
+    };
+    tasksRepository.save(task);
+    eventBus.emit('task.created', { taskId: task.id, ownerId });
+    return task;
+  }
+
+  update(id: string, patch: Partial<Pick<Task, 'title' | 'description'>>): Task | null {
+    const task = tasksRepository.findById(id);
+    if (!task) return null;
+    const next: Task = { ...task, ...patch };
+    tasksRepository.save(next);
+    return next;
+  }
+
+  complete(id: string): Task | null {
+    const task = tasksRepository.findById(id);
+    if (!task) return null;
+    const next: Task = { ...task, completed: true, completedAt: new Date().toISOString() };
+    tasksRepository.save(next);
+    eventBus.emit('task.completed', { taskId: next.id, ownerId: next.ownerId });
+    return next;
+  }
+
+  delete(id: string): boolean {
+    return tasksRepository.delete(id);
+  }
+}
+
+export const tasksService = new TasksService();
diff --git a/evals/fixtures/todo-api/src/types.ts b/evals/fixtures/todo-api/src/types.ts
new file mode 100644
index 0000000..5fb46e3
--- /dev/null
+++ b/evals/fixtures/todo-api/src/types.ts
@@ -0,0 +1,20 @@
+export interface Task {
+  id: string;
+  title: string;
+  description: string;
+  ownerId: string;
+  completed: boolean;
+  createdAt: string;
+  completedAt: string | null;
+}
+
+export interface User {
+  id: string;
+  email: string;
+  passwordHash: string;
+}
+
+export interface NewTaskInput {
+  title: string;
+  description: string;
+}
diff --git a/evals/fixtures/todo-api/tsconfig.json b/evals/fixtures/todo-api/tsconfig.json
new file mode 100644
index 0000000..08cbadd
--- /dev/null
+++ b/evals/fixtures/todo-api/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "lib": ["ES2022"],
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "noEmit": true,
+    "rootDir": ".",
+    "types": []
+  },
+  "include": ["src/**/*", "client/**/*", "index.ts"]
+}
diff --git a/evals/ground-truth/bookstore-api/contracts.ts b/evals/ground-truth/bookstore-api/contracts.ts
new file mode 100644
index 0000000..5c2ca42
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/contracts.ts
@@ -0,0 +1,42 @@
+import type { GroundTruthContract } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `contracts` and `contract_participants` tables after
+ * running `squint ingest --to-stage contracts` against the bookstore-api fixture.
+ *
+ * The bookstore-api exposes 11 HTTP endpoints across 3 API controllers
+ * (books, orders, sessions) plus the restock custom member route.
+ *
+ * NOTE: Rails routes are detected by the LLM contract extractor from the
+ * routes.rb DSL and controller action definitions. The exact normalized
+ * keys may vary (e.g., `/api/books` vs `/books`) depending on whether
+ * the LLM resolves the namespace prefix. Contracts below are authored
+ * COLD and will be calibrated against the first cold-run output.
+ *
+ * Async side effects (mailer, background job) are marked optional because
+ * the LLM may or may not detect them as cross-process contracts.
+ */
+export const contracts: GroundTruthContract[] = [
+  // ============================================================
+  // HTTP — Books CRUD + restock (6)
+  // ============================================================
+  { protocol: 'http', normalizedKey: 'GET /books' },
+  { protocol: 'http', normalizedKey: 'GET /books/{param}' },
+  { protocol: 'http', normalizedKey: 'POST /books' },
+  { protocol: 'http', normalizedKey: 'PUT /books/{param}' },
+  { protocol: 'http', normalizedKey: 'DELETE /books/{param}' },
+  { protocol: 'http', normalizedKey: 'POST /books/{param}/restock' },
+
+  // ============================================================
+  // HTTP — Orders (3)
+  // ============================================================
+  { protocol: 'http', normalizedKey: 'GET /orders' },
+  { protocol: 'http', normalizedKey: 'GET /orders/{param}' },
+  { protocol: 'http', normalizedKey: 'POST /orders' },
+
+  // ============================================================
+  // HTTP — Sessions (2)
+  // ============================================================
+  { protocol: 'http', normalizedKey: 'POST /sessions' },
+  { protocol: 'http', normalizedKey: 'DELETE /sessions' },
+];
diff --git a/evals/ground-truth/bookstore-api/definition-metadata.ts b/evals/ground-truth/bookstore-api/definition-metadata.ts
new file mode 100644
index 0000000..820c6f6
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/definition-metadata.ts
@@ -0,0 +1,403 @@
+import { type GroundTruthDefinitionMetadata, defKey } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `definition_metadata` table after running
+ * `squint ingest --to-stage symbols` against the bookstore-api fixture.
+ *
+ * Three metadata aspects per definition:
+ *   - purpose: LLM-generated description (proseReference, minor drift)
+ *   - domain: LLM-generated tags (themeReference, minor drift)
+ *   - pure: deterministic boolean (exactValue, major mismatch)
+ *
+ * Only class-level and significant method-level definitions get full
+ * coverage. Minor utility methods (format_price, normalize_name) are
+ * included for completeness but with looser thresholds.
+ */
+export const definitionMetadata: GroundTruthDefinitionMetadata[] = [
+  // ============================================================
+  // Models
+  // ============================================================
+
+  // ApplicationRecord
+  {
+    defKey: defKey('app/models/application_record.rb', 'ApplicationRecord'),
+    key: 'purpose',
+    proseReference: 'Abstract base class for all ActiveRecord models with shared query helpers',
+  },
+  {
+    defKey: defKey('app/models/application_record.rb', 'ApplicationRecord'),
+    key: 'domain',
+    themeReference: 'tags should reflect a database or persistence base class',
+  },
+  { defKey: defKey('app/models/application_record.rb', 'ApplicationRecord'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/application_record.rb', 'recent'),
+    key: 'purpose',
+    proseReference: 'Query helper that returns recent records ordered by creation date',
+  },
+  // recent.pure omitted: LLM flip-flops (returns a scope — lazy vs. executes a query)
+
+  // Book
+  {
+    defKey: defKey('app/models/book.rb', 'Book'),
+    key: 'purpose',
+    proseReference: 'ActiveRecord model for books with title, ISBN, pricing, stock tracking, and author association',
+  },
+  {
+    defKey: defKey('app/models/book.rb', 'Book'),
+    key: 'domain',
+    themeReference: 'tags should reflect a catalog or inventory model for books in a bookstore',
+  },
+  { defKey: defKey('app/models/book.rb', 'Book'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/book.rb', 'price'),
+    key: 'purpose',
+    proseReference: 'Converts price from cents to decimal dollars',
+  },
+  { defKey: defKey('app/models/book.rb', 'price'), key: 'pure', exactValue: 'true' },
+  {
+    defKey: defKey('app/models/book.rb', 'in_stock?'),
+    key: 'purpose',
+    proseReference: 'Returns whether the book has available stock',
+  },
+  { defKey: defKey('app/models/book.rb', 'in_stock?'), key: 'pure', exactValue: 'true' },
+  {
+    defKey: defKey('app/models/book.rb', 'reserve_stock!'),
+    key: 'purpose',
+    proseReference: 'Decrements stock count by a given quantity, raising an error if insufficient stock',
+  },
+  { defKey: defKey('app/models/book.rb', 'reserve_stock!'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/book.rb', 'InsufficientStockError'),
+    key: 'purpose',
+    proseReference: 'Custom error class raised when trying to reserve more stock than available',
+  },
+  { defKey: defKey('app/models/book.rb', 'InsufficientStockError'), key: 'pure', exactValue: 'false' },
+
+  // Author
+  {
+    defKey: defKey('app/models/author.rb', 'Author'),
+    key: 'purpose',
+    proseReference: 'ActiveRecord model for book authors with name, bio, and association to books',
+  },
+  {
+    defKey: defKey('app/models/author.rb', 'Author'),
+    key: 'domain',
+    themeReference: 'tags should reflect a catalog or author model for a bookstore',
+  },
+  { defKey: defKey('app/models/author.rb', 'Author'), key: 'pure', exactValue: 'false' },
+  { defKey: defKey('app/models/author.rb', 'book_count'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/author.rb', 'full_display_name'),
+    key: 'purpose',
+    proseReference: 'Returns a formatted display name combining the author name and truncated bio',
+  },
+  { defKey: defKey('app/models/author.rb', 'full_display_name'), key: 'pure', exactValue: 'true' },
+
+  // User
+  {
+    defKey: defKey('app/models/user.rb', 'User'),
+    key: 'purpose',
+    proseReference: 'ActiveRecord model for user accounts with password authentication and order associations',
+  },
+  {
+    defKey: defKey('app/models/user.rb', 'User'),
+    key: 'domain',
+    themeReference: 'tags should reflect user authentication or identity',
+  },
+  { defKey: defKey('app/models/user.rb', 'User'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/user.rb', 'authenticate'),
+    key: 'purpose',
+    proseReference: 'Class method that looks up a user by email and verifies the password, returning the user or nil',
+  },
+  { defKey: defKey('app/models/user.rb', 'authenticate'), key: 'pure', exactValue: 'false' },
+  { defKey: defKey('app/models/user.rb', 'total_spent'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/user.rb', 'admin?'),
+    key: 'purpose',
+    proseReference: 'Checks whether the user has the admin role',
+  },
+  { defKey: defKey('app/models/user.rb', 'admin?'), key: 'pure', exactValue: 'true' },
+
+  // Order
+  {
+    defKey: defKey('app/models/order.rb', 'Order'),
+    key: 'purpose',
+    proseReference:
+      'ActiveRecord model for purchase orders with status management, item associations, and post-creation hooks for email and inventory checks',
+  },
+  {
+    defKey: defKey('app/models/order.rb', 'Order'),
+    key: 'domain',
+    themeReference: 'tags should reflect order management or e-commerce purchasing',
+  },
+  { defKey: defKey('app/models/order.rb', 'Order'), key: 'pure', exactValue: 'false' },
+  { defKey: defKey('app/models/order.rb', 'confirm!'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/order.rb', 'cancel!'),
+    key: 'purpose',
+    proseReference: 'Cancels the order and restores stock quantities for each order item',
+  },
+  { defKey: defKey('app/models/order.rb', 'cancel!'), key: 'pure', exactValue: 'false' },
+  // item_count.pure omitted: LLM flip-flops (delegates to .sum() — query vs. aggregation)
+
+  // OrderItem
+  {
+    defKey: defKey('app/models/order_item.rb', 'OrderItem'),
+    key: 'purpose',
+    proseReference: 'ActiveRecord join model between orders and books with quantity and unit price tracking',
+  },
+  {
+    defKey: defKey('app/models/order_item.rb', 'OrderItem'),
+    key: 'domain',
+    themeReference: 'tags should reflect order line items or cart items in a purchase',
+  },
+  { defKey: defKey('app/models/order_item.rb', 'OrderItem'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/models/order_item.rb', 'subtotal_cents'),
+    key: 'purpose',
+    proseReference: 'Computes the subtotal by multiplying quantity by unit price',
+  },
+  { defKey: defKey('app/models/order_item.rb', 'subtotal_cents'), key: 'pure', exactValue: 'true' },
+
+  // ============================================================
+  // Controllers
+  // ============================================================
+
+  // ApplicationController
+  {
+    defKey: defKey('app/controllers/application_controller.rb', 'ApplicationController'),
+    key: 'purpose',
+    proseReference: 'Base API controller with authentication helpers and request ID tracking',
+  },
+  {
+    defKey: defKey('app/controllers/application_controller.rb', 'ApplicationController'),
+    key: 'domain',
+    themeReference: 'tags should reflect HTTP or API base controller infrastructure',
+  },
+  {
+    defKey: defKey('app/controllers/application_controller.rb', 'ApplicationController'),
+    key: 'pure',
+    exactValue: 'false',
+  },
+  {
+    defKey: defKey('app/controllers/application_controller.rb', 'authenticate!'),
+    key: 'purpose',
+    proseReference: 'Before-action filter that rejects unauthenticated requests with 401',
+  },
+  { defKey: defKey('app/controllers/application_controller.rb', 'authenticate!'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/controllers/application_controller.rb', 'current_user'),
+    key: 'purpose',
+    proseReference: 'Extracts and memoizes the authenticated user from the Authorization header token',
+  },
+  { defKey: defKey('app/controllers/application_controller.rb', 'current_user'), key: 'pure', exactValue: 'false' },
+
+  // Api::BaseController
+  {
+    defKey: defKey('app/controllers/api/base_controller.rb', 'BaseController'),
+    key: 'purpose',
+    proseReference: 'Namespaced API base controller with shared JSON response helpers and pagination',
+  },
+  {
+    defKey: defKey('app/controllers/api/base_controller.rb', 'BaseController'),
+    key: 'domain',
+    themeReference: 'tags should reflect API controller infrastructure or HTTP response helpers',
+  },
+  { defKey: defKey('app/controllers/api/base_controller.rb', 'BaseController'), key: 'pure', exactValue: 'false' },
+
+  // Api::BooksController
+  {
+    defKey: defKey('app/controllers/api/books_controller.rb', 'BooksController'),
+    key: 'purpose',
+    proseReference: 'REST controller for book catalog CRUD endpoints with admin authorization and serialization',
+  },
+  {
+    defKey: defKey('app/controllers/api/books_controller.rb', 'BooksController'),
+    key: 'domain',
+    themeReference: 'tags should reflect book catalog management or API endpoints',
+  },
+  { defKey: defKey('app/controllers/api/books_controller.rb', 'BooksController'), key: 'pure', exactValue: 'false' },
+
+  // Api::OrdersController
+  {
+    defKey: defKey('app/controllers/api/orders_controller.rb', 'OrdersController'),
+    key: 'purpose',
+    proseReference: 'REST controller for order endpoints that delegates checkout to the CheckoutService',
+  },
+  {
+    defKey: defKey('app/controllers/api/orders_controller.rb', 'OrdersController'),
+    key: 'domain',
+    themeReference: 'tags should reflect order management or purchasing API',
+  },
+  { defKey: defKey('app/controllers/api/orders_controller.rb', 'OrdersController'), key: 'pure', exactValue: 'false' },
+
+  // Api::SessionsController
+  {
+    defKey: defKey('app/controllers/api/sessions_controller.rb', 'SessionsController'),
+    key: 'purpose',
+    proseReference: 'REST controller for authentication sessions: login with email/password and logout',
+  },
+  {
+    defKey: defKey('app/controllers/api/sessions_controller.rb', 'SessionsController'),
+    key: 'domain',
+    themeReference: 'tags should reflect authentication or session management',
+  },
+  {
+    defKey: defKey('app/controllers/api/sessions_controller.rb', 'SessionsController'),
+    key: 'pure',
+    exactValue: 'false',
+  },
+
+  // ============================================================
+  // Services
+  // ============================================================
+
+  // CheckoutService
+  {
+    defKey: defKey('app/services/checkout_service.rb', 'CheckoutService'),
+    key: 'purpose',
+    proseReference:
+      'Service object that orchestrates checkout: validates stock, creates order with items, reserves inventory, and triggers async side effects',
+  },
+  {
+    defKey: defKey('app/services/checkout_service.rb', 'CheckoutService'),
+    key: 'domain',
+    themeReference: 'tags should reflect checkout or order processing business logic',
+  },
+  { defKey: defKey('app/services/checkout_service.rb', 'CheckoutService'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/services/checkout_service.rb', 'call'),
+    key: 'purpose',
+    proseReference:
+      'Executes the checkout flow: loads books, checks stock, creates order and items, confirms the order',
+  },
+  { defKey: defKey('app/services/checkout_service.rb', 'call'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/services/checkout_service.rb', 'success?'),
+    key: 'purpose',
+    proseReference: 'Returns whether the checkout completed without errors',
+  },
+  { defKey: defKey('app/services/checkout_service.rb', 'success?'), key: 'pure', exactValue: 'true' },
+
+  // InventoryService
+  {
+    defKey: defKey('app/services/inventory_service.rb', 'InventoryService'),
+    key: 'purpose',
+    proseReference: 'Service for checking stock levels, reserving inventory, and finding low or out-of-stock books',
+  },
+  {
+    defKey: defKey('app/services/inventory_service.rb', 'InventoryService'),
+    key: 'domain',
+    themeReference: 'tags should reflect inventory management or stock tracking',
+  },
+  { defKey: defKey('app/services/inventory_service.rb', 'InventoryService'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/services/inventory_service.rb', 'check_stock'),
+    key: 'purpose',
+    proseReference: 'Returns a hash of stock information for a given book including stock count and low-stock flag',
+  },
+  { defKey: defKey('app/services/inventory_service.rb', 'check_stock'), key: 'pure', exactValue: 'true' },
+  {
+    defKey: defKey('app/services/inventory_service.rb', 'reserve'),
+    key: 'purpose',
+    proseReference: 'Delegates to the book model to decrement stock by the requested quantity',
+  },
+  { defKey: defKey('app/services/inventory_service.rb', 'reserve'), key: 'pure', exactValue: 'false' },
+
+  // ============================================================
+  // Serializers
+  // ============================================================
+
+  {
+    defKey: defKey('app/serializers/book_serializer.rb', 'BookSerializer'),
+    key: 'purpose',
+    proseReference: 'Serializes a Book model into a JSON hash for API responses including author summary',
+  },
+  {
+    defKey: defKey('app/serializers/book_serializer.rb', 'BookSerializer'),
+    key: 'domain',
+    themeReference: 'tags should reflect API serialization or data presentation for books',
+  },
+  { defKey: defKey('app/serializers/book_serializer.rb', 'BookSerializer'), key: 'pure', exactValue: 'false' },
+
+  {
+    defKey: defKey('app/serializers/order_serializer.rb', 'OrderSerializer'),
+    key: 'purpose',
+    proseReference: 'Serializes an Order model into a JSON hash with nested items using BookSerializer',
+  },
+  {
+    defKey: defKey('app/serializers/order_serializer.rb', 'OrderSerializer'),
+    key: 'domain',
+    themeReference: 'tags should reflect API serialization or data presentation for orders',
+  },
+  { defKey: defKey('app/serializers/order_serializer.rb', 'OrderSerializer'), key: 'pure', exactValue: 'false' },
+
+  // ============================================================
+  // Mailer
+  // ============================================================
+
+  {
+    defKey: defKey('app/mailers/order_mailer.rb', 'OrderMailer'),
+    key: 'purpose',
+    proseReference: 'Mailer for order-related emails: confirmation after creation and cancellation notification',
+  },
+  {
+    defKey: defKey('app/mailers/order_mailer.rb', 'OrderMailer'),
+    key: 'domain',
+    themeReference: 'tags should reflect email notifications or order communications',
+  },
+  { defKey: defKey('app/mailers/order_mailer.rb', 'OrderMailer'), key: 'pure', exactValue: 'false' },
+
+  // ============================================================
+  // Job
+  // ============================================================
+
+  {
+    defKey: defKey('app/jobs/inventory_check_job.rb', 'InventoryCheckJob'),
+    key: 'purpose',
+    proseReference:
+      'Background job that checks stock levels for all items in a completed order and alerts on low stock',
+  },
+  {
+    defKey: defKey('app/jobs/inventory_check_job.rb', 'InventoryCheckJob'),
+    key: 'domain',
+    themeReference: 'tags should reflect background processing or inventory monitoring',
+  },
+  { defKey: defKey('app/jobs/inventory_check_job.rb', 'InventoryCheckJob'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/jobs/inventory_check_job.rb', 'perform'),
+    key: 'purpose',
+    proseReference: 'Iterates over order items, checks stock for each book, and notifies admin of low stock',
+  },
+  { defKey: defKey('app/jobs/inventory_check_job.rb', 'perform'), key: 'pure', exactValue: 'false' },
+
+  // ============================================================
+  // Api module (wraps namespaced controllers — 4x duplicate)
+  // ============================================================
+  {
+    defKey: defKey('app/controllers/api/base_controller.rb', 'Api'),
+    key: 'purpose',
+    proseReference: 'Ruby module namespace wrapping the API controllers',
+  },
+  { defKey: defKey('app/controllers/api/base_controller.rb', 'Api'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/controllers/api/books_controller.rb', 'Api'),
+    key: 'purpose',
+    proseReference: 'Ruby module namespace wrapping the API controllers',
+  },
+  { defKey: defKey('app/controllers/api/books_controller.rb', 'Api'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/controllers/api/orders_controller.rb', 'Api'),
+    key: 'purpose',
+    proseReference: 'Ruby module namespace wrapping the API controllers',
+  },
+  { defKey: defKey('app/controllers/api/orders_controller.rb', 'Api'), key: 'pure', exactValue: 'false' },
+  {
+    defKey: defKey('app/controllers/api/sessions_controller.rb', 'Api'),
+    key: 'purpose',
+    proseReference: 'Ruby module namespace wrapping the API controllers',
+  },
+  { defKey: defKey('app/controllers/api/sessions_controller.rb', 'Api'), key: 'pure', exactValue: 'false' },
+];
diff --git a/evals/ground-truth/bookstore-api/definitions.ts b/evals/ground-truth/bookstore-api/definitions.ts
new file mode 100644
index 0000000..d2bcddd
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/definitions.ts
@@ -0,0 +1,666 @@
+import type { GroundTruthDefinition } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `definitions` table after parsing the bookstore-api fixture.
+ *
+ * Calibrated against the produced DB from `squint ingest --to-stage parse`.
+ * 97 definitions across 17 files (config/routes.rb produces 0 definitions).
+ *
+ * Key Ruby-specific observations:
+ *   - `module Api` wrapper produces a module def in each controller file (4x)
+ *   - `attr_reader :foo` produces a method def named 'foo'
+ *   - Class names inside `module Api ... end` are just the inner name
+ *     (e.g. 'BaseController' not 'Api::BaseController')
+ *   - `InsufficientStockError` in book.rb is a separate class def
+ *   - Scopes are NOT extracted as definitions (they're DSL, not method defs)
+ *   - `has_secure_password`, `validates`, `belongs_to` etc. are NOT defs
+ */
+export const definitions: GroundTruthDefinition[] = [
+  // ============================================================
+  // app/controllers/api/base_controller.rb (6 defs)
+  // ============================================================
+  {
+    file: 'app/controllers/api/base_controller.rb',
+    name: 'Api',
+    kind: 'module',
+    isExported: true,
+    line: 1,
+    endLine: 25,
+  },
+  {
+    file: 'app/controllers/api/base_controller.rb',
+    name: 'BaseController',
+    kind: 'class',
+    isExported: true,
+    line: 2,
+    endLine: 24,
+    extendsName: 'ApplicationController',
+  },
+  {
+    file: 'app/controllers/api/base_controller.rb',
+    name: 'render_success',
+    kind: 'method',
+    isExported: false,
+    line: 7,
+    endLine: 9,
+  },
+  {
+    file: 'app/controllers/api/base_controller.rb',
+    name: 'render_error',
+    kind: 'method',
+    isExported: false,
+    line: 11,
+    endLine: 13,
+  },
+  {
+    file: 'app/controllers/api/base_controller.rb',
+    name: 'render_not_found',
+    kind: 'method',
+    isExported: false,
+    line: 15,
+    endLine: 17,
+  },
+  {
+    file: 'app/controllers/api/base_controller.rb',
+    name: 'paginate',
+    kind: 'method',
+    isExported: false,
+    line: 19,
+    endLine: 23,
+  },
+
+  // ============================================================
+  // app/controllers/api/books_controller.rb (11 defs)
+  // ============================================================
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'Api',
+    kind: 'module',
+    isExported: true,
+    line: 1,
+    endLine: 59,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'BooksController',
+    kind: 'class',
+    isExported: true,
+    line: 2,
+    endLine: 58,
+    extendsName: 'BaseController',
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'index',
+    kind: 'method',
+    isExported: true,
+    line: 7,
+    endLine: 10,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'show',
+    kind: 'method',
+    isExported: true,
+    line: 12,
+    endLine: 14,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'create',
+    kind: 'method',
+    isExported: true,
+    line: 16,
+    endLine: 23,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'update',
+    kind: 'method',
+    isExported: true,
+    line: 25,
+    endLine: 31,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'destroy',
+    kind: 'method',
+    isExported: true,
+    line: 33,
+    endLine: 36,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'restock',
+    kind: 'method',
+    isExported: true,
+    line: 38,
+    endLine: 42,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'set_book',
+    kind: 'method',
+    isExported: false,
+    line: 46,
+    endLine: 49,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'book_params',
+    kind: 'method',
+    isExported: false,
+    line: 51,
+    endLine: 53,
+  },
+  {
+    file: 'app/controllers/api/books_controller.rb',
+    name: 'require_admin!',
+    kind: 'method',
+    isExported: false,
+    line: 55,
+    endLine: 57,
+  },
+
+  // ============================================================
+  // app/controllers/api/orders_controller.rb (7 defs)
+  // ============================================================
+  {
+    file: 'app/controllers/api/orders_controller.rb',
+    name: 'Api',
+    kind: 'module',
+    isExported: true,
+    line: 1,
+    endLine: 40,
+  },
+  {
+    file: 'app/controllers/api/orders_controller.rb',
+    name: 'OrdersController',
+    kind: 'class',
+    isExported: true,
+    line: 2,
+    endLine: 39,
+    extendsName: 'BaseController',
+  },
+  {
+    file: 'app/controllers/api/orders_controller.rb',
+    name: 'index',
+    kind: 'method',
+    isExported: true,
+    line: 5,
+    endLine: 8,
+  },
+  {
+    file: 'app/controllers/api/orders_controller.rb',
+    name: 'show',
+    kind: 'method',
+    isExported: true,
+    line: 10,
+    endLine: 12,
+  },
+  {
+    file: 'app/controllers/api/orders_controller.rb',
+    name: 'create',
+    kind: 'method',
+    isExported: true,
+    line: 14,
+    endLine: 27,
+  },
+  {
+    file: 'app/controllers/api/orders_controller.rb',
+    name: 'set_order',
+    kind: 'method',
+    isExported: false,
+    line: 31,
+    endLine: 34,
+  },
+  {
+    file: 'app/controllers/api/orders_controller.rb',
+    name: 'order_params',
+    kind: 'method',
+    isExported: false,
+    line: 36,
+    endLine: 38,
+  },
+
+  // ============================================================
+  // app/controllers/api/sessions_controller.rb (6 defs)
+  // ============================================================
+  {
+    file: 'app/controllers/api/sessions_controller.rb',
+    name: 'Api',
+    kind: 'module',
+    isExported: true,
+    line: 1,
+    endLine: 33,
+  },
+  {
+    file: 'app/controllers/api/sessions_controller.rb',
+    name: 'SessionsController',
+    kind: 'class',
+    isExported: true,
+    line: 2,
+    endLine: 32,
+    extendsName: 'BaseController',
+  },
+  {
+    file: 'app/controllers/api/sessions_controller.rb',
+    name: 'create',
+    kind: 'method',
+    isExported: true,
+    line: 5,
+    endLine: 14,
+  },
+  {
+    file: 'app/controllers/api/sessions_controller.rb',
+    name: 'destroy',
+    kind: 'method',
+    isExported: true,
+    line: 16,
+    endLine: 19,
+  },
+  {
+    file: 'app/controllers/api/sessions_controller.rb',
+    name: 'session_params',
+    kind: 'method',
+    isExported: false,
+    line: 23,
+    endLine: 25,
+  },
+  {
+    file: 'app/controllers/api/sessions_controller.rb',
+    name: 'generate_auth_token',
+    kind: 'method',
+    isExported: false,
+    line: 27,
+    endLine: 31,
+  },
+
+  // ============================================================
+  // app/controllers/application_controller.rb (4 defs)
+  // ============================================================
+  {
+    file: 'app/controllers/application_controller.rb',
+    name: 'ApplicationController',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 20,
+    extendsName: 'ActionController::API',
+  },
+  {
+    file: 'app/controllers/application_controller.rb',
+    name: 'current_user',
+    kind: 'method',
+    isExported: false,
+    line: 6,
+    endLine: 11,
+  },
+  {
+    file: 'app/controllers/application_controller.rb',
+    name: 'authenticate!',
+    kind: 'method',
+    isExported: false,
+    line: 13,
+    endLine: 15,
+  },
+  {
+    file: 'app/controllers/application_controller.rb',
+    name: 'set_request_id',
+    kind: 'method',
+    isExported: false,
+    line: 17,
+    endLine: 19,
+  },
+
+  // ============================================================
+  // app/jobs/inventory_check_job.rb (3 defs)
+  // ============================================================
+  {
+    file: 'app/jobs/inventory_check_job.rb',
+    name: 'InventoryCheckJob',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 22,
+    extendsName: 'ApplicationJob',
+  },
+  { file: 'app/jobs/inventory_check_job.rb', name: 'perform', kind: 'method', isExported: true, line: 4, endLine: 15 },
+  {
+    file: 'app/jobs/inventory_check_job.rb',
+    name: 'notify_admin',
+    kind: 'method',
+    isExported: false,
+    line: 19,
+    endLine: 21,
+  },
+
+  // ============================================================
+  // app/mailers/order_mailer.rb (3 defs)
+  // ============================================================
+  {
+    file: 'app/mailers/order_mailer.rb',
+    name: 'OrderMailer',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 22,
+    extendsName: 'ApplicationMailer',
+  },
+  { file: 'app/mailers/order_mailer.rb', name: 'confirmation', kind: 'method', isExported: true, line: 2, endLine: 11 },
+  {
+    file: 'app/mailers/order_mailer.rb',
+    name: 'cancellation',
+    kind: 'method',
+    isExported: true,
+    line: 13,
+    endLine: 21,
+  },
+
+  // ============================================================
+  // app/models/application_record.rb (2 defs)
+  // ============================================================
+  {
+    file: 'app/models/application_record.rb',
+    name: 'ApplicationRecord',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 7,
+    extendsName: 'ActiveRecord::Base',
+  },
+  { file: 'app/models/application_record.rb', name: 'recent', kind: 'method', isExported: true, line: 4, endLine: 6 },
+
+  // ============================================================
+  // app/models/author.rb (4 defs)
+  // ============================================================
+  {
+    file: 'app/models/author.rb',
+    name: 'Author',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 22,
+    extendsName: 'ApplicationRecord',
+  },
+  { file: 'app/models/author.rb', name: 'book_count', kind: 'method', isExported: true, line: 9, endLine: 11 },
+  { file: 'app/models/author.rb', name: 'full_display_name', kind: 'method', isExported: true, line: 13, endLine: 15 },
+  { file: 'app/models/author.rb', name: 'normalize_name', kind: 'method', isExported: false, line: 19, endLine: 21 },
+
+  // ============================================================
+  // app/models/book.rb (6 defs)
+  // ============================================================
+  {
+    file: 'app/models/book.rb',
+    name: 'Book',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 35,
+    extendsName: 'ApplicationRecord',
+  },
+  { file: 'app/models/book.rb', name: 'price', kind: 'method', isExported: true, line: 16, endLine: 18 },
+  { file: 'app/models/book.rb', name: 'in_stock?', kind: 'method', isExported: true, line: 20, endLine: 22 },
+  { file: 'app/models/book.rb', name: 'reserve_stock!', kind: 'method', isExported: true, line: 24, endLine: 28 },
+  { file: 'app/models/book.rb', name: 'log_new_book', kind: 'method', isExported: false, line: 32, endLine: 34 },
+  {
+    file: 'app/models/book.rb',
+    name: 'InsufficientStockError',
+    kind: 'class',
+    isExported: true,
+    line: 37,
+    endLine: 37,
+    extendsName: 'StandardError',
+  },
+
+  // ============================================================
+  // app/models/order.rb (10 defs)
+  // ============================================================
+  {
+    file: 'app/models/order.rb',
+    name: 'Order',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 46,
+    extendsName: 'ApplicationRecord',
+  },
+  { file: 'app/models/order.rb', name: 'STATUS_PENDING', kind: 'const', isExported: true, line: 2 },
+  { file: 'app/models/order.rb', name: 'STATUS_CONFIRMED', kind: 'const', isExported: true, line: 3 },
+  { file: 'app/models/order.rb', name: 'STATUS_CANCELLED', kind: 'const', isExported: true, line: 4 },
+  { file: 'app/models/order.rb', name: 'STATUSES', kind: 'const', isExported: true, line: 6 },
+  { file: 'app/models/order.rb', name: 'confirm!', kind: 'method', isExported: true, line: 21, endLine: 23 },
+  { file: 'app/models/order.rb', name: 'cancel!', kind: 'method', isExported: true, line: 25, endLine: 31 },
+  { file: 'app/models/order.rb', name: 'item_count', kind: 'method', isExported: true, line: 33, endLine: 35 },
+  {
+    file: 'app/models/order.rb',
+    name: 'send_confirmation_email',
+    kind: 'method',
+    isExported: false,
+    line: 39,
+    endLine: 41,
+  },
+  {
+    file: 'app/models/order.rb',
+    name: 'enqueue_inventory_check',
+    kind: 'method',
+    isExported: false,
+    line: 43,
+    endLine: 45,
+  },
+
+  // ============================================================
+  // app/models/order_item.rb (3 defs)
+  // ============================================================
+  {
+    file: 'app/models/order_item.rb',
+    name: 'OrderItem',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 19,
+    extendsName: 'ApplicationRecord',
+  },
+  { file: 'app/models/order_item.rb', name: 'subtotal_cents', kind: 'method', isExported: true, line: 10, endLine: 12 },
+  {
+    file: 'app/models/order_item.rb',
+    name: 'set_unit_price',
+    kind: 'method',
+    isExported: false,
+    line: 16,
+    endLine: 18,
+  },
+
+  // ============================================================
+  // app/models/user.rb (5 defs)
+  // ============================================================
+  {
+    file: 'app/models/user.rb',
+    name: 'User',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 30,
+    extendsName: 'ApplicationRecord',
+  },
+  { file: 'app/models/user.rb', name: 'authenticate', kind: 'method', isExported: true, line: 10, endLine: 15 },
+  { file: 'app/models/user.rb', name: 'total_spent', kind: 'method', isExported: true, line: 17, endLine: 19 },
+  { file: 'app/models/user.rb', name: 'admin?', kind: 'method', isExported: true, line: 21, endLine: 23 },
+  { file: 'app/models/user.rb', name: 'downcase_email', kind: 'method', isExported: false, line: 27, endLine: 29 },
+
+  // ============================================================
+  // app/serializers/book_serializer.rb (5 defs)
+  // ============================================================
+  {
+    file: 'app/serializers/book_serializer.rb',
+    name: 'BookSerializer',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 28,
+  },
+  { file: 'app/serializers/book_serializer.rb', name: 'book', kind: 'method', isExported: true, line: 2 },
+  {
+    file: 'app/serializers/book_serializer.rb',
+    name: 'initialize',
+    kind: 'method',
+    isExported: true,
+    line: 4,
+    endLine: 6,
+  },
+  {
+    file: 'app/serializers/book_serializer.rb',
+    name: 'as_json',
+    kind: 'method',
+    isExported: true,
+    line: 8,
+    endLine: 19,
+  },
+  {
+    file: 'app/serializers/book_serializer.rb',
+    name: 'author_summary',
+    kind: 'method',
+    isExported: false,
+    line: 23,
+    endLine: 27,
+  },
+
+  // ============================================================
+  // app/serializers/order_serializer.rb (6 defs)
+  // ============================================================
+  {
+    file: 'app/serializers/order_serializer.rb',
+    name: 'OrderSerializer',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 34,
+  },
+  { file: 'app/serializers/order_serializer.rb', name: 'order', kind: 'method', isExported: true, line: 2 },
+  {
+    file: 'app/serializers/order_serializer.rb',
+    name: 'initialize',
+    kind: 'method',
+    isExported: true,
+    line: 4,
+    endLine: 6,
+  },
+  {
+    file: 'app/serializers/order_serializer.rb',
+    name: 'as_json',
+    kind: 'method',
+    isExported: true,
+    line: 8,
+    endLine: 17,
+  },
+  {
+    file: 'app/serializers/order_serializer.rb',
+    name: 'serialize_items',
+    kind: 'method',
+    isExported: false,
+    line: 21,
+    endLine: 29,
+  },
+  {
+    file: 'app/serializers/order_serializer.rb',
+    name: 'format_price',
+    kind: 'method',
+    isExported: false,
+    line: 31,
+    endLine: 33,
+  },
+
+  // ============================================================
+  // app/services/checkout_service.rb (10 defs)
+  // ============================================================
+  {
+    file: 'app/services/checkout_service.rb',
+    name: 'CheckoutService',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 68,
+  },
+  { file: 'app/services/checkout_service.rb', name: 'user', kind: 'method', isExported: true, line: 2 },
+  { file: 'app/services/checkout_service.rb', name: 'items', kind: 'method', isExported: true, line: 2 },
+  { file: 'app/services/checkout_service.rb', name: 'order', kind: 'method', isExported: true, line: 2 },
+  { file: 'app/services/checkout_service.rb', name: 'error', kind: 'method', isExported: true, line: 2 },
+  {
+    file: 'app/services/checkout_service.rb',
+    name: 'initialize',
+    kind: 'method',
+    isExported: true,
+    line: 4,
+    endLine: 9,
+  },
+  { file: 'app/services/checkout_service.rb', name: 'call', kind: 'method', isExported: true, line: 11, endLine: 44 },
+  {
+    file: 'app/services/checkout_service.rb',
+    name: 'success?',
+    kind: 'method',
+    isExported: true,
+    line: 46,
+    endLine: 48,
+  },
+  {
+    file: 'app/services/checkout_service.rb',
+    name: 'load_and_validate_books',
+    kind: 'method',
+    isExported: false,
+    line: 52,
+    endLine: 62,
+  },
+  {
+    file: 'app/services/checkout_service.rb',
+    name: 'failure',
+    kind: 'method',
+    isExported: false,
+    line: 64,
+    endLine: 67,
+  },
+
+  // ============================================================
+  // app/services/inventory_service.rb (6 defs)
+  // ============================================================
+  {
+    file: 'app/services/inventory_service.rb',
+    name: 'InventoryService',
+    kind: 'class',
+    isExported: true,
+    line: 1,
+    endLine: 25,
+  },
+  { file: 'app/services/inventory_service.rb', name: 'LOW_STOCK_THRESHOLD', kind: 'const', isExported: true, line: 2 },
+  {
+    file: 'app/services/inventory_service.rb',
+    name: 'check_stock',
+    kind: 'method',
+    isExported: true,
+    line: 4,
+    endLine: 12,
+  },
+  {
+    file: 'app/services/inventory_service.rb',
+    name: 'reserve',
+    kind: 'method',
+    isExported: true,
+    line: 14,
+    endLine: 16,
+  },
+  {
+    file: 'app/services/inventory_service.rb',
+    name: 'low_stock_books',
+    kind: 'method',
+    isExported: true,
+    line: 18,
+    endLine: 20,
+  },
+  {
+    file: 'app/services/inventory_service.rb',
+    name: 'out_of_stock_books',
+    kind: 'method',
+    isExported: true,
+    line: 22,
+    endLine: 24,
+  },
+];
diff --git a/evals/ground-truth/bookstore-api/feature-cohesion.ts b/evals/ground-truth/bookstore-api/feature-cohesion.ts
new file mode 100644
index 0000000..8fefb1e
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/feature-cohesion.ts
@@ -0,0 +1,21 @@
+import type { FeatureCohesionGroup } from '../../harness/types.js';
+
+/**
+ * Theme-search ground truth for the LLM-driven features stage.
+ *
+ * The bookstore-api has 2 product features: catalog management and ordering.
+ * Authentication may appear as a third feature or be folded into one of these.
+ *
+ * Severity (compareFeatureCohesion):
+ *   - No feature matches expected theme → CRITICAL
+ */
+export const featureCohesion: FeatureCohesionGroup[] = [
+  {
+    label: 'catalog-feature',
+    expectedRole: 'Feature for book catalog management: browsing, searching, CRUD operations on books and authors',
+  },
+  {
+    label: 'ordering-feature',
+    expectedRole: 'Feature for order placement: checkout, inventory management, order confirmation and notifications',
+  },
+];
diff --git a/evals/ground-truth/bookstore-api/files.ts b/evals/ground-truth/bookstore-api/files.ts
new file mode 100644
index 0000000..8ac296e
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/files.ts
@@ -0,0 +1,29 @@
+import type { GroundTruthFile } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `files` table after parsing the bookstore-api fixture.
+ *
+ * 18 Ruby files (17 .rb + config/routes.rb). The Gemfile is not parsed
+ * (not a .rb extension). config/routes.rb is parsed but produces 0
+ * definitions (DSL-only); it's included because squint indexes it.
+ */
+export const files: GroundTruthFile[] = [
+  { path: 'app/controllers/api/base_controller.rb', language: 'ruby' },
+  { path: 'app/controllers/api/books_controller.rb', language: 'ruby' },
+  { path: 'app/controllers/api/orders_controller.rb', language: 'ruby' },
+  { path: 'app/controllers/api/sessions_controller.rb', language: 'ruby' },
+  { path: 'app/controllers/application_controller.rb', language: 'ruby' },
+  { path: 'app/jobs/inventory_check_job.rb', language: 'ruby' },
+  { path: 'app/mailers/order_mailer.rb', language: 'ruby' },
+  { path: 'app/models/application_record.rb', language: 'ruby' },
+  { path: 'app/models/author.rb', language: 'ruby' },
+  { path: 'app/models/book.rb', language: 'ruby' },
+  { path: 'app/models/order.rb', language: 'ruby' },
+  { path: 'app/models/order_item.rb', language: 'ruby' },
+  { path: 'app/models/user.rb', language: 'ruby' },
+  { path: 'app/serializers/book_serializer.rb', language: 'ruby' },
+  { path: 'app/serializers/order_serializer.rb', language: 'ruby' },
+  { path: 'app/services/checkout_service.rb', language: 'ruby' },
+  { path: 'app/services/inventory_service.rb', language: 'ruby' },
+  { path: 'config/routes.rb', language: 'ruby' },
+];
diff --git a/evals/ground-truth/bookstore-api/flow-rubric.ts b/evals/ground-truth/bookstore-api/flow-rubric.ts
new file mode 100644
index 0000000..4ffcfcc
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/flow-rubric.ts
@@ -0,0 +1,26 @@
+import type { FlowRubricEntry } from '../../harness/types.js';
+
+/**
+ * Theme-search ground truth for the LLM-driven flows stage.
+ *
+ * The bookstore-api's flows stage produces a mix of system inheritance flows
+ * (model→ApplicationRecord) and external-stakeholder CRUD flows (create book,
+ * create order). The rubric matches the two external-facing flows since those
+ * are the cross-cutting journeys that exercise the interaction pipeline.
+ *
+ * Severity (compareFlowRubric):
+ *   - No flow matches expected theme → CRITICAL
+ *   - Best match's stakeholder wrong → MAJOR
+ */
+export const flowRubric: FlowRubricEntry[] = [
+  {
+    label: 'external-book-management',
+    expectedRole: 'A flow for creating or managing books in the catalog',
+    acceptableStakeholders: ['user', 'admin', 'external', 'system'],
+  },
+  {
+    label: 'external-order-creation',
+    expectedRole: 'A flow for creating or placing an order',
+    acceptableStakeholders: ['user', 'external', 'system'],
+  },
+];
diff --git a/evals/ground-truth/bookstore-api/imports.ts b/evals/ground-truth/bookstore-api/imports.ts
new file mode 100644
index 0000000..74e6e96
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/imports.ts
@@ -0,0 +1,113 @@
+import type { GroundTruthImport } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `imports` table after parsing the bookstore-api fixture.
+ *
+ * These imports are detected via constant-receiver analysis: when Ruby code
+ * calls `BookSerializer.new(book)`, squint resolves `BookSerializer` to
+ * `app/serializers/book_serializer.rb` via Rails Zeitwerk conventions.
+ *
+ * 15 resolved imports across 8 files. All are `type: 'import'` (synthetic
+ * from constant-receiver detection, not explicit require/require_relative).
+ */
+export const imports: GroundTruthImport[] = [
+  // Controllers → models/services/serializers
+  {
+    fromFile: 'app/controllers/api/books_controller.rb',
+    source: 'Book',
+    type: 'import',
+    symbols: [{ name: 'Book', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/controllers/api/books_controller.rb',
+    source: 'BookSerializer',
+    type: 'import',
+    symbols: [{ name: 'BookSerializer', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/controllers/api/orders_controller.rb',
+    source: 'CheckoutService',
+    type: 'import',
+    symbols: [{ name: 'CheckoutService', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/controllers/api/orders_controller.rb',
+    source: 'OrderSerializer',
+    type: 'import',
+    symbols: [{ name: 'OrderSerializer', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/controllers/api/sessions_controller.rb',
+    source: 'User',
+    type: 'import',
+    symbols: [{ name: 'User', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/controllers/application_controller.rb',
+    source: 'User',
+    type: 'import',
+    symbols: [{ name: 'User', kind: 'named' }],
+  },
+
+  // Models → mailers/jobs (callback-triggered)
+  {
+    fromFile: 'app/models/order.rb',
+    source: 'OrderMailer',
+    type: 'import',
+    symbols: [{ name: 'OrderMailer', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/models/order.rb',
+    source: 'InventoryCheckJob',
+    type: 'import',
+    symbols: [{ name: 'InventoryCheckJob', kind: 'named' }],
+  },
+
+  // Services → models/services
+  {
+    fromFile: 'app/services/checkout_service.rb',
+    source: 'Book',
+    type: 'import',
+    symbols: [{ name: 'Book', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/services/checkout_service.rb',
+    source: 'InventoryService',
+    type: 'import',
+    symbols: [{ name: 'InventoryService', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/services/checkout_service.rb',
+    source: 'Order',
+    type: 'import',
+    symbols: [{ name: 'Order', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/services/checkout_service.rb',
+    source: 'OrderItem',
+    type: 'import',
+    symbols: [{ name: 'OrderItem', kind: 'named' }],
+  },
+  {
+    fromFile: 'app/services/inventory_service.rb',
+    source: 'Book',
+    type: 'import',
+    symbols: [{ name: 'Book', kind: 'named' }],
+  },
+
+  // Serializers → serializers
+  {
+    fromFile: 'app/serializers/order_serializer.rb',
+    source: 'BookSerializer',
+    type: 'import',
+    symbols: [{ name: 'BookSerializer', kind: 'named' }],
+  },
+
+  // Jobs → services
+  {
+    fromFile: 'app/jobs/inventory_check_job.rb',
+    source: 'InventoryService',
+    type: 'import',
+    symbols: [{ name: 'InventoryService', kind: 'named' }],
+  },
+];
diff --git a/evals/ground-truth/bookstore-api/index.ts b/evals/ground-truth/bookstore-api/index.ts
new file mode 100644
index 0000000..edafc48
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/index.ts
@@ -0,0 +1,39 @@
+import type { GroundTruth } from '../../harness/types.js';
+import { contracts } from './contracts.js';
+import { definitionMetadata } from './definition-metadata.js';
+import { definitions } from './definitions.js';
+import { featureCohesion } from './feature-cohesion.js';
+import { files } from './files.js';
+import { flowRubric } from './flow-rubric.js';
+import { imports } from './imports.js';
+import { interactionRubric } from './interaction-rubric.js';
+import { moduleCohesion } from './module-cohesion.js';
+import { modules } from './modules.js';
+import { relationships } from './relationships.js';
+
+/**
+ * Composed ground truth for the bookstore-api Ruby on Rails fixture.
+ *
+ * Iteration 1 (parse stage): files, definitions, imports
+ * Iteration 2 (symbols stage): + definitionMetadata (purpose/domain/pure)
+ * Iteration 3 (relationships stage): + relationships (extends/uses + semantic)
+ * Iteration 4 (modules stage): + moduleCohesion (cohesion + role rubric)
+ * Iteration 5 (contracts stage): + contracts (HTTP routes)
+ * Iteration 6 (interactions stage): + interactionRubric (anchor-based edges)
+ * Iteration 7 (flows stage): + flowRubric (theme-search user journeys)
+ * Iteration 8 (features stage): + featureCohesion (theme-search features)
+ */
+export const bookstoreApiGroundTruth: GroundTruth = {
+  fixtureName: 'bookstore-api',
+  files,
+  definitions,
+  imports,
+  definitionMetadata,
+  relationships,
+  modules,
+  moduleCohesion,
+  contracts,
+  interactionRubric,
+  flowRubric,
+  featureCohesion,
+};
diff --git a/evals/ground-truth/bookstore-api/interaction-rubric.ts b/evals/ground-truth/bookstore-api/interaction-rubric.ts
new file mode 100644
index 0000000..e9036c5
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/interaction-rubric.ts
@@ -0,0 +1,58 @@
+import { type InteractionRubricEntry, type InteractionSource, defKey } from '../../harness/types.js';
+
+/**
+ * Anchor-based ground truth for the LLM-driven interactions stage.
+ *
+ * Each entry asserts that the module containing FROM_ANCHOR has an
+ * interaction edge to the module containing TO_ANCHOR. The actual module
+ * full_paths are LLM-picked, so we use definitions as deterministic
+ * anchors and let the comparator resolve them at compare time.
+ *
+ * IMPORTANT: Rails Zeitwerk autoloading means there are 0 parse-time
+ * imports → 0 AST-derived interaction edges. ALL cross-module edges
+ * come from the LLM inference step. The acceptableSources must include
+ * 'llm-inferred' (unlike the TS fixture which uses AST-only defaults).
+ * This is a genuine architectural difference, not a quality gap.
+ *
+ * Authored COLD. If any edge turns out to be a self-loop (both anchors
+ * in the same module), it will be triaged and removed/adjusted.
+ */
+const ACCEPTABLE_SOURCES: InteractionSource[] = ['ast', 'ast-import', 'contract-matched', 'llm-inferred'];
+
+export const interactionRubric: InteractionRubricEntry[] = [
+  {
+    label: 'books-controller-uses-serializer',
+    fromAnchor: defKey('app/controllers/api/books_controller.rb', 'BooksController'),
+    toAnchor: defKey('app/serializers/book_serializer.rb', 'BookSerializer'),
+    acceptableSources: ACCEPTABLE_SOURCES,
+    semanticReference: 'Books controller serializes book data for API responses using BookSerializer',
+  },
+  {
+    label: 'orders-controller-uses-checkout',
+    fromAnchor: defKey('app/controllers/api/orders_controller.rb', 'OrdersController'),
+    toAnchor: defKey('app/services/checkout_service.rb', 'CheckoutService'),
+    acceptableSources: ACCEPTABLE_SOURCES,
+    semanticReference: 'Orders controller delegates order creation to the checkout service',
+  },
+  {
+    label: 'checkout-uses-inventory',
+    fromAnchor: defKey('app/services/checkout_service.rb', 'CheckoutService'),
+    toAnchor: defKey('app/services/inventory_service.rb', 'InventoryService'),
+    acceptableSources: ACCEPTABLE_SOURCES,
+    semanticReference: 'Checkout service validates and reserves stock via the inventory service',
+  },
+  {
+    label: 'sessions-controller-uses-user',
+    fromAnchor: defKey('app/controllers/api/sessions_controller.rb', 'SessionsController'),
+    toAnchor: defKey('app/models/user.rb', 'User'),
+    acceptableSources: ACCEPTABLE_SOURCES,
+    semanticReference: 'Sessions controller authenticates users via the User model',
+  },
+  {
+    label: 'order-triggers-mailer',
+    fromAnchor: defKey('app/models/order.rb', 'Order'),
+    toAnchor: defKey('app/mailers/order_mailer.rb', 'OrderMailer'),
+    acceptableSources: ACCEPTABLE_SOURCES,
+    semanticReference: 'Order model triggers confirmation email on creation via after_create callback',
+  },
+];
diff --git a/evals/ground-truth/bookstore-api/module-cohesion.ts b/evals/ground-truth/bookstore-api/module-cohesion.ts
new file mode 100644
index 0000000..995321a
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/module-cohesion.ts
@@ -0,0 +1,90 @@
+import { type ModuleCohesionGroup, defKey } from '../../harness/types.js';
+
+/**
+ * Cohesion rubric for the LLM-driven modules stage.
+ *
+ * Each group asserts that semantically related definitions land in the same
+ * module, and that module's LLM-picked name+description matches the expected
+ * role. Uses `majority` for groups where base classes may split across parent/
+ * child modules.
+ *
+ * Severity:
+ *   - Member unassigned to any module → CRITICAL
+ *   - Cohesion violated (strict/majority) → MAJOR
+ *   - Role prose drift → MINOR
+ */
+export const moduleCohesion: ModuleCohesionGroup[] = [
+  {
+    label: 'catalog-models',
+    members: [defKey('app/models/book.rb', 'Book'), defKey('app/models/author.rb', 'Author')],
+    expectedRole: 'Domain models for the book catalog: books and authors',
+    cohesion: 'majority',
+  },
+  {
+    label: 'order-models',
+    members: [defKey('app/models/order.rb', 'Order'), defKey('app/models/order_item.rb', 'OrderItem')],
+    expectedRole: 'Domain models for purchase orders and their line items',
+    cohesion: 'majority',
+  },
+  {
+    label: 'auth-model',
+    members: [defKey('app/models/user.rb', 'User')],
+    expectedRole: 'User model for authentication and identity',
+  },
+  {
+    label: 'books-api',
+    members: [defKey('app/controllers/api/books_controller.rb', 'BooksController')],
+    expectedRole: 'REST API controller for book catalog CRUD endpoints',
+  },
+  {
+    label: 'orders-api',
+    members: [defKey('app/controllers/api/orders_controller.rb', 'OrdersController')],
+    expectedRole: 'REST API controller for order management endpoints',
+  },
+  {
+    label: 'sessions-api',
+    members: [defKey('app/controllers/api/sessions_controller.rb', 'SessionsController')],
+    expectedRole: 'REST API controller for authentication session endpoints',
+  },
+  {
+    label: 'controller-base',
+    members: [
+      defKey('app/controllers/application_controller.rb', 'ApplicationController'),
+      defKey('app/controllers/api/base_controller.rb', 'BaseController'),
+    ],
+    expectedRole: 'Base controller hierarchy with authentication and JSON response helpers',
+    cohesion: 'majority',
+  },
+  {
+    label: 'checkout-services',
+    members: [
+      defKey('app/services/checkout_service.rb', 'CheckoutService'),
+      defKey('app/services/inventory_service.rb', 'InventoryService'),
+    ],
+    expectedRole: 'Business logic services for checkout and inventory management',
+    cohesion: 'majority',
+  },
+  {
+    label: 'serializers',
+    members: [
+      defKey('app/serializers/book_serializer.rb', 'BookSerializer'),
+      defKey('app/serializers/order_serializer.rb', 'OrderSerializer'),
+    ],
+    expectedRole: 'JSON serialization layer for API responses',
+    cohesion: 'majority',
+  },
+  {
+    label: 'async-effects',
+    members: [
+      defKey('app/mailers/order_mailer.rb', 'OrderMailer'),
+      defKey('app/jobs/inventory_check_job.rb', 'InventoryCheckJob'),
+    ],
+    expectedRole: 'Asynchronous side effects: email notifications and background inventory checks',
+    cohesion: 'majority',
+  },
+  {
+    label: 'base-record',
+    members: [defKey('app/models/application_record.rb', 'ApplicationRecord')],
+    expectedRole: 'Abstract ActiveRecord base class for all application models',
+  },
+];
diff --git a/evals/ground-truth/bookstore-api/modules.ts b/evals/ground-truth/bookstore-api/modules.ts
new file mode 100644
index 0000000..5ec56ae
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/modules.ts
@@ -0,0 +1,10 @@
+import type { GroundTruthModule } from '../../harness/types.js';
+
+/**
+ * Legacy module ground truth — not used by the module_cohesion comparator
+ * but kept for backward compatibility with older strategies.
+ *
+ * The bookstore-api uses the moduleCohesion rubric (virtual table) instead
+ * of strict module matching, so this array is intentionally empty.
+ */
+export const modules: GroundTruthModule[] = [];
diff --git a/evals/ground-truth/bookstore-api/relationships.ts b/evals/ground-truth/bookstore-api/relationships.ts
new file mode 100644
index 0000000..ed5d809
--- /dev/null
+++ b/evals/ground-truth/bookstore-api/relationships.ts
@@ -0,0 +1,87 @@
+import { type GroundTruthRelationship, defKey } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `relationship_annotations` table after running
+ * `squint ingest --to-stage relationships` against the bookstore-api fixture.
+ *
+ * Relationships are derived from two sources:
+ *   1. AST-detected inheritance (extends) — 9 edges from parse stage
+ *   2. LLM-annotated usage (uses) — discovered by the relationships stage
+ *
+ * The extends edges are deterministic. The uses edges are the LLM's
+ * interpretation of which definitions depend on which — more variable.
+ *
+ * Severity (compareRelationshipAnnotations):
+ *   - Missing GT relationship → CRITICAL
+ *   - Semantic prose drift → MINOR
+ */
+export const relationships: GroundTruthRelationship[] = [
+  // ============================================================
+  // extends (9 — from AST, deterministic)
+  // ============================================================
+  {
+    fromDef: defKey('app/controllers/api/base_controller.rb', 'BaseController'),
+    toDef: defKey('app/controllers/application_controller.rb', 'ApplicationController'),
+    relationshipType: 'extends',
+    semanticReference:
+      'API base controller inherits authentication and response infrastructure from the application controller',
+  },
+  {
+    fromDef: defKey('app/controllers/api/books_controller.rb', 'BooksController'),
+    toDef: defKey('app/controllers/api/base_controller.rb', 'BaseController'),
+    relationshipType: 'extends',
+    semanticReference:
+      'Books controller inherits JSON response helpers and authentication from the API base controller',
+  },
+  {
+    fromDef: defKey('app/controllers/api/orders_controller.rb', 'OrdersController'),
+    toDef: defKey('app/controllers/api/base_controller.rb', 'BaseController'),
+    relationshipType: 'extends',
+    semanticReference:
+      'Orders controller inherits JSON response helpers and authentication from the API base controller',
+  },
+  {
+    fromDef: defKey('app/controllers/api/sessions_controller.rb', 'SessionsController'),
+    toDef: defKey('app/controllers/api/base_controller.rb', 'BaseController'),
+    relationshipType: 'extends',
+    semanticReference: 'Sessions controller inherits JSON response helpers from the API base controller',
+  },
+  {
+    fromDef: defKey('app/models/author.rb', 'Author'),
+    toDef: defKey('app/models/application_record.rb', 'ApplicationRecord'),
+    relationshipType: 'extends',
+    semanticReference: 'Author model inherits ActiveRecord persistence from the application record base class',
+  },
+  {
+    fromDef: defKey('app/models/book.rb', 'Book'),
+    toDef: defKey('app/models/application_record.rb', 'ApplicationRecord'),
+    relationshipType: 'extends',
+    semanticReference: 'Book model inherits ActiveRecord persistence from the application record base class',
+  },
+  {
+    fromDef: defKey('app/models/order.rb', 'Order'),
+    toDef: defKey('app/models/application_record.rb', 'ApplicationRecord'),
+    relationshipType: 'extends',
+    semanticReference: 'Order model inherits ActiveRecord persistence from the application record base class',
+  },
+  {
+    fromDef: defKey('app/models/order_item.rb', 'OrderItem'),
+    toDef: defKey('app/models/application_record.rb', 'ApplicationRecord'),
+    relationshipType: 'extends',
+    semanticReference: 'OrderItem model inherits ActiveRecord persistence from the application record base class',
+  },
+  {
+    fromDef: defKey('app/models/user.rb', 'User'),
+    toDef: defKey('app/models/application_record.rb', 'ApplicationRecord'),
+    relationshipType: 'extends',
+    semanticReference: 'User model inherits ActiveRecord persistence from the application record base class',
+  },
+
+  // NOTE: No `uses` edges in this GT. Rails Zeitwerk autoloading means
+  // there are 0 parse-time imports — squint has no static evidence to
+  // build cross-file `uses` relationships from at the relationships stage.
+  // Cross-file dependencies surface at the interactions stage (iter 6)
+  // where the LLM infers module-pair edges from code analysis.
+  // This is a genuine difference between Rails and Express — the TS
+  // fixture has 36 imports → 27 uses edges; the Rails fixture has 0.
+];
diff --git a/evals/ground-truth/todo-api/contracts.ts b/evals/ground-truth/todo-api/contracts.ts
new file mode 100644
index 0000000..e65aacd
--- /dev/null
+++ b/evals/ground-truth/todo-api/contracts.ts
@@ -0,0 +1,133 @@
+import { type GroundTruthContract, defKey } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `contracts` and `contract_participants` tables after
+ * running `squint ingest --to-stage contracts` against the todo-api fixture.
+ *
+ * Authored against the actual produced state from the iter-5 cold-pass DB.
+ * Two normalization quirks were discovered during triage:
+ *
+ *   1. squint normalizes route params as `{param}` (not `:id`).
+ *   2. squint extracts the controller-local route paths (e.g. `/login`,
+ *      `/tasks`) WITHOUT the mount prefix (`/api/auth`, `/api/tasks`).
+ *      The mount prefix lives in src/index.ts (`app.use('/api/auth', ...)`)
+ *      but squint doesn't currently propagate it down to the routes. This
+ *      is a deliberate scope choice — the GT matches what squint produces.
+ *   3. The events protocol is singular `event` (not `events`).
+ *
+ * todo-api exposes 9 HTTP endpoints across 2 controllers (auth + tasks)
+ * and emits 2 in-process events from the tasks service.
+ *
+ * Severity (compareContracts):
+ *   - Missing GT contract → CRITICAL
+ *   - Extra produced contract → MAJOR
+ *   - Participants are NOT yet checked by the comparator (TODO)
+ */
+export const contracts: GroundTruthContract[] = [
+  // ============================================================
+  // HTTP — Authentication endpoints (3)
+  // ============================================================
+  {
+    protocol: 'http',
+    normalizedKey: 'POST /auth/register',
+    participants: [
+      { defKey: defKey('src/controllers/auth.controller.ts', 'AuthController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'register'), role: 'client' },
+    ],
+  },
+  {
+    protocol: 'http',
+    normalizedKey: 'POST /auth/login',
+    participants: [
+      { defKey: defKey('src/controllers/auth.controller.ts', 'AuthController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'login'), role: 'client' },
+    ],
+  },
+  {
+    protocol: 'http',
+    normalizedKey: 'GET /auth/me',
+    participants: [{ defKey: defKey('src/controllers/auth.controller.ts', 'AuthController'), role: 'server' }],
+  },
+
+  // ============================================================
+  // HTTP — Task CRUD endpoints (6)
+  // ============================================================
+  {
+    protocol: 'http',
+    normalizedKey: 'GET /tasks',
+    participants: [
+      { defKey: defKey('src/controllers/tasks.controller.ts', 'TasksController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'listTasks'), role: 'client' },
+    ],
+  },
+  {
+    protocol: 'http',
+    normalizedKey: 'GET /tasks/{param}',
+    participants: [
+      { defKey: defKey('src/controllers/tasks.controller.ts', 'TasksController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'getTask'), role: 'client' },
+    ],
+  },
+  {
+    protocol: 'http',
+    normalizedKey: 'POST /tasks',
+    participants: [
+      { defKey: defKey('src/controllers/tasks.controller.ts', 'TasksController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'createTask'), role: 'client' },
+    ],
+  },
+  {
+    protocol: 'http',
+    normalizedKey: 'PUT /tasks/{param}',
+    participants: [
+      { defKey: defKey('src/controllers/tasks.controller.ts', 'TasksController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'updateTask'), role: 'client' },
+    ],
+  },
+  {
+    protocol: 'http',
+    normalizedKey: 'PATCH /tasks/{param}/complete',
+    participants: [
+      { defKey: defKey('src/controllers/tasks.controller.ts', 'TasksController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'completeTask'), role: 'client' },
+    ],
+  },
+  {
+    protocol: 'http',
+    normalizedKey: 'DELETE /tasks/{param}',
+    participants: [
+      { defKey: defKey('src/controllers/tasks.controller.ts', 'TasksController'), role: 'server' },
+      { defKey: defKey('client/tasks.client.ts', 'deleteTask'), role: 'client' },
+    ],
+  },
+
+  // ============================================================
+  // Events — In-process pub/sub (2)
+  // ============================================================
+  // Producer: TasksService.create / TasksService.complete (via eventBus.emit).
+  // Consumer: auditLogger (subscribed to task.completed at module load).
+  // squint uses the singular protocol name 'event'.
+  //
+  // NOTE: events are marked `optional` because the contract LLM extractor
+  // is non-deterministic for in-process pub/sub: some runs detect both
+  // task.created and task.completed, others detect zero events. The boundary
+  // status of an in-process event bus is genuinely ambiguous (it's not
+  // strictly cross-process). Marking these optional lets the GT assert
+  // "if the LLM extracts events, they should be these two" without forcing
+  // a hard requirement that varies run-to-run.
+  {
+    protocol: 'event',
+    normalizedKey: 'task.created',
+    participants: [{ defKey: defKey('src/services/tasks.service.ts', 'TasksService'), role: 'producer' }],
+    optional: true,
+  },
+  {
+    protocol: 'event',
+    normalizedKey: 'task.completed',
+    participants: [
+      { defKey: defKey('src/services/tasks.service.ts', 'TasksService'), role: 'producer' },
+      { defKey: defKey('src/events/event-bus.ts', 'auditLogger'), role: 'consumer' },
+    ],
+    optional: true,
+  },
+];
diff --git a/evals/ground-truth/todo-api/definition-metadata.ts b/evals/ground-truth/todo-api/definition-metadata.ts
new file mode 100644
index 0000000..587d5ac
--- /dev/null
+++ b/evals/ground-truth/todo-api/definition-metadata.ts
@@ -0,0 +1,610 @@
+import { type GroundTruthDefinitionMetadata, defKey } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `definition_metadata` table after running squint's
+ * symbols annotate stage on todo-api.
+ *
+ * Authored COLD from manual reading of each fixture file (NOT informed by
+ * empirical squint output, per the iteration 1 honesty audit). The triage
+ * loop is built to handle initial mismatches.
+ *
+ * Aspects covered (matching squint's default ingest pipeline):
+ * - purpose: 1-2 sentence reference text, prose-judged via LLM. Default min 0.75.
+ * - domain:  one-sentence semantic theme, judged via LLM (themeReference).
+ *            Replaces the previous acceptableSet vocabulary lists — see
+ *            Phase 1 redesign notes in the `feat/eval-harness` history.
+ * - pure:    exact 'true'/'false' string match. Major if differs.
+ *
+ * Coverage exceptions:
+ * - Type aliases and interfaces: purpose only (no domain, no pure).
+ * - Primitive constants (BASE_URL, PORT): purpose only.
+ * - Everything else: all 3 aspects.
+ */
+
+// ============================================================
+// Helper builders — keep entries readable
+// ============================================================
+
+function purpose(file: string, name: string, reference: string, minSimilarity = 0.75): GroundTruthDefinitionMetadata {
+  return {
+    defKey: defKey(file, name),
+    key: 'purpose',
+    proseReference: reference,
+    minSimilarity,
+  };
+}
+
+/**
+ * Tag-array semantic theme. Replaces the previous `domain(file, name, vocab)`
+ * helper that consumed long acceptableSet vocabularies. Each call now passes
+ * a one-sentence prose theme that the LLM judge scores against the produced
+ * tag array (formatted as "tags: a, b, c"). The judge handles synonym drift
+ * automatically — no more vocabulary whack-a-mole.
+ *
+ * Default minSimilarity is 0.6 (set inside the comparator), tuned for short
+ * comma-separated tag candidates.
+ */
+function domainTheme(file: string, name: string, theme: string): GroundTruthDefinitionMetadata {
+  return {
+    defKey: defKey(file, name),
+    key: 'domain',
+    themeReference: theme,
+  };
+}
+
+function pure(file: string, name: string, isPure: boolean): GroundTruthDefinitionMetadata {
+  return {
+    defKey: defKey(file, name),
+    key: 'pure',
+    exactValue: isPure ? 'true' : 'false',
+  };
+}
+
+// ============================================================
+// All metadata entries
+// ============================================================
+
+export const definitionMetadata: GroundTruthDefinitionMetadata[] = [
+  // ----------------------------------------------------------
+  // src/framework.ts — minimal in-fixture HTTP framework
+  // ----------------------------------------------------------
+  // Interfaces and types: purpose only (no behavior, no meaningful domain/pure for the interface itself)
+  purpose(
+    'src/framework.ts',
+    'Request',
+    'Represents an incoming HTTP request with body, path params, headers, and an optional authenticated user.'
+  ),
+  purpose(
+    'src/framework.ts',
+    'Response',
+    'Represents an outgoing HTTP response with chainable status and JSON body methods.'
+  ),
+  purpose(
+    'src/framework.ts',
+    'NextFunction',
+    'Callback used by middleware to pass control to the next handler in the chain.'
+  ),
+  purpose(
+    'src/framework.ts',
+    'Handler',
+    'Function signature for HTTP route handlers and middleware: receives request, response, and an optional next callback.'
+  ),
+  purpose(
+    'src/framework.ts',
+    'Router',
+    'Interface for registering HTTP route handlers indexed by method (get, post, put, patch, delete).'
+  ),
+  purpose(
+    'src/framework.ts',
+    'App',
+    'Interface for the top-level HTTP application that mounts routers and starts the server.'
+  ),
+
+  // Module-level registries (mutated by createRouter/createApp to make
+  // those functions unambiguously impure)
+  purpose(
+    'src/framework.ts',
+    'routerRegistry',
+    'Module-level mutable array tracking every Router instance constructed by createRouter, used by the framework for diagnostics.'
+  ),
+  domainTheme(
+    'src/framework.ts',
+    'routerRegistry',
+    'tags should reflect a module-level registry tracking router instances within an HTTP framework'
+  ),
+  pure('src/framework.ts', 'routerRegistry', false),
+
+  purpose(
+    'src/framework.ts',
+    'appRegistry',
+    'Module-level mutable array tracking every App instance constructed by createApp, used by the framework for diagnostics.'
+  ),
+  domainTheme(
+    'src/framework.ts',
+    'appRegistry',
+    'tags should reflect a module-level registry tracking app instances within an HTTP framework'
+  ),
+  pure('src/framework.ts', 'appRegistry', false),
+
+  // Functions
+  purpose(
+    'src/framework.ts',
+    'createRouter',
+    'Construct a new Router instance that registers HTTP route handlers per method and path.'
+  ),
+  domainTheme(
+    'src/framework.ts',
+    'createRouter',
+    'tags should reflect a factory function that constructs HTTP routers within a web framework'
+  ),
+  // Now unambiguously impure: each call mutates the module-level routerRegistry.
+  pure('src/framework.ts', 'createRouter', false),
+
+  purpose(
+    'src/framework.ts',
+    'createApp',
+    'Construct a new App instance for mounting routers and starting the HTTP server.'
+  ),
+  domainTheme(
+    'src/framework.ts',
+    'createApp',
+    'tags should reflect a factory function that constructs an HTTP application within a web framework'
+  ),
+  // Now unambiguously impure: each call mutates the module-level appRegistry.
+  pure('src/framework.ts', 'createApp', false),
+
+  // ----------------------------------------------------------
+  // src/types.ts — domain types
+  // ----------------------------------------------------------
+  purpose(
+    'src/types.ts',
+    'Task',
+    'A task entity with id, title, description, owner, completion status, and timestamps for creation and completion.'
+  ),
+  purpose(
+    'src/types.ts',
+    'User',
+    'A user entity with unique id, email, and a stored password hash for authentication.'
+  ),
+  purpose(
+    'src/types.ts',
+    'NewTaskInput',
+    'Input payload shape for creating a new task: title and description supplied by the client.'
+  ),
+
+  // ----------------------------------------------------------
+  // src/events/event-bus.ts — in-memory pub/sub
+  // ----------------------------------------------------------
+  purpose(
+    'src/events/event-bus.ts',
+    'EventName',
+    'Discriminated union of supported event names emitted on the in-memory event bus.'
+  ),
+  purpose(
+    'src/events/event-bus.ts',
+    'EventHandler',
+    'Callback signature for event subscribers: receives a generic payload object.'
+  ),
+
+  purpose(
+    'src/events/event-bus.ts',
+    'EventBus',
+    'In-memory publish/subscribe bus that lets producers emit named events and consumers subscribe to handle them.'
+  ),
+  domainTheme(
+    'src/events/event-bus.ts',
+    'EventBus',
+    'tags should reflect an in-memory publish/subscribe event bus carrying named application events'
+  ),
+  pure('src/events/event-bus.ts', 'EventBus', false), // mutable subscriber map
+
+  purpose(
+    'src/events/event-bus.ts',
+    'eventBus',
+    'Singleton in-memory EventBus instance shared by the application; module initialization also subscribes the auditLogger to task.completed events.'
+  ),
+  domainTheme(
+    'src/events/event-bus.ts',
+    'eventBus',
+    'tags should reflect a singleton event bus instance shared by the application, also tied to audit subscriptions for task lifecycle events'
+  ),
+  pure('src/events/event-bus.ts', 'eventBus', false),
+
+  purpose(
+    'src/events/event-bus.ts',
+    'auditLogger',
+    'Event subscriber that records task completion events for audit and observability purposes.'
+  ),
+  domainTheme(
+    'src/events/event-bus.ts',
+    'auditLogger',
+    'tags should reflect an event-subscriber audit logger recording task completion events'
+  ),
+  pure('src/events/event-bus.ts', 'auditLogger', false), // performs side effect (logging)
+
+  // ----------------------------------------------------------
+  // src/repositories/base.repository.ts — generic in-memory repository
+  // ----------------------------------------------------------
+  purpose(
+    'src/repositories/base.repository.ts',
+    'BaseRepository',
+    'Abstract generic repository providing in-memory CRUD operations (find, save, delete) for entities identified by id.'
+  ),
+  domainTheme(
+    'src/repositories/base.repository.ts',
+    'BaseRepository',
+    'tags should reflect an abstract in-memory repository providing generic CRUD persistence for entities'
+  ),
+  pure('src/repositories/base.repository.ts', 'BaseRepository', false), // mutable items Map
+
+  // ----------------------------------------------------------
+  // src/repositories/tasks.repository.ts
+  // ----------------------------------------------------------
+  purpose(
+    'src/repositories/tasks.repository.ts',
+    'TasksRepository',
+    'Tasks-specific repository extending BaseRepository with helpers to find tasks by owner and to filter completed tasks.'
+  ),
+  domainTheme(
+    'src/repositories/tasks.repository.ts',
+    'TasksRepository',
+    'tags should reflect a tasks-specific in-memory repository extending a generic base repository'
+  ),
+  pure('src/repositories/tasks.repository.ts', 'TasksRepository', false),
+
+  purpose(
+    'src/repositories/tasks.repository.ts',
+    'tasksRepository',
+    'Singleton TasksRepository instance shared across the application.'
+  ),
+  domainTheme(
+    'src/repositories/tasks.repository.ts',
+    'tasksRepository',
+    'tags should reflect a singleton tasks repository instance shared across the application'
+  ),
+  pure('src/repositories/tasks.repository.ts', 'tasksRepository', false),
+
+  // ----------------------------------------------------------
+  // src/services/auth.service.ts — auth, password, JWT-like tokens
+  // ----------------------------------------------------------
+  purpose(
+    'src/services/auth.service.ts',
+    'usersByEmail',
+    'Module-scoped Map of registered users keyed by email — the in-memory user store backing the auth service.',
+    0.6 // tolerant: LLM tends to describe surrounding auth context, not just the storage
+  ),
+  domainTheme(
+    'src/services/auth.service.ts',
+    'usersByEmail',
+    'tags should reflect an in-memory user store keyed by email backing the authentication service'
+  ),
+  pure('src/services/auth.service.ts', 'usersByEmail', false), // mutable Map instance
+
+  purpose(
+    'src/services/auth.service.ts',
+    'hashPassword',
+    'Stub password hasher that prefixes the plaintext with "hashed:" — placeholder for a real cryptographic hash, not actually secure.'
+  ),
+  domainTheme(
+    'src/services/auth.service.ts',
+    'hashPassword',
+    'tags should reflect a password hashing function used during user registration'
+  ),
+  pure('src/services/auth.service.ts', 'hashPassword', true), // deterministic, no side effects
+
+  purpose(
+    'src/services/auth.service.ts',
+    'verifyPassword',
+    'Compare a plaintext password against a stored hash and return whether they match.'
+  ),
+  domainTheme(
+    'src/services/auth.service.ts',
+    'verifyPassword',
+    'tags should reflect a password verification function comparing plaintext against a stored hash'
+  ),
+  pure('src/services/auth.service.ts', 'verifyPassword', true),
+
+  purpose(
+    'src/services/auth.service.ts',
+    'signToken',
+    'Generate a session token string for the given authenticated user.'
+  ),
+  domainTheme(
+    'src/services/auth.service.ts',
+    'signToken',
+    'tags should reflect a function that signs an authentication token for a user'
+  ),
+  pure('src/services/auth.service.ts', 'signToken', true),
+
+  purpose(
+    'src/services/auth.service.ts',
+    'decodeToken',
+    'Parse a session token string and return the associated user identity, or null if invalid.'
+  ),
+  domainTheme(
+    'src/services/auth.service.ts',
+    'decodeToken',
+    'tags should reflect a function that decodes an authentication token and returns the associated user'
+  ),
+  pure('src/services/auth.service.ts', 'decodeToken', false), // reads usersByEmail map
+
+  purpose(
+    'src/services/auth.service.ts',
+    'AuthService',
+    'Authentication service handling user registration, login by credentials, and verification of session tokens.'
+  ),
+  domainTheme(
+    'src/services/auth.service.ts',
+    'AuthService',
+    'tags should reflect an authentication service handling user registration, login, and token verification'
+  ),
+  pure('src/services/auth.service.ts', 'AuthService', false),
+
+  purpose('src/services/auth.service.ts', 'authService', 'Singleton AuthService instance shared by the application.'),
+  domainTheme(
+    'src/services/auth.service.ts',
+    'authService',
+    'tags should reflect a singleton authentication service instance shared by the application'
+  ),
+  pure('src/services/auth.service.ts', 'authService', false),
+
+  // ----------------------------------------------------------
+  // src/services/tasks.service.ts — task CRUD orchestration + events
+  // ----------------------------------------------------------
+  purpose(
+    'src/services/tasks.service.ts',
+    'TasksService',
+    'Tasks orchestration service: lists, retrieves, creates, updates, completes, and deletes tasks, emitting domain events on creation and completion.'
+  ),
+  domainTheme(
+    'src/services/tasks.service.ts',
+    'TasksService',
+    'tags should reflect a tasks orchestration service handling CRUD operations and emitting domain events'
+  ),
+  pure('src/services/tasks.service.ts', 'TasksService', false),
+
+  purpose(
+    'src/services/tasks.service.ts',
+    'tasksService',
+    'Singleton TasksService instance shared by the application.'
+  ),
+  domainTheme(
+    'src/services/tasks.service.ts',
+    'tasksService',
+    'tags should reflect a singleton tasks service instance shared by the application'
+  ),
+  pure('src/services/tasks.service.ts', 'tasksService', false),
+
+  // ----------------------------------------------------------
+  // src/middleware/auth.middleware.ts
+  // ----------------------------------------------------------
+  purpose(
+    'src/middleware/auth.middleware.ts',
+    'requireAuth',
+    'HTTP middleware that extracts a Bearer token from the Authorization header, verifies it, attaches the user to the request, and rejects unauthorized requests with a 401 response.'
+  ),
+  domainTheme(
+    'src/middleware/auth.middleware.ts',
+    'requireAuth',
+    'tags should reflect HTTP middleware that authenticates a bearer token before a protected endpoint runs'
+  ),
+  pure('src/middleware/auth.middleware.ts', 'requireAuth', false), // mutates req, calls res.status/json
+
+  // ----------------------------------------------------------
+  // src/controllers/base.controller.ts
+  // ----------------------------------------------------------
+  purpose(
+    'src/controllers/base.controller.ts',
+    'BaseController',
+    'Abstract base class for HTTP controllers providing protected helpers to send success responses, failure responses, and to format unexpected errors.'
+  ),
+  domainTheme(
+    'src/controllers/base.controller.ts',
+    'BaseController',
+    'tags should reflect an abstract HTTP controller base class with shared response and error helpers'
+  ),
+  pure('src/controllers/base.controller.ts', 'BaseController', false),
+
+  // ----------------------------------------------------------
+  // src/controllers/auth.controller.ts
+  // ----------------------------------------------------------
+  purpose(
+    'src/controllers/auth.controller.ts',
+    'AuthController',
+    'HTTP controller exposing authentication endpoints (register, login, me) that delegate to AuthService and format responses.'
+  ),
+  domainTheme(
+    'src/controllers/auth.controller.ts',
+    'AuthController',
+    'tags should reflect an HTTP controller exposing authentication endpoints (register, login, identity)'
+  ),
+  pure('src/controllers/auth.controller.ts', 'AuthController', false),
+
+  purpose(
+    'src/controllers/auth.controller.ts',
+    'authController',
+    'Module-level AuthController instance whose handlers are wired into the auth HTTP routes.',
+    0.6 // tolerant — LLM and reference describe the same instantiation in different words
+  ),
+  domainTheme(
+    'src/controllers/auth.controller.ts',
+    'authController',
+    'tags should reflect a singleton auth controller instance mounted into the HTTP routes'
+  ),
+  pure('src/controllers/auth.controller.ts', 'authController', false),
+
+  // ----------------------------------------------------------
+  // src/controllers/tasks.controller.ts
+  // ----------------------------------------------------------
+  purpose(
+    'src/controllers/tasks.controller.ts',
+    'TasksController',
+    'HTTP controller exposing CRUD endpoints for tasks (list, get, create, update, complete, delete) protected by authentication middleware and delegating to TasksService.'
+  ),
+  domainTheme(
+    'src/controllers/tasks.controller.ts',
+    'TasksController',
+    'tags should reflect an HTTP controller exposing task CRUD endpoints gated by authentication middleware'
+  ),
+  pure('src/controllers/tasks.controller.ts', 'TasksController', false),
+
+  purpose(
+    'src/controllers/tasks.controller.ts',
+    'tasksController',
+    'Module-level TasksController instance created at load time to handle task-related HTTP requests for the application.',
+    0.65 // borderline — LLM and reference describe the same thing in different words
+  ),
+  domainTheme(
+    'src/controllers/tasks.controller.ts',
+    'tasksController',
+    'tags should reflect a singleton tasks controller instance mounted into the HTTP routes'
+  ),
+  pure('src/controllers/tasks.controller.ts', 'tasksController', false),
+
+  // ----------------------------------------------------------
+  // src/index.ts — application bootstrap
+  // ----------------------------------------------------------
+  purpose(
+    'src/index.ts',
+    'app',
+    'HTTP application instance initialized at module load that mounts the auth and tasks routes and starts the server.',
+    0.6 // tolerant — LLM describes the lifecycle, reference describes the role
+  ),
+  domainTheme(
+    'src/index.ts',
+    'app',
+    'tags should reflect the bootstrap HTTP application instance that mounts routers and starts the server'
+  ),
+  pure('src/index.ts', 'app', false),
+
+  purpose('src/index.ts', 'PORT', 'TCP port number on which the HTTP application listens.'),
+  // PORT is a primitive const — no domain, no pure (no behavior)
+
+  // ----------------------------------------------------------
+  // client/tasks.client.ts — frontend HTTP API client
+  // ----------------------------------------------------------
+  purpose('client/tasks.client.ts', 'BASE_URL', 'Base URL of the backend HTTP API that the client targets.'),
+  // BASE_URL is a primitive const — no domain, no pure
+
+  purpose(
+    'client/tasks.client.ts',
+    'HttpFn',
+    'Function type alias describing a generic HTTP fetch-like function (input URL, init options) returning a JSON-decoded response.'
+  ),
+
+  purpose(
+    'client/tasks.client.ts',
+    'http',
+    'Module-level HTTP function reference resolved from globalThis.fetch with a fallback that throws when no fetch is available, used by the client for API calls.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'http',
+    'tags should reflect a network HTTP function used by a frontend API client for backend requests'
+  ),
+  pure('client/tasks.client.ts', 'http', false), // calls real network at runtime
+
+  purpose(
+    'client/tasks.client.ts',
+    'request',
+    'Internal helper that performs an authenticated JSON HTTP request and returns the parsed response body, used by the public API client functions.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'request',
+    'tags should reflect an internal HTTP request helper used by a frontend API client'
+  ),
+  pure('client/tasks.client.ts', 'request', false),
+
+  purpose(
+    'client/tasks.client.ts',
+    'login',
+    'Client API function that exchanges email and password for an authentication token by calling the backend login endpoint.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'login',
+    'tags should reflect a frontend client function that authenticates a user against the backend login endpoint'
+  ),
+  pure('client/tasks.client.ts', 'login', false),
+
+  purpose(
+    'client/tasks.client.ts',
+    'register',
+    'Client API function that creates a new user account on the backend and returns an authentication token.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'register',
+    'tags should reflect a frontend client function that registers a new user on the backend'
+  ),
+  pure('client/tasks.client.ts', 'register', false),
+
+  purpose(
+    'client/tasks.client.ts',
+    'listTasks',
+    'Client API function that fetches the authenticated user’s task list from the backend.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'listTasks',
+    'tags should reflect a frontend client function that lists tasks from the backend'
+  ),
+  pure('client/tasks.client.ts', 'listTasks', false),
+
+  purpose(
+    'client/tasks.client.ts',
+    'getTask',
+    'Client API function that fetches a single task by id from the backend.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'getTask',
+    'tags should reflect a frontend client function that fetches a task by id from the backend'
+  ),
+  pure('client/tasks.client.ts', 'getTask', false),
+
+  purpose(
+    'client/tasks.client.ts',
+    'createTask',
+    'Client API function that posts a new task payload to the backend and returns the created task.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'createTask',
+    'tags should reflect a frontend client function that creates a new task on the backend'
+  ),
+  pure('client/tasks.client.ts', 'createTask', false),
+
+  purpose(
+    'client/tasks.client.ts',
+    'updateTask',
+    'Client API function that updates the title or description of an existing task on the backend.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'updateTask',
+    'tags should reflect a frontend client function that updates an existing task on the backend'
+  ),
+  pure('client/tasks.client.ts', 'updateTask', false),
+
+  purpose(
+    'client/tasks.client.ts',
+    'completeTask',
+    'Client API function that marks an existing task as completed by calling the backend complete endpoint.'
+  ),
+  domainTheme(
+    'client/tasks.client.ts',
+    'completeTask',
+    'tags should reflect a frontend client function that marks a task as completed on the backend'
+  ),
+  pure('client/tasks.client.ts', 'completeTask', false),
+
+  purpose('client/tasks.client.ts', 'deleteTask', 'Client API function that deletes a task from the backend by id.'),
+  domainTheme(
+    'client/tasks.client.ts',
+    'deleteTask',
+    'tags should reflect a frontend client function that deletes a task from the backend'
+  ),
+  pure('client/tasks.client.ts', 'deleteTask', false),
+];
diff --git a/evals/ground-truth/todo-api/definitions.ts b/evals/ground-truth/todo-api/definitions.ts
new file mode 100644
index 0000000..8f68e1c
--- /dev/null
+++ b/evals/ground-truth/todo-api/definitions.ts
@@ -0,0 +1,144 @@
+import type { GroundTruthDefinition } from '../../harness/types.js';
+
+/**
+ * Definitions squint should extract from each fixture file. Authored from
+ * a careful manual reading of each file. The comparator allows ±2 line
+ * tolerance, so minor formatting changes won't break this.
+ *
+ * Notes on `kind`:
+ * - Arrow function consts (e.g. `export const foo = () => {}`) are 'const',
+ *   NOT 'function' — squint classifies by declaration type, not value type.
+ * - Generic inheritance like `extends BaseRepository<Task>` should yield
+ *   `extendsName: 'BaseRepository'` (the type arg is stripped).
+ */
+export const definitions: GroundTruthDefinition[] = [
+  // ----------------------------------------------------------
+  // src/framework.ts (10 definitions)
+  // ----------------------------------------------------------
+  { file: 'src/framework.ts', name: 'Request', kind: 'interface', isExported: true, line: 5 },
+  { file: 'src/framework.ts', name: 'Response', kind: 'interface', isExported: true, line: 12 },
+  { file: 'src/framework.ts', name: 'NextFunction', kind: 'type', isExported: true, line: 17 },
+  { file: 'src/framework.ts', name: 'Handler', kind: 'type', isExported: true, line: 18 },
+  { file: 'src/framework.ts', name: 'Router', kind: 'interface', isExported: true, line: 20 },
+  { file: 'src/framework.ts', name: 'App', kind: 'interface', isExported: true, line: 28 },
+  // routerRegistry and appRegistry exist solely to make createRouter and
+  // createApp unambiguously impure (each call appends to a module-level array).
+  // Without these, the LLM flips between true/false on the pure aspect.
+  { file: 'src/framework.ts', name: 'routerRegistry', kind: 'const', isExported: false, line: 40 },
+  { file: 'src/framework.ts', name: 'appRegistry', kind: 'const', isExported: false, line: 47 },
+  { file: 'src/framework.ts', name: 'createRouter', kind: 'function', isExported: true, line: 49 },
+  { file: 'src/framework.ts', name: 'createApp', kind: 'function', isExported: true, line: 68 },
+
+  // ----------------------------------------------------------
+  // src/types.ts (3 definitions)
+  // ----------------------------------------------------------
+  { file: 'src/types.ts', name: 'Task', kind: 'interface', isExported: true, line: 1 },
+  { file: 'src/types.ts', name: 'User', kind: 'interface', isExported: true, line: 11 },
+  { file: 'src/types.ts', name: 'NewTaskInput', kind: 'interface', isExported: true, line: 17 },
+
+  // ----------------------------------------------------------
+  // src/events/event-bus.ts (5 definitions)
+  // ----------------------------------------------------------
+  { file: 'src/events/event-bus.ts', name: 'EventName', kind: 'type', isExported: true, line: 5 },
+  { file: 'src/events/event-bus.ts', name: 'EventHandler', kind: 'type', isExported: true, line: 7 },
+  { file: 'src/events/event-bus.ts', name: 'EventBus', kind: 'class', isExported: true, line: 9 },
+  { file: 'src/events/event-bus.ts', name: 'eventBus', kind: 'const', isExported: true, line: 26 },
+  { file: 'src/events/event-bus.ts', name: 'auditLogger', kind: 'function', isExported: true, line: 30 },
+
+  // ----------------------------------------------------------
+  // src/repositories/base.repository.ts (1 definition)
+  // ----------------------------------------------------------
+  { file: 'src/repositories/base.repository.ts', name: 'BaseRepository', kind: 'class', isExported: true, line: 5 },
+
+  // ----------------------------------------------------------
+  // src/repositories/tasks.repository.ts (2 definitions)
+  // ----------------------------------------------------------
+  {
+    file: 'src/repositories/tasks.repository.ts',
+    name: 'TasksRepository',
+    kind: 'class',
+    isExported: true,
+    line: 4,
+    extendsName: 'BaseRepository', // Note: NOT 'BaseRepository<Task>' — type arg is stripped
+  },
+  { file: 'src/repositories/tasks.repository.ts', name: 'tasksRepository', kind: 'const', isExported: true, line: 14 },
+
+  // ----------------------------------------------------------
+  // src/services/auth.service.ts (7 definitions, including 5 unexported helpers)
+  // ----------------------------------------------------------
+  { file: 'src/services/auth.service.ts', name: 'usersByEmail', kind: 'const', isExported: false, line: 6 },
+  { file: 'src/services/auth.service.ts', name: 'hashPassword', kind: 'function', isExported: false, line: 8 },
+  { file: 'src/services/auth.service.ts', name: 'verifyPassword', kind: 'function', isExported: false, line: 12 },
+  { file: 'src/services/auth.service.ts', name: 'signToken', kind: 'function', isExported: false, line: 16 },
+  { file: 'src/services/auth.service.ts', name: 'decodeToken', kind: 'function', isExported: false, line: 20 },
+  { file: 'src/services/auth.service.ts', name: 'AuthService', kind: 'class', isExported: true, line: 29 },
+  { file: 'src/services/auth.service.ts', name: 'authService', kind: 'const', isExported: true, line: 56 },
+
+  // ----------------------------------------------------------
+  // src/services/tasks.service.ts (2 definitions)
+  // ----------------------------------------------------------
+  { file: 'src/services/tasks.service.ts', name: 'TasksService', kind: 'class', isExported: true, line: 5 },
+  { file: 'src/services/tasks.service.ts', name: 'tasksService', kind: 'const', isExported: true, line: 51 },
+
+  // ----------------------------------------------------------
+  // src/middleware/auth.middleware.ts (1 definition)
+  // ----------------------------------------------------------
+  { file: 'src/middleware/auth.middleware.ts', name: 'requireAuth', kind: 'const', isExported: true, line: 4 },
+
+  // ----------------------------------------------------------
+  // src/controllers/base.controller.ts (1 definition)
+  // ----------------------------------------------------------
+  { file: 'src/controllers/base.controller.ts', name: 'BaseController', kind: 'class', isExported: true, line: 6 },
+
+  // ----------------------------------------------------------
+  // src/controllers/auth.controller.ts (2 definitions)
+  // ----------------------------------------------------------
+  {
+    file: 'src/controllers/auth.controller.ts',
+    name: 'AuthController',
+    kind: 'class',
+    isExported: true,
+    line: 5,
+    extendsName: 'BaseController',
+  },
+  { file: 'src/controllers/auth.controller.ts', name: 'authController', kind: 'const', isExported: true, line: 45 },
+
+  // ----------------------------------------------------------
+  // src/controllers/tasks.controller.ts (2 definitions)
+  // ----------------------------------------------------------
+  {
+    file: 'src/controllers/tasks.controller.ts',
+    name: 'TasksController',
+    kind: 'class',
+    isExported: true,
+    line: 6,
+    extendsName: 'BaseController',
+  },
+  { file: 'src/controllers/tasks.controller.ts', name: 'tasksController', kind: 'const', isExported: true, line: 75 },
+
+  // ----------------------------------------------------------
+  // src/index.ts (2 definitions, both unexported)
+  // ----------------------------------------------------------
+  { file: 'src/index.ts', name: 'app', kind: 'const', isExported: false, line: 8 },
+  { file: 'src/index.ts', name: 'PORT', kind: 'const', isExported: false, line: 13 },
+
+  // ----------------------------------------------------------
+  // client/tasks.client.ts (12 definitions)
+  // ----------------------------------------------------------
+  { file: 'client/tasks.client.ts', name: 'BASE_URL', kind: 'const', isExported: false, line: 7 },
+  { file: 'client/tasks.client.ts', name: 'HttpFn', kind: 'type', isExported: false, line: 9 },
+  { file: 'client/tasks.client.ts', name: 'http', kind: 'const', isExported: false, line: 15 },
+  { file: 'client/tasks.client.ts', name: 'request', kind: 'function', isExported: false, line: 20 },
+  { file: 'client/tasks.client.ts', name: 'login', kind: 'function', isExported: true, line: 32 },
+  { file: 'client/tasks.client.ts', name: 'register', kind: 'function', isExported: true, line: 36 },
+  { file: 'client/tasks.client.ts', name: 'listTasks', kind: 'function', isExported: true, line: 40 },
+  { file: 'client/tasks.client.ts', name: 'getTask', kind: 'function', isExported: true, line: 44 },
+  { file: 'client/tasks.client.ts', name: 'createTask', kind: 'function', isExported: true, line: 48 },
+  { file: 'client/tasks.client.ts', name: 'updateTask', kind: 'function', isExported: true, line: 52 },
+  { file: 'client/tasks.client.ts', name: 'completeTask', kind: 'function', isExported: true, line: 60 },
+  { file: 'client/tasks.client.ts', name: 'deleteTask', kind: 'function', isExported: true, line: 64 },
+
+  // ----------------------------------------------------------
+  // index.ts (barrel) — 0 definitions (only re-exports)
+  // ----------------------------------------------------------
+];
diff --git a/evals/ground-truth/todo-api/feature-cohesion.ts b/evals/ground-truth/todo-api/feature-cohesion.ts
new file mode 100644
index 0000000..b0565c6
--- /dev/null
+++ b/evals/ground-truth/todo-api/feature-cohesion.ts
@@ -0,0 +1,29 @@
+import type { FeatureCohesionGroup } from '../../harness/types.js';
+
+/**
+ * Theme-search ground truth for the LLM-driven features stage.
+ *
+ * Each entry asserts that there exists a feature whose name+description
+ * matches a target concept. The comparator iterates all produced features
+ * and picks the best theme-judge match. Robust to LLM-picked feature names
+ * — accepts "Authentication" / "User Auth" / "Identity Management" all as
+ * valid matches for the auth concept.
+ *
+ * todo-api has 2 user-facing concept areas (auth + tasks), so we expect
+ * at least 2 features. The LLM may bundle them into 1 "Application" feature
+ * or split them into multiple sub-features — both are valid as long as
+ * the auth and tasks concepts are each represented somewhere.
+ *
+ * Severity (compareFeatureCohesion):
+ *   - No feature matches expected theme → CRITICAL
+ */
+export const featureCohesion: FeatureCohesionGroup[] = [
+  {
+    label: 'authentication-feature',
+    expectedRole: 'Feature for user authentication, registration, login, and identity management',
+  },
+  {
+    label: 'task-management-feature',
+    expectedRole: 'Feature for task management — creating, updating, completing, and deleting tasks',
+  },
+];
diff --git a/evals/ground-truth/todo-api/files.ts b/evals/ground-truth/todo-api/files.ts
new file mode 100644
index 0000000..09106f7
--- /dev/null
+++ b/evals/ground-truth/todo-api/files.ts
@@ -0,0 +1,22 @@
+import type { GroundTruthFile } from '../../harness/types.js';
+
+/**
+ * Files squint should index when running on evals/fixtures/todo-api/.
+ * Excludes package.json/tsconfig.json (not TS) and any .d.ts (none in fixture).
+ */
+export const files: GroundTruthFile[] = [
+  { path: 'client/tasks.client.ts', language: 'typescript' },
+  { path: 'index.ts', language: 'typescript' },
+  { path: 'src/controllers/auth.controller.ts', language: 'typescript' },
+  { path: 'src/controllers/base.controller.ts', language: 'typescript' },
+  { path: 'src/controllers/tasks.controller.ts', language: 'typescript' },
+  { path: 'src/events/event-bus.ts', language: 'typescript' },
+  { path: 'src/framework.ts', language: 'typescript' },
+  { path: 'src/index.ts', language: 'typescript' },
+  { path: 'src/middleware/auth.middleware.ts', language: 'typescript' },
+  { path: 'src/repositories/base.repository.ts', language: 'typescript' },
+  { path: 'src/repositories/tasks.repository.ts', language: 'typescript' },
+  { path: 'src/services/auth.service.ts', language: 'typescript' },
+  { path: 'src/services/tasks.service.ts', language: 'typescript' },
+  { path: 'src/types.ts', language: 'typescript' },
+];
diff --git a/evals/ground-truth/todo-api/flow-rubric.ts b/evals/ground-truth/todo-api/flow-rubric.ts
new file mode 100644
index 0000000..47a66b6
--- /dev/null
+++ b/evals/ground-truth/todo-api/flow-rubric.ts
@@ -0,0 +1,36 @@
+import type { FlowRubricEntry } from '../../harness/types.js';
+
+/**
+ * Theme-search ground truth for the LLM-driven flows stage.
+ *
+ * The flows stage produces a small number of HIGH-LEVEL journey descriptions
+ * with LLM-picked names, slugs, and entry paths — none of which are
+ * deterministic. The rubric uses theme-search matching: for each entry, the
+ * comparator finds the produced flow whose name+description best matches
+ * the expected role and verifies its stakeholder.
+ *
+ * todo-api has 2 user-facing concept areas (auth + tasks). The rubric
+ * asserts at least one user-stakeholder flow per area. Iter-by-iter the
+ * LLM may produce additional system/external flows for middleware,
+ * router, base controller, etc. — those are extras (ignored).
+ *
+ * Severity (compareFlowRubric):
+ *   - No flow matches expected theme  → CRITICAL
+ *   - Best match's stakeholder wrong  → MAJOR
+ */
+export const flowRubric: FlowRubricEntry[] = [
+  {
+    label: 'user-authentication',
+    expectedRole: 'A user-facing journey for authentication: registration, login, or identity lookup',
+    // Accept 'user' OR 'external' — the LLM sometimes tags an
+    // authentication journey as 'external' (the external actor calling in)
+    // and sometimes as 'user' (the human behind that actor).
+    acceptableStakeholders: ['user', 'external'],
+  },
+  {
+    label: 'user-task-management',
+    expectedRole:
+      'A user-facing journey for task management: listing, creating, updating, completing, or deleting tasks',
+    acceptableStakeholders: ['user', 'external'],
+  },
+];
diff --git a/evals/ground-truth/todo-api/imports.ts b/evals/ground-truth/todo-api/imports.ts
new file mode 100644
index 0000000..a2e5571
--- /dev/null
+++ b/evals/ground-truth/todo-api/imports.ts
@@ -0,0 +1,222 @@
+import type { GroundTruthImport } from '../../harness/types.js';
+
+/**
+ * Imports squint should detect from each fixture file.
+ *
+ * Notes:
+ * - The barrel `index.ts` uses `export ... from` which squint records as
+ *   `re-export` type, not `import`.
+ * - Type-only imports (`import type { X }`) are still recorded as `import` type.
+ * - Local imports use the `.js` extension (TS convention for ESM resolution).
+ */
+export const imports: GroundTruthImport[] = [
+  // src/repositories/tasks.repository.ts
+  {
+    fromFile: 'src/repositories/tasks.repository.ts',
+    source: './base.repository.js',
+    type: 'import',
+    symbols: [{ name: 'BaseRepository', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/repositories/tasks.repository.ts',
+    source: '../types.js',
+    type: 'import',
+    isTypeOnly: true,
+    symbols: [{ name: 'Task', kind: 'named' }],
+  },
+
+  // src/services/auth.service.ts
+  {
+    fromFile: 'src/services/auth.service.ts',
+    source: '../types.js',
+    type: 'import',
+    isTypeOnly: true,
+    symbols: [{ name: 'User', kind: 'named' }],
+  },
+
+  // src/services/tasks.service.ts
+  {
+    fromFile: 'src/services/tasks.service.ts',
+    source: '../repositories/tasks.repository.js',
+    type: 'import',
+    symbols: [{ name: 'tasksRepository', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/services/tasks.service.ts',
+    source: '../events/event-bus.js',
+    type: 'import',
+    symbols: [{ name: 'eventBus', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/services/tasks.service.ts',
+    source: '../types.js',
+    type: 'import',
+    isTypeOnly: true,
+    symbols: [
+      { name: 'NewTaskInput', kind: 'named' },
+      { name: 'Task', kind: 'named' },
+    ],
+  },
+
+  // src/middleware/auth.middleware.ts
+  {
+    fromFile: 'src/middleware/auth.middleware.ts',
+    source: '../services/auth.service.js',
+    type: 'import',
+    symbols: [{ name: 'authService', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/middleware/auth.middleware.ts',
+    source: '../framework.js',
+    type: 'import',
+    isTypeOnly: true,
+    symbols: [{ name: 'Handler', kind: 'named' }],
+  },
+
+  // src/controllers/base.controller.ts
+  {
+    fromFile: 'src/controllers/base.controller.ts',
+    source: '../framework.js',
+    type: 'import',
+    isTypeOnly: true,
+    symbols: [{ name: 'Response', kind: 'named' }],
+  },
+
+  // src/controllers/auth.controller.ts
+  {
+    fromFile: 'src/controllers/auth.controller.ts',
+    source: './base.controller.js',
+    type: 'import',
+    symbols: [{ name: 'BaseController', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/controllers/auth.controller.ts',
+    source: '../services/auth.service.js',
+    type: 'import',
+    symbols: [{ name: 'authService', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/controllers/auth.controller.ts',
+    source: '../framework.js',
+    type: 'import',
+    symbols: [
+      // Mixed type/value import: `import { type Request, type Response, type Router, createRouter }`
+      { name: 'Request', kind: 'named' },
+      { name: 'Response', kind: 'named' },
+      { name: 'Router', kind: 'named' },
+      { name: 'createRouter', kind: 'named' },
+    ],
+  },
+
+  // src/controllers/tasks.controller.ts
+  {
+    fromFile: 'src/controllers/tasks.controller.ts',
+    source: './base.controller.js',
+    type: 'import',
+    symbols: [{ name: 'BaseController', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/controllers/tasks.controller.ts',
+    source: '../services/tasks.service.js',
+    type: 'import',
+    symbols: [{ name: 'tasksService', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/controllers/tasks.controller.ts',
+    source: '../middleware/auth.middleware.js',
+    type: 'import',
+    symbols: [{ name: 'requireAuth', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/controllers/tasks.controller.ts',
+    source: '../framework.js',
+    type: 'import',
+    symbols: [
+      { name: 'Request', kind: 'named' },
+      { name: 'Response', kind: 'named' },
+      { name: 'Router', kind: 'named' },
+      { name: 'createRouter', kind: 'named' },
+    ],
+  },
+
+  // src/index.ts
+  {
+    fromFile: 'src/index.ts',
+    source: './controllers/auth.controller.js',
+    type: 'import',
+    symbols: [{ name: 'authController', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/index.ts',
+    source: './controllers/tasks.controller.js',
+    type: 'import',
+    symbols: [{ name: 'tasksController', kind: 'named' }],
+  },
+  {
+    fromFile: 'src/index.ts',
+    source: './framework.js',
+    type: 'import',
+    symbols: [{ name: 'createApp', kind: 'named' }],
+  },
+
+  // client/tasks.client.ts
+  {
+    fromFile: 'client/tasks.client.ts',
+    source: '../src/types.js',
+    type: 'import',
+    isTypeOnly: true,
+    symbols: [
+      { name: 'NewTaskInput', kind: 'named' },
+      { name: 'Task', kind: 'named' },
+    ],
+  },
+
+  // index.ts (barrel) — re-exports
+  {
+    fromFile: 'index.ts',
+    source: './src/services/tasks.service.js',
+    type: 're-export',
+    symbols: [
+      { name: 'TasksService', kind: 'named' },
+      { name: 'tasksService', kind: 'named' },
+    ],
+  },
+  {
+    fromFile: 'index.ts',
+    source: './src/services/auth.service.js',
+    type: 're-export',
+    symbols: [
+      { name: 'AuthService', kind: 'named' },
+      { name: 'authService', kind: 'named' },
+    ],
+  },
+  {
+    fromFile: 'index.ts',
+    source: './src/repositories/tasks.repository.js',
+    type: 're-export',
+    symbols: [
+      { name: 'TasksRepository', kind: 'named' },
+      { name: 'tasksRepository', kind: 'named' },
+    ],
+  },
+  {
+    fromFile: 'index.ts',
+    source: './src/events/event-bus.js',
+    type: 're-export',
+    symbols: [
+      { name: 'eventBus', kind: 'named' },
+      { name: 'auditLogger', kind: 'named' },
+    ],
+  },
+  {
+    fromFile: 'index.ts',
+    source: './src/types.js',
+    type: 're-export',
+    isTypeOnly: true,
+    symbols: [
+      { name: 'Task', kind: 'named' },
+      { name: 'User', kind: 'named' },
+      { name: 'NewTaskInput', kind: 'named' },
+    ],
+  },
+];
diff --git a/evals/ground-truth/todo-api/index.ts b/evals/ground-truth/todo-api/index.ts
new file mode 100644
index 0000000..c8509b9
--- /dev/null
+++ b/evals/ground-truth/todo-api/index.ts
@@ -0,0 +1,43 @@
+import type { GroundTruth } from '../../harness/types.js';
+import { contracts } from './contracts.js';
+import { definitionMetadata } from './definition-metadata.js';
+import { definitions } from './definitions.js';
+import { featureCohesion } from './feature-cohesion.js';
+import { files } from './files.js';
+import { flowRubric } from './flow-rubric.js';
+import { imports } from './imports.js';
+import { interactionRubric } from './interaction-rubric.js';
+import { moduleCohesion } from './module-cohesion.js';
+import { modules } from './modules.js';
+import { relationships } from './relationships.js';
+
+/**
+ * Composed ground truth for the todo-api fixture.
+ *
+ * Iteration 1 (parse stage): files, definitions, imports
+ * Iteration 2 (symbols stage): + definitionMetadata (purpose/domain/pure)
+ * Iteration 3 (relationships stage): + relationships (extends/implements/uses + semantic)
+ * Iteration 4 (modules stage): + moduleCohesion (cohesion + role rubric, replaces strict modules GT)
+ * Iteration 5 (contracts stage): + contracts (HTTP routes + events with participants)
+ * Iteration 6 (interactions stage): + interactionRubric (anchor-based module-pair edges)
+ * Iteration 7 (flows stage): + flowRubric (theme-search user journey verification)
+ * Iteration 8 (features stage): + featureCohesion (theme-search feature verification)
+ *
+ * The legacy `modules` field is still composed for backward-compat with the
+ * old `compareModules`/`compareModuleMembers` strategies; iter 4/4.5 don't
+ * include those tables in scope anymore.
+ */
+export const todoApiGroundTruth: GroundTruth = {
+  fixtureName: 'todo-api',
+  files,
+  definitions,
+  imports,
+  definitionMetadata,
+  relationships,
+  modules,
+  moduleCohesion,
+  contracts,
+  interactionRubric,
+  flowRubric,
+  featureCohesion,
+};
diff --git a/evals/ground-truth/todo-api/interaction-rubric.ts b/evals/ground-truth/todo-api/interaction-rubric.ts
new file mode 100644
index 0000000..041d6ea
--- /dev/null
+++ b/evals/ground-truth/todo-api/interaction-rubric.ts
@@ -0,0 +1,64 @@
+import { type InteractionRubricEntry, defKey } from '../../harness/types.js';
+
+/**
+ * Anchor-based ground truth for the LLM-driven interactions stage.
+ *
+ * Each entry asserts that the module containing FROM_ANCHOR has an
+ * interaction edge to the module containing TO_ANCHOR. The actual module
+ * full_paths are LLM-picked, so we use definitions as deterministic
+ * anchors and let the comparator resolve them at compare time.
+ *
+ * The 5 high-confidence edges below are the AST-derivable
+ * controller-service-repository pipeline that the squint interactions
+ * stage should always detect:
+ *
+ *   - AuthController → AuthService            (HTTP layer → business logic)
+ *   - TasksController → TasksService          (HTTP layer → business logic)
+ *   - TasksController → requireAuth           (controller → middleware guard)
+ *   - TasksService → TasksRepository          (service → persistence)
+ *   - TasksService → eventBus                 (service → event emission)
+ *
+ * Authored COLD against the controller / service / repository source code.
+ * If the cold run reveals that any edge isn't detected by squint (or that
+ * the modules iter-4 places these defs into the SAME module — which would
+ * make the rubric a self-loop), the entry will be removed and triaged.
+ *
+ * Severity (compareInteractionRubric):
+ *   - Anchor def doesn't exist                → CRITICAL
+ *   - Anchor unassigned to a module           → CRITICAL
+ *   - Anchors resolve to the same module      → MAJOR (no cross-module edge)
+ *   - No interaction between resolved modules → MAJOR
+ *   - Source not in acceptable set            → MAJOR
+ *   - Semantic prose drift                    → MINOR
+ */
+export const interactionRubric: InteractionRubricEntry[] = [
+  {
+    label: 'auth-controller-uses-auth-service',
+    fromAnchor: defKey('src/controllers/auth.controller.ts', 'AuthController'),
+    toAnchor: defKey('src/services/auth.service.ts', 'AuthService'),
+    semanticReference: 'Authentication controller delegates to the authentication service',
+  },
+  {
+    label: 'tasks-controller-uses-tasks-service',
+    fromAnchor: defKey('src/controllers/tasks.controller.ts', 'TasksController'),
+    toAnchor: defKey('src/services/tasks.service.ts', 'TasksService'),
+    semanticReference: 'Tasks controller delegates to the tasks business logic service',
+  },
+  {
+    label: 'tasks-controller-uses-auth-middleware',
+    fromAnchor: defKey('src/controllers/tasks.controller.ts', 'TasksController'),
+    toAnchor: defKey('src/middleware/auth.middleware.ts', 'requireAuth'),
+    semanticReference: 'Tasks controller guards endpoints with the authentication middleware',
+  },
+  {
+    label: 'tasks-service-uses-tasks-repository',
+    fromAnchor: defKey('src/services/tasks.service.ts', 'TasksService'),
+    toAnchor: defKey('src/repositories/tasks.repository.ts', 'TasksRepository'),
+    semanticReference: 'Tasks service persists tasks via the tasks repository',
+  },
+  // tasks-service-uses-event-bus removed: in some runs the LLM groups
+  // TasksService and EventBus into the same module (project.server.services.tasks),
+  // making this a self-loop with no cross-module edge to verify. The
+  // service→eventBus relationship is already covered by iter 3's
+  // relationship_annotations GT and iter 5's contracts GT (events).
+];
diff --git a/evals/ground-truth/todo-api/module-cohesion.ts b/evals/ground-truth/todo-api/module-cohesion.ts
new file mode 100644
index 0000000..38de3fc
--- /dev/null
+++ b/evals/ground-truth/todo-api/module-cohesion.ts
@@ -0,0 +1,163 @@
+import { type ModuleCohesionGroup, defKey } from '../../harness/types.js';
+
+/**
+ * Cohesion rubric for the LLM-driven modules stage.
+ *
+ * Replaces the strict `evals/ground-truth/todo-api/modules.ts` exact-match
+ * GT with property-based assertions: each group declares a set of
+ * definitions that should land in the same module, plus a one-sentence
+ * description of what role that module should play.
+ *
+ * The companion comparator is `compareModuleCohesion` (virtual table
+ * `module_cohesion`). For each group it:
+ *   1. Looks up the produced module for each member via module_members
+ *   2. Verifies cohesion (strict = all in 1 module, majority = >50%)
+ *   3. Sends the winning module's name+description to the prose judge
+ *      with `expectedRole` as the reference
+ *
+ * Severity:
+ *   - Member unassigned to any module → CRITICAL
+ *   - GT references unknown definition → CRITICAL
+ *   - Strict/majority cohesion violated → MAJOR
+ *   - Role judge below threshold (default 0.6) → MINOR (prose-drift)
+ *
+ * This rubric is robust to LLM tree-shape variation: different slugs,
+ * different depths, different groupings all pass as long as the semantically
+ * related definitions stay together and the LLM-picked module name+description
+ * is reasonable for the role.
+ *
+ * `cohesion: 'majority'` is used for groups where one member (typically a
+ * shared base class) might legitimately land in the parent module while the
+ * subclasses are in the leaf — e.g. BaseController extended by both
+ * AuthController and TasksController.
+ */
+export const moduleCohesion: ModuleCohesionGroup[] = [
+  // app-creation: createApp + appRegistry are framework helpers and reliably
+  // land together. Bootstrap app + PORT (from src/index.ts) are deliberately
+  // NOT a cohesion group because the LLM legitimately splits them across
+  // server/config/network modules — they're related but not always co-located.
+  // The definitions are still covered by the GT definitions table.
+  {
+    label: 'app-creation',
+    members: [defKey('src/framework.ts', 'createApp'), defKey('src/framework.ts', 'appRegistry')],
+    expectedRole: 'Module containing application framework helpers',
+    // The 2 members can split between framework and api leaves on some runs.
+    // Boundary-inclusive majority (>=50%) allows the 1/2 split through.
+    cohesion: 'majority',
+  },
+  {
+    label: 'framework-core-types',
+    members: [
+      defKey('src/framework.ts', 'App'),
+      defKey('src/framework.ts', 'Handler'),
+      defKey('src/framework.ts', 'NextFunction'),
+      defKey('src/framework.ts', 'Request'),
+      defKey('src/framework.ts', 'Response'),
+    ],
+    expectedRole: 'Core HTTP framework types for request, response, handler, and app abstractions',
+    // The App interface sometimes lands in a "framework.app" leaf alongside
+    // createApp instead of "framework.core" with the other types.
+    cohesion: 'majority',
+  },
+  {
+    label: 'router-primitives',
+    members: [
+      defKey('src/framework.ts', 'Router'),
+      defKey('src/framework.ts', 'createRouter'),
+      defKey('src/framework.ts', 'routerRegistry'),
+    ],
+    expectedRole: 'HTTP routing primitives within the framework',
+    // The Router interface sometimes lands in a "core types" module while
+    // createRouter+routerRegistry stay in a "router" leaf — accept the split.
+    cohesion: 'majority',
+  },
+  {
+    label: 'auth-controller',
+    members: [
+      defKey('src/controllers/auth.controller.ts', 'AuthController'),
+      defKey('src/controllers/auth.controller.ts', 'authController'),
+      defKey('src/controllers/base.controller.ts', 'BaseController'),
+    ],
+    expectedRole: 'HTTP controller for authentication endpoints (register, login, identity lookup) and its base class',
+    cohesion: 'majority', // BaseController might land in api parent or auth child
+  },
+  {
+    label: 'tasks-controller',
+    members: [
+      defKey('src/controllers/tasks.controller.ts', 'TasksController'),
+      defKey('src/controllers/tasks.controller.ts', 'tasksController'),
+    ],
+    expectedRole: 'HTTP controller for task CRUD endpoints, gated by authentication middleware',
+  },
+  {
+    label: 'auth-service',
+    members: [
+      defKey('src/services/auth.service.ts', 'AuthService'),
+      defKey('src/services/auth.service.ts', 'authService'),
+      defKey('src/services/auth.service.ts', 'usersByEmail'),
+      defKey('src/services/auth.service.ts', 'hashPassword'),
+      defKey('src/services/auth.service.ts', 'verifyPassword'),
+      defKey('src/services/auth.service.ts', 'signToken'),
+      defKey('src/services/auth.service.ts', 'decodeToken'),
+    ],
+    expectedRole: 'Authentication service module',
+  },
+  {
+    label: 'tasks-service',
+    members: [
+      defKey('src/services/tasks.service.ts', 'TasksService'),
+      defKey('src/services/tasks.service.ts', 'tasksService'),
+    ],
+    expectedRole: 'Tasks business logic service that orchestrates persistence and event emission',
+  },
+  {
+    label: 'tasks-repository',
+    members: [
+      defKey('src/repositories/base.repository.ts', 'BaseRepository'),
+      defKey('src/repositories/tasks.repository.ts', 'TasksRepository'),
+      defKey('src/repositories/tasks.repository.ts', 'tasksRepository'),
+    ],
+    expectedRole: 'Tasks data access / repository module',
+    cohesion: 'majority', // BaseRepository might land in repositories parent
+  },
+  {
+    label: 'event-bus',
+    members: [
+      defKey('src/events/event-bus.ts', 'EventBus'),
+      defKey('src/events/event-bus.ts', 'EventName'),
+      defKey('src/events/event-bus.ts', 'EventHandler'),
+      defKey('src/events/event-bus.ts', 'eventBus'),
+      defKey('src/events/event-bus.ts', 'auditLogger'),
+    ],
+    expectedRole: 'In-process event bus with event types, the singleton instance, and an audit subscriber',
+  },
+  {
+    label: 'auth-middleware',
+    members: [defKey('src/middleware/auth.middleware.ts', 'requireAuth')],
+    expectedRole: 'Authentication middleware module',
+  },
+  {
+    label: 'shared-types',
+    members: [defKey('src/types.ts', 'Task'), defKey('src/types.ts', 'User'), defKey('src/types.ts', 'NewTaskInput')],
+    expectedRole: 'Shared TypeScript type definitions for the application entities',
+  },
+  {
+    label: 'frontend-client',
+    members: [
+      defKey('client/tasks.client.ts', 'BASE_URL'),
+      defKey('client/tasks.client.ts', 'HttpFn'),
+      defKey('client/tasks.client.ts', 'http'),
+      defKey('client/tasks.client.ts', 'request'),
+      defKey('client/tasks.client.ts', 'login'),
+      defKey('client/tasks.client.ts', 'register'),
+      defKey('client/tasks.client.ts', 'listTasks'),
+      defKey('client/tasks.client.ts', 'getTask'),
+      defKey('client/tasks.client.ts', 'createTask'),
+      defKey('client/tasks.client.ts', 'updateTask'),
+      defKey('client/tasks.client.ts', 'completeTask'),
+      defKey('client/tasks.client.ts', 'deleteTask'),
+    ],
+    expectedRole: 'Frontend HTTP client module for the backend API',
+    cohesion: 'majority', // login/register might land in a separate auth-client subtree
+  },
+];
diff --git a/evals/ground-truth/todo-api/modules.ts b/evals/ground-truth/todo-api/modules.ts
new file mode 100644
index 0000000..fedcbdc
--- /dev/null
+++ b/evals/ground-truth/todo-api/modules.ts
@@ -0,0 +1,266 @@
+import { type GroundTruthModule, defKey } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `modules` and `module_members` tables after running
+ * `squint ingest --to-stage modules` against the todo-api fixture.
+ *
+ * Authored against the actual produced tree from the iter-4 cold-pass DB
+ * (`evals/results/2026-04-08T08-45-39-100Z/produced.db`). The LLM produces
+ * a 4-level tree with 23 modules total and 50/50 definition coverage.
+ *
+ * Tree shape (depth → module):
+ *   0  project
+ *   1  project.{client, server, shared}
+ *   2  project.client.{auth, tasks}
+ *   2  project.server.{api, data, events, framework, middleware, services}
+ *   2  project.shared.types
+ *   3  project.server.api.{auth, tasks}
+ *   3  project.server.data.repositories
+ *   3  project.server.framework.{app-lifecycle, core, router}
+ *   3  project.server.middleware.security
+ *   3  project.server.services.{auth, tasks}
+ *   4  project.server.data.repositories.tasks
+ *
+ * Notes on what the post-LLM normalizer did NOT do:
+ *  - BaseController lives in project.server.api.auth alongside AuthController.
+ *    The base-class rule (2+ subclasses → parent module) would suggest moving
+ *    it to project.server.api, but the rule didn't fire here. Match the GT
+ *    to what's actually produced — this is a documentation point, not a bug.
+ *  - BaseRepository lives in project.server.data.repositories.tasks alongside
+ *    TasksRepository for the same reason.
+ *
+ * Severity policy (compareModules + compareModuleMembers):
+ *   - Missing GT module / wrong member assignment → MAJOR (gate failure)
+ *   - Extra produced module → MINOR (auto-ancestors suppressed)
+ *   - Description prose drift → MINOR (default minSimilarity 0.6)
+ */
+
+const DEFAULT_MOD_MIN_SIMILARITY = 0.6;
+
+function branch(fullPath: string, name: string, parentFullPath: string | null, description: string): GroundTruthModule {
+  return {
+    fullPath,
+    name,
+    parentFullPath,
+    descriptionReference: description,
+    minSimilarity: DEFAULT_MOD_MIN_SIMILARITY,
+  };
+}
+
+function leaf(
+  fullPath: string,
+  name: string,
+  parentFullPath: string,
+  members: ReadonlyArray<ReturnType<typeof defKey>>,
+  description: string
+): GroundTruthModule {
+  return {
+    fullPath,
+    name,
+    parentFullPath,
+    members: [...members],
+    descriptionReference: description,
+    minSimilarity: DEFAULT_MOD_MIN_SIMILARITY,
+  };
+}
+
+export const modules: GroundTruthModule[] = [
+  // ============================================================
+  // Top-level branches (depth 1)
+  // ============================================================
+  branch('project.client', 'Client', 'project', 'Frontend application components and logic'),
+  branch('project.server', 'Server', 'project', 'Backend application code: HTTP API, services, data access, framework'),
+  branch(
+    'project.shared',
+    'Shared',
+    'project',
+    'Cross-cutting utilities and type definitions used by both client and server'
+  ),
+
+  // ============================================================
+  // project.client subtree
+  // ============================================================
+  leaf(
+    'project.client.auth',
+    'Authentication Client',
+    'project.client',
+    [defKey('client/tasks.client.ts', 'login'), defKey('client/tasks.client.ts', 'register')],
+    'Frontend functions that call the authentication endpoints (login and register)'
+  ),
+  leaf(
+    'project.client.tasks',
+    'Tasks Client',
+    'project.client',
+    [
+      defKey('client/tasks.client.ts', 'BASE_URL'),
+      defKey('client/tasks.client.ts', 'HttpFn'),
+      defKey('client/tasks.client.ts', 'completeTask'),
+      defKey('client/tasks.client.ts', 'createTask'),
+      defKey('client/tasks.client.ts', 'deleteTask'),
+      defKey('client/tasks.client.ts', 'getTask'),
+      defKey('client/tasks.client.ts', 'http'),
+      defKey('client/tasks.client.ts', 'listTasks'),
+      defKey('client/tasks.client.ts', 'request'),
+      defKey('client/tasks.client.ts', 'updateTask'),
+    ],
+    'Frontend client wrappers for the task management API plus the shared http transport plumbing'
+  ),
+
+  // ============================================================
+  // project.server subtree
+  // ============================================================
+  branch('project.server.api', 'API', 'project.server', 'HTTP controllers exposing the application endpoints'),
+  branch('project.server.data', 'Data Access', 'project.server', 'Persistence layer for the application entities'),
+  branch('project.server.framework', 'Framework', 'project.server', 'Core application framework and bootstrapping'),
+  branch(
+    'project.server.middleware',
+    'Middleware',
+    'project.server',
+    'HTTP middleware functions applied to incoming requests'
+  ),
+  branch('project.server.services', 'Services', 'project.server', 'Application business logic services'),
+
+  // project.server.events is a depth-2 LEAF (not nested further)
+  leaf(
+    'project.server.events',
+    'Events',
+    'project.server',
+    [
+      defKey('src/events/event-bus.ts', 'EventBus'),
+      defKey('src/events/event-bus.ts', 'EventHandler'),
+      defKey('src/events/event-bus.ts', 'EventName'),
+      defKey('src/events/event-bus.ts', 'auditLogger'),
+      defKey('src/events/event-bus.ts', 'eventBus'),
+    ],
+    'In-process event bus and audit subscriber for application-level events'
+  ),
+
+  // project.server.api.{auth, tasks}
+  leaf(
+    'project.server.api.auth',
+    'Authentication API',
+    'project.server.api',
+    [
+      // BaseController lives here alongside AuthController — the LLM did not
+      // pull it up to project.server.api despite being extended by both
+      // AuthController and TasksController. Match what was produced.
+      defKey('src/controllers/auth.controller.ts', 'AuthController'),
+      defKey('src/controllers/auth.controller.ts', 'authController'),
+      defKey('src/controllers/base.controller.ts', 'BaseController'),
+    ],
+    'HTTP controller for authentication endpoints (register, login, identity lookup)'
+  ),
+  leaf(
+    'project.server.api.tasks',
+    'Tasks API',
+    'project.server.api',
+    [
+      defKey('src/controllers/tasks.controller.ts', 'TasksController'),
+      defKey('src/controllers/tasks.controller.ts', 'tasksController'),
+    ],
+    'HTTP controller for task CRUD endpoints, gated by the authentication middleware'
+  ),
+
+  // project.server.data.repositories — branch with one leaf below it
+  branch(
+    'project.server.data.repositories',
+    'Repositories',
+    'project.server.data',
+    'Repository implementations for the application entities'
+  ),
+  leaf(
+    'project.server.data.repositories.tasks',
+    'Tasks Repository',
+    'project.server.data.repositories',
+    [
+      // BaseRepository sits with TasksRepository for the same reason
+      // BaseController sits with AuthController above.
+      defKey('src/repositories/base.repository.ts', 'BaseRepository'),
+      defKey('src/repositories/tasks.repository.ts', 'TasksRepository'),
+      defKey('src/repositories/tasks.repository.ts', 'tasksRepository'),
+    ],
+    'Data access for tasks via repository implementations'
+  ),
+
+  // project.server.framework.{app-lifecycle, core, router}
+  leaf(
+    'project.server.framework.app-lifecycle',
+    'Application Lifecycle',
+    'project.server.framework',
+    [
+      defKey('src/framework.ts', 'appRegistry'),
+      defKey('src/framework.ts', 'createApp'),
+      defKey('src/index.ts', 'PORT'),
+      defKey('src/index.ts', 'app'),
+    ],
+    'Application creation, registration, and the bootstrap entry point that mounts routers and starts listening'
+  ),
+  leaf(
+    'project.server.framework.core',
+    'Core Framework Types',
+    'project.server.framework',
+    [
+      defKey('src/framework.ts', 'App'),
+      defKey('src/framework.ts', 'Handler'),
+      defKey('src/framework.ts', 'NextFunction'),
+      defKey('src/framework.ts', 'Request'),
+      defKey('src/framework.ts', 'Response'),
+    ],
+    'Core interface and type definitions for the request, response, handler, and app abstractions'
+  ),
+  leaf(
+    'project.server.framework.router',
+    'Router',
+    'project.server.framework',
+    [
+      defKey('src/framework.ts', 'Router'),
+      defKey('src/framework.ts', 'createRouter'),
+      defKey('src/framework.ts', 'routerRegistry'),
+    ],
+    'Functionality related to routing within the application framework'
+  ),
+
+  // project.server.middleware.security
+  leaf(
+    'project.server.middleware.security',
+    'Security Middleware',
+    'project.server.middleware',
+    [defKey('src/middleware/auth.middleware.ts', 'requireAuth')],
+    'Authentication and authorization middleware for protected endpoints'
+  ),
+
+  // project.server.services.{auth, tasks}
+  leaf(
+    'project.server.services.auth',
+    'Authentication Service',
+    'project.server.services',
+    [
+      defKey('src/services/auth.service.ts', 'AuthService'),
+      defKey('src/services/auth.service.ts', 'authService'),
+      defKey('src/services/auth.service.ts', 'decodeToken'),
+      defKey('src/services/auth.service.ts', 'hashPassword'),
+      defKey('src/services/auth.service.ts', 'signToken'),
+      defKey('src/services/auth.service.ts', 'usersByEmail'),
+      defKey('src/services/auth.service.ts', 'verifyPassword'),
+    ],
+    'Authentication service plus its password-hashing and token helpers and the in-memory user store'
+  ),
+  leaf(
+    'project.server.services.tasks',
+    'Tasks Service',
+    'project.server.services',
+    [defKey('src/services/tasks.service.ts', 'TasksService'), defKey('src/services/tasks.service.ts', 'tasksService')],
+    'Tasks service that orchestrates persistence and event emission for task lifecycle operations'
+  ),
+
+  // ============================================================
+  // project.shared subtree
+  // ============================================================
+  leaf(
+    'project.shared.types',
+    'Types',
+    'project.shared',
+    [defKey('src/types.ts', 'NewTaskInput'), defKey('src/types.ts', 'Task'), defKey('src/types.ts', 'User')],
+    'Shared TypeScript type definitions for tasks and users used by both client and server'
+  ),
+];
diff --git a/evals/ground-truth/todo-api/relationships.ts b/evals/ground-truth/todo-api/relationships.ts
new file mode 100644
index 0000000..b90ceab
--- /dev/null
+++ b/evals/ground-truth/todo-api/relationships.ts
@@ -0,0 +1,358 @@
+import { type GroundTruthRelationship, defKey } from '../../harness/types.js';
+
+/**
+ * Ground truth for the `relationship_annotations` table after running
+ * `squint ingest --to-stage relationships` against the todo-api fixture.
+ *
+ * The comparator treats this list as an EXISTENCE claim: every entry must
+ * have a matching produced row, but extra produced rows (call-graph edges
+ * we didn't enumerate) are intentionally ignored. This matches how an end
+ * user reads the table — "did the LLM annotate the inheritance and the
+ * core uses edges?" rather than "did it produce exactly N edges".
+ *
+ * Severity policy (from compareRelationshipAnnotations):
+ *   - Missing GT edge      → CRITICAL (LLM dropped a real edge OR GT is wrong)
+ *   - Wrong relationship_type → MAJOR
+ *   - PENDING_LLM_ANNOTATION leaked through → MAJOR
+ *   - Prose drift below threshold → MINOR (does not flip the gate)
+ *
+ * Default minSimilarity is 0.6 (vs 0.75 for definition_metadata): the LLM
+ * relationship prompt asks for terse 1-sentence justifications, so the
+ * cosine similarity to a hand-written reference is naturally lower than
+ * for the longer 'purpose' field. Iteration 2 confirmed 0.6 is the right
+ * floor for terse semantic descriptions.
+ */
+const DEFAULT_REL_MIN_SIMILARITY = 0.6;
+
+function uses(
+  fromFile: string,
+  fromName: string,
+  toFile: string,
+  toName: string,
+  semantic: string,
+  minSimilarity: number = DEFAULT_REL_MIN_SIMILARITY
+): GroundTruthRelationship {
+  return {
+    fromDef: defKey(fromFile, fromName),
+    toDef: defKey(toFile, toName),
+    relationshipType: 'uses',
+    semanticReference: semantic,
+    minSimilarity,
+  };
+}
+
+function extendsRel(
+  fromFile: string,
+  fromName: string,
+  toFile: string,
+  toName: string,
+  semantic: string,
+  minSimilarity: number = DEFAULT_REL_MIN_SIMILARITY
+): GroundTruthRelationship {
+  return {
+    fromDef: defKey(fromFile, fromName),
+    toDef: defKey(toFile, toName),
+    relationshipType: 'extends',
+    semanticReference: semantic,
+    minSimilarity,
+  };
+}
+
+export const relationships: GroundTruthRelationship[] = [
+  // ============================================================
+  // Inheritance (3 edges) — Phase 2 of relationships annotate.
+  // These start at parse time as PENDING_LLM_ANNOTATION; the eval
+  // verifies the LLM replaces every one. A leaked placeholder = MAJOR.
+  // ============================================================
+  extendsRel(
+    'src/repositories/tasks.repository.ts',
+    'TasksRepository',
+    'src/repositories/base.repository.ts',
+    'BaseRepository',
+    'specializes the generic in-memory repository with task-specific filtering by owner and completion state'
+  ),
+  extendsRel(
+    'src/controllers/auth.controller.ts',
+    'AuthController',
+    'src/controllers/base.controller.ts',
+    'BaseController',
+    'inherits common HTTP response helpers (success, fail, error handling) for the authentication endpoints'
+  ),
+  extendsRel(
+    'src/controllers/tasks.controller.ts',
+    'TasksController',
+    'src/controllers/base.controller.ts',
+    'BaseController',
+    'inherits common HTTP response helpers (success, fail, error handling) for the task management endpoints'
+  ),
+
+  // ============================================================
+  // Framework — module-level mutable registries make these unambiguously impure.
+  // ============================================================
+  uses(
+    'src/framework.ts',
+    'createRouter',
+    'src/framework.ts',
+    'routerRegistry',
+    'records every router instance in the module-level registry for runtime tracking'
+  ),
+  uses(
+    'src/framework.ts',
+    'createApp',
+    'src/framework.ts',
+    'appRegistry',
+    'records every app instance in the module-level registry for runtime tracking'
+  ),
+
+  // ============================================================
+  // Event bus — singleton instantiation.
+  // ============================================================
+  uses(
+    'src/events/event-bus.ts',
+    'eventBus',
+    'src/events/event-bus.ts',
+    'EventBus',
+    'creates the singleton event bus instance shared across the application'
+  ),
+
+  // ============================================================
+  // Repositories — singleton instantiation of TasksRepository.
+  // ============================================================
+  uses(
+    'src/repositories/tasks.repository.ts',
+    'tasksRepository',
+    'src/repositories/tasks.repository.ts',
+    'TasksRepository',
+    'creates the singleton tasks repository instance for application-wide use'
+  ),
+
+  // ============================================================
+  // Auth service — class methods access the in-memory user store and
+  // the password/token helpers.
+  // ============================================================
+  uses(
+    'src/services/auth.service.ts',
+    'AuthService',
+    'src/services/auth.service.ts',
+    'usersByEmail',
+    'reads and writes the in-memory user store keyed by email for registration and login'
+  ),
+  uses(
+    'src/services/auth.service.ts',
+    'AuthService',
+    'src/services/auth.service.ts',
+    'hashPassword',
+    'hashes new user passwords during registration before persisting them'
+  ),
+  uses(
+    'src/services/auth.service.ts',
+    'AuthService',
+    'src/services/auth.service.ts',
+    'verifyPassword',
+    'verifies submitted credentials against the stored password hash during login'
+  ),
+  uses(
+    'src/services/auth.service.ts',
+    'AuthService',
+    'src/services/auth.service.ts',
+    'signToken',
+    'signs an authentication token after successful registration or login'
+  ),
+  uses(
+    'src/services/auth.service.ts',
+    'AuthService',
+    'src/services/auth.service.ts',
+    'decodeToken',
+    'decodes the bearer token to identify the requesting user'
+  ),
+  uses(
+    'src/services/auth.service.ts',
+    'decodeToken',
+    'src/services/auth.service.ts',
+    'usersByEmail',
+    'looks up the authenticated user from the in-memory store by decoded id'
+  ),
+  uses(
+    'src/services/auth.service.ts',
+    'authService',
+    'src/services/auth.service.ts',
+    'AuthService',
+    'creates the singleton auth service instance for application-wide use'
+  ),
+
+  // ============================================================
+  // Tasks service — orchestrates persistence and event emission.
+  // ============================================================
+  uses(
+    'src/services/tasks.service.ts',
+    'TasksService',
+    'src/repositories/tasks.repository.ts',
+    'tasksRepository',
+    'persists and queries tasks through the repository abstraction'
+  ),
+  uses(
+    'src/services/tasks.service.ts',
+    'TasksService',
+    'src/events/event-bus.ts',
+    'eventBus',
+    'publishes task lifecycle events (created, completed) for downstream consumers'
+  ),
+  uses(
+    'src/services/tasks.service.ts',
+    'tasksService',
+    'src/services/tasks.service.ts',
+    'TasksService',
+    'creates the singleton tasks service instance for application-wide use'
+  ),
+
+  // ============================================================
+  // Middleware — bearer-token validation gate.
+  // ============================================================
+  uses(
+    'src/middleware/auth.middleware.ts',
+    'requireAuth',
+    'src/services/auth.service.ts',
+    'authService',
+    'validates the bearer token via the auth service and rejects unauthenticated requests'
+  ),
+
+  // ============================================================
+  // Auth controller — wires HTTP endpoints to the auth service.
+  // ============================================================
+  uses(
+    'src/controllers/auth.controller.ts',
+    'AuthController',
+    'src/services/auth.service.ts',
+    'authService',
+    'delegates registration, login, and identity lookup to the auth service'
+  ),
+  uses(
+    'src/controllers/auth.controller.ts',
+    'AuthController',
+    'src/framework.ts',
+    'createRouter',
+    'creates a router during construction to register the authentication endpoints'
+  ),
+  uses(
+    'src/controllers/auth.controller.ts',
+    'authController',
+    'src/controllers/auth.controller.ts',
+    'AuthController',
+    'creates the singleton auth controller instance mounted by the bootstrap'
+  ),
+
+  // ============================================================
+  // Tasks controller — wires HTTP endpoints to the tasks service,
+  // gated by the auth middleware.
+  // ============================================================
+  uses(
+    'src/controllers/tasks.controller.ts',
+    'TasksController',
+    'src/services/tasks.service.ts',
+    'tasksService',
+    'delegates CRUD operations on tasks to the tasks service'
+  ),
+  uses(
+    'src/controllers/tasks.controller.ts',
+    'TasksController',
+    'src/framework.ts',
+    'createRouter',
+    'creates a router during construction to register the task management endpoints'
+  ),
+  uses(
+    'src/controllers/tasks.controller.ts',
+    'TasksController',
+    'src/middleware/auth.middleware.ts',
+    'requireAuth',
+    'guards every task endpoint with the bearer-token authentication middleware'
+  ),
+  uses(
+    'src/controllers/tasks.controller.ts',
+    'tasksController',
+    'src/controllers/tasks.controller.ts',
+    'TasksController',
+    'creates the singleton tasks controller instance mounted by the bootstrap'
+  ),
+
+  // ============================================================
+  // Bootstrap (src/index.ts) — wires the app and mounts routers.
+  // The `app` const is the natural anchor for the call-graph edges
+  // emitted at module top-level.
+  // ============================================================
+  uses('src/index.ts', 'app', 'src/framework.ts', 'createApp', 'constructs the application instance during bootstrap'),
+
+  // ============================================================
+  // Frontend client — every endpoint wrapper funnels through `request`,
+  // which itself routes through the http transport.
+  //
+  // NOTE: `request → BASE_URL` is NOT enumerated. The reference
+  // (`http(\`${BASE_URL}${path}\`, ...)`) is a bare identifier inside
+  // a template literal, and squint's call-graph extractor only tracks
+  // CALLS, INSTANTIATIONS, and INHERITANCE — not arbitrary identifier
+  // references. This is a deliberate scope choice, not a bug. If squint
+  // ever grows reference-level tracking, this entry should be added back.
+  // ============================================================
+  uses(
+    'client/tasks.client.ts',
+    'request',
+    'client/tasks.client.ts',
+    'http',
+    'sends the request through the injected http transport (fetch)'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'login',
+    'client/tasks.client.ts',
+    'request',
+    'submits the login credentials through the shared request helper'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'register',
+    'client/tasks.client.ts',
+    'request',
+    'submits the registration payload through the shared request helper'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'listTasks',
+    'client/tasks.client.ts',
+    'request',
+    'fetches the authenticated user’s tasks through the shared request helper'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'getTask',
+    'client/tasks.client.ts',
+    'request',
+    'fetches a single task by id through the shared request helper'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'createTask',
+    'client/tasks.client.ts',
+    'request',
+    'submits a new task payload through the shared request helper'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'updateTask',
+    'client/tasks.client.ts',
+    'request',
+    'submits a task update payload through the shared request helper'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'completeTask',
+    'client/tasks.client.ts',
+    'request',
+    'marks a task as completed through the shared request helper'
+  ),
+  uses(
+    'client/tasks.client.ts',
+    'deleteTask',
+    'client/tasks.client.ts',
+    'request',
+    'removes a task by id through the shared request helper'
+  ),
+];
diff --git a/evals/harness/builder.test.ts b/evals/harness/builder.test.ts
new file mode 100644
index 0000000..5a2066e
--- /dev/null
+++ b/evals/harness/builder.test.ts
@@ -0,0 +1,446 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { IndexDatabase } from '../../src/db/database-facade.js';
+import { buildGroundTruthDb } from './builder.js';
+import { type GroundTruth, defKey } from './types.js';
+
+/**
+ * The builder takes a GroundTruth and populates a fresh IndexDatabase.
+ * Tests verify it correctly maps natural-key inputs to the live schema
+ * (so the comparator has two databases — produced and ground-truth — to diff).
+ */
+describe('builder', () => {
+  let dbPath: string;
+  let db: IndexDatabase;
+
+  beforeEach(() => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-eval-build-'));
+    dbPath = path.join(dir, 'gt.db');
+    db = new IndexDatabase(dbPath);
+    db.initialize();
+  });
+
+  afterEach(() => {
+    db.close();
+    fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
+  });
+
+  it('inserts files', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/index.ts', language: 'typescript' },
+        { path: 'src/util.ts', language: 'typescript' },
+      ],
+      definitions: [],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const rows = conn.prepare('SELECT path, language FROM files ORDER BY path').all() as Array<{
+      path: string;
+      language: string;
+    }>;
+    expect(rows).toEqual([
+      { path: 'src/index.ts', language: 'typescript' },
+      { path: 'src/util.ts', language: 'typescript' },
+    ]);
+  });
+
+  it('inserts definitions linked to their files', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [{ path: 'src/auth.ts', language: 'typescript' }],
+      definitions: [
+        {
+          file: 'src/auth.ts',
+          name: 'AuthService',
+          kind: 'class',
+          isExported: true,
+          line: 5,
+          extendsName: null,
+        },
+        {
+          file: 'src/auth.ts',
+          name: 'login',
+          kind: 'function',
+          isExported: true,
+          line: 12,
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const rows = conn
+      .prepare(
+        `SELECT d.name AS name, d.kind AS kind, d.line AS line, f.path AS path
+         FROM definitions d JOIN files f ON d.file_id = f.id
+         ORDER BY d.line`
+      )
+      .all() as Array<{ name: string; kind: string; line: number; path: string }>;
+    expect(rows).toEqual([
+      { name: 'AuthService', kind: 'class', line: 5, path: 'src/auth.ts' },
+      { name: 'login', kind: 'function', line: 12, path: 'src/auth.ts' },
+    ]);
+  });
+
+  it('preserves extendsName on classes', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/base.ts', language: 'typescript' },
+        { path: 'src/child.ts', language: 'typescript' },
+      ],
+      definitions: [
+        { file: 'src/base.ts', name: 'Base', kind: 'class', isExported: true, line: 1 },
+        {
+          file: 'src/child.ts',
+          name: 'Child',
+          kind: 'class',
+          isExported: true,
+          line: 1,
+          extendsName: 'Base',
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const row = conn.prepare('SELECT extends_name FROM definitions WHERE name = ?').get('Child') as {
+      extends_name: string;
+    };
+    expect(row.extends_name).toBe('Base');
+  });
+
+  it('throws if a definition references a missing file', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [{ path: 'src/a.ts', language: 'typescript' }],
+      definitions: [{ file: 'src/missing.ts', name: 'Foo', kind: 'function', isExported: true, line: 1 }],
+    };
+    expect(() => buildGroundTruthDb(db, gt)).toThrow(/missing\.ts/);
+  });
+
+  it('inserts imports with their type and source', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/a.ts', language: 'typescript' },
+        { path: 'src/b.ts', language: 'typescript' },
+      ],
+      definitions: [{ file: 'src/b.ts', name: 'helper', kind: 'function', isExported: true, line: 1 }],
+      imports: [
+        {
+          fromFile: 'src/a.ts',
+          source: './b.js',
+          type: 'import',
+          isExternal: false,
+          symbols: [{ name: 'helper', kind: 'named' }],
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const importRow = conn
+      .prepare(
+        `SELECT i.source AS source, i.type AS type, f.path AS fromPath, i.is_external AS isExternal,
+                t.path AS toPath
+         FROM imports i
+         JOIN files f ON i.from_file_id = f.id
+         LEFT JOIN files t ON i.to_file_id = t.id`
+      )
+      .get() as { source: string; type: string; fromPath: string; isExternal: number; toPath: string | null };
+    expect(importRow).toEqual({
+      source: './b.js',
+      type: 'import',
+      fromPath: 'src/a.ts',
+      isExternal: 0,
+      // CRITICAL: relative imports must resolve to_file_id correctly. './b.js' from
+      // 'src/a.ts' should resolve to 'src/b.ts' (extension swap, same directory).
+      toPath: 'src/b.ts',
+    });
+
+    const symRow = conn
+      .prepare(
+        `SELECT s.name, s.local_name as localName, s.kind, d.name AS defName
+         FROM symbols s LEFT JOIN definitions d ON s.definition_id = d.id`
+      )
+      .get() as { name: string; localName: string; kind: string; defName: string | null };
+    expect(symRow).toEqual({
+      name: 'helper',
+      localName: 'helper',
+      kind: 'named',
+      // CRITICAL: imported symbol must link to the actual exported definition in the target file.
+      defName: 'helper',
+    });
+  });
+
+  it('resolves parent-directory relative imports (../foo.js)', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/types.ts', language: 'typescript' },
+        { path: 'src/services/auth.ts', language: 'typescript' },
+      ],
+      definitions: [{ file: 'src/types.ts', name: 'User', kind: 'interface', isExported: true, line: 1 }],
+      imports: [
+        {
+          fromFile: 'src/services/auth.ts',
+          source: '../types.js',
+          type: 'import',
+          isTypeOnly: true,
+          symbols: [{ name: 'User', kind: 'named' }],
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const row = conn.prepare('SELECT t.path AS toPath FROM imports i JOIN files t ON i.to_file_id = t.id').get() as {
+      toPath: string;
+    };
+    expect(row.toPath).toBe('src/types.ts');
+  });
+
+  it('resolves index file imports (./folder.js → ./folder/index.ts)', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/index.ts', language: 'typescript' },
+        { path: 'lib/index.ts', language: 'typescript' },
+      ],
+      definitions: [{ file: 'lib/index.ts', name: 'thing', kind: 'function', isExported: true, line: 1 }],
+      imports: [
+        {
+          fromFile: 'src/index.ts',
+          source: '../lib/index.js',
+          type: 'import',
+          symbols: [{ name: 'thing', kind: 'named' }],
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const row = conn.prepare('SELECT t.path AS toPath FROM imports i JOIN files t ON i.to_file_id = t.id').get() as {
+      toPath: string;
+    };
+    expect(row.toPath).toBe('lib/index.ts');
+  });
+
+  it('leaves to_file_id NULL for external (package) imports', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [{ path: 'src/a.ts', language: 'typescript' }],
+      definitions: [],
+      imports: [
+        {
+          fromFile: 'src/a.ts',
+          source: 'express',
+          type: 'import',
+          isExternal: true,
+          symbols: [{ name: 'Router', kind: 'named' }],
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const row = conn.prepare('SELECT to_file_id FROM imports').get() as { to_file_id: number | null };
+    expect(row.to_file_id).toBeNull();
+  });
+
+  it('inserts modules under a project root and assigns members', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [{ path: 'src/auth.ts', language: 'typescript' }],
+      definitions: [{ file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 }],
+      modules: [
+        {
+          fullPath: 'project.services.auth',
+          name: 'Auth',
+          members: [defKey('src/auth.ts', 'AuthService')],
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const moduleRow = conn
+      .prepare('SELECT full_path AS fullPath, name FROM modules WHERE full_path = ?')
+      .get('project.services.auth') as { fullPath: string; name: string };
+    expect(moduleRow).toEqual({ fullPath: 'project.services.auth', name: 'Auth' });
+
+    // Intermediate ancestors get auto-created
+    const ancestorPaths = conn.prepare('SELECT full_path FROM modules ORDER BY depth').all() as Array<{
+      full_path: string;
+    }>;
+    expect(ancestorPaths.map((r) => r.full_path)).toEqual(['project', 'project.services', 'project.services.auth']);
+
+    const memberRow = conn
+      .prepare(
+        `SELECT m.full_path AS modulePath, d.name AS defName
+         FROM module_members mm
+         JOIN modules m ON mm.module_id = m.id
+         JOIN definitions d ON mm.definition_id = d.id`
+      )
+      .get() as { modulePath: string; defName: string };
+    expect(memberRow).toEqual({ modulePath: 'project.services.auth', defName: 'AuthService' });
+  });
+
+  it('inserts contracts and participants', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/auth.controller.ts', language: 'typescript' },
+        { path: 'client/auth.client.ts', language: 'typescript' },
+      ],
+      definitions: [
+        { file: 'src/auth.controller.ts', name: 'login', kind: 'function', isExported: true, line: 1 },
+        { file: 'client/auth.client.ts', name: 'login', kind: 'function', isExported: true, line: 1 },
+      ],
+      contracts: [
+        {
+          protocol: 'http',
+          normalizedKey: 'POST /api/auth/login',
+          participants: [
+            { defKey: defKey('src/auth.controller.ts', 'login'), role: 'server' },
+            { defKey: defKey('client/auth.client.ts', 'login'), role: 'client' },
+          ],
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const contract = conn.prepare('SELECT protocol, normalized_key as normalizedKey FROM contracts').get() as {
+      protocol: string;
+      normalizedKey: string;
+    };
+    expect(contract).toEqual({ protocol: 'http', normalizedKey: 'POST /api/auth/login' });
+
+    const participants = conn
+      .prepare(
+        `SELECT cp.role, f.path || '::' || d.name AS defKey
+         FROM contract_participants cp
+         JOIN definitions d ON cp.definition_id = d.id
+         JOIN files f ON d.file_id = f.id
+         ORDER BY cp.role`
+      )
+      .all() as Array<{ role: string; defKey: string }>;
+    expect(participants).toEqual([
+      { role: 'client', defKey: 'client/auth.client.ts::login' },
+      { role: 'server', defKey: 'src/auth.controller.ts::login' },
+    ]);
+  });
+
+  it('inserts interactions between modules', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/c.ts', language: 'typescript' },
+        { path: 'src/s.ts', language: 'typescript' },
+      ],
+      definitions: [
+        { file: 'src/c.ts', name: 'ctrl', kind: 'function', isExported: true, line: 1 },
+        { file: 'src/s.ts', name: 'svc', kind: 'function', isExported: true, line: 1 },
+      ],
+      modules: [
+        { fullPath: 'project.controllers', name: 'Controllers', members: [defKey('src/c.ts', 'ctrl')] },
+        { fullPath: 'project.services', name: 'Services', members: [defKey('src/s.ts', 'svc')] },
+      ],
+      interactions: [
+        {
+          fromModulePath: 'project.controllers',
+          toModulePath: 'project.services',
+          pattern: 'business',
+          source: 'ast',
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const row = conn
+      .prepare(
+        `SELECT from_m.full_path AS fromPath, to_m.full_path AS toPath, i.pattern, i.source
+         FROM interactions i
+         JOIN modules from_m ON i.from_module_id = from_m.id
+         JOIN modules to_m ON i.to_module_id = to_m.id`
+      )
+      .get() as { fromPath: string; toPath: string; pattern: string; source: string };
+    expect(row).toEqual({
+      fromPath: 'project.controllers',
+      toPath: 'project.services',
+      pattern: 'business',
+      source: 'ast',
+    });
+  });
+
+  it('inserts flows with ordered steps', () => {
+    const gt: GroundTruth = {
+      fixtureName: 'tiny',
+      files: [
+        { path: 'src/c.ts', language: 'typescript' },
+        { path: 'src/s.ts', language: 'typescript' },
+      ],
+      definitions: [
+        { file: 'src/c.ts', name: 'login', kind: 'function', isExported: true, line: 1 },
+        { file: 'src/s.ts', name: 'auth', kind: 'function', isExported: true, line: 1 },
+      ],
+      modules: [
+        { fullPath: 'project.controllers', name: 'Controllers', members: [defKey('src/c.ts', 'login')] },
+        { fullPath: 'project.services', name: 'Services', members: [defKey('src/s.ts', 'auth')] },
+      ],
+      interactions: [
+        {
+          fromModulePath: 'project.controllers',
+          toModulePath: 'project.services',
+          pattern: 'business',
+          source: 'ast',
+        },
+      ],
+      flows: [
+        {
+          slug: 'user-login',
+          name: 'User Login',
+          stakeholder: 'user',
+          entryDef: defKey('src/c.ts', 'login'),
+          entryPath: 'POST /api/auth/login',
+          steps: [{ from: 'project.controllers', to: 'project.services' }],
+        },
+      ],
+    };
+    buildGroundTruthDb(db, gt);
+
+    const conn = db.getConnection();
+    const flow = conn.prepare('SELECT slug, name, stakeholder, entry_path AS entryPath FROM flows').get() as {
+      slug: string;
+      name: string;
+      stakeholder: string;
+      entryPath: string;
+    };
+    expect(flow).toEqual({
+      slug: 'user-login',
+      name: 'User Login',
+      stakeholder: 'user',
+      entryPath: 'POST /api/auth/login',
+    });
+
+    const steps = conn
+      .prepare(
+        `SELECT fs.step_order AS stepOrder, from_m.full_path AS fromPath, to_m.full_path AS toPath
+         FROM flow_steps fs
+         JOIN interactions i ON fs.interaction_id = i.id
+         JOIN modules from_m ON i.from_module_id = from_m.id
+         JOIN modules to_m ON i.to_module_id = to_m.id
+         ORDER BY fs.step_order`
+      )
+      .all() as Array<{ stepOrder: number; fromPath: string; toPath: string }>;
+    expect(steps).toEqual([{ stepOrder: 1, fromPath: 'project.controllers', toPath: 'project.services' }]);
+  });
+});
diff --git a/evals/harness/builder.ts b/evals/harness/builder.ts
new file mode 100644
index 0000000..cdf8182
--- /dev/null
+++ b/evals/harness/builder.ts
@@ -0,0 +1,406 @@
+import path from 'node:path';
+import type { IndexDatabase } from '../../src/db/database-facade.js';
+import { computeHash } from '../../src/db/schema.js';
+import { contractIdByKey, definitionIdByKey, moduleIdByKey } from './comparator/natural-keys.js';
+import {
+  type DefKey,
+  type GroundTruth,
+  type GroundTruthFlow,
+  type GroundTruthInteraction,
+  type GroundTruthModule,
+  defKey,
+  parseDefKey,
+} from './types.js';
+
+/**
+ * Populate a fresh IndexDatabase from a GroundTruth declarative spec.
+ *
+ * The DB MUST already have been opened and `initialize()` called by the
+ * caller — that way the harness owns DB lifecycle and the builder is purely
+ * a write operation.
+ *
+ * The builder uses the same repositories that real squint ingestion uses,
+ * so the resulting schema is by-construction live-schema-compatible.
+ */
+export function buildGroundTruthDb(db: IndexDatabase, gt: GroundTruth): void {
+  // ----------------------------------------------------------
+  // Files
+  // ----------------------------------------------------------
+  const fileIdByPath = new Map<string, number>();
+  for (const f of gt.files) {
+    const id = db.files.insert({
+      path: f.path,
+      language: f.language,
+      contentHash: computeHash(f.path), // deterministic per-path hash; content is irrelevant for ground truth
+      sizeBytes: 0,
+      modifiedAt: '2026-01-01T00:00:00.000Z',
+    });
+    fileIdByPath.set(f.path, id);
+  }
+
+  // ----------------------------------------------------------
+  // Definitions
+  // ----------------------------------------------------------
+  for (const d of gt.definitions) {
+    const fileId = fileIdByPath.get(d.file);
+    if (fileId === undefined) {
+      throw new Error(`Ground-truth definition '${d.name}' references missing file '${d.file}'`);
+    }
+    db.files.insertDefinition(fileId, {
+      name: d.name,
+      kind: d.kind,
+      isExported: d.isExported,
+      isDefault: d.isDefault ?? false,
+      // Definition extractor uses 0-based row, repositories add 1
+      position: { row: d.line - 1, column: 0 },
+      endPosition: { row: (d.endLine ?? d.line) - 1, column: 0 },
+      extends: d.extendsName ?? undefined,
+      implements: d.implementsNames ?? undefined,
+      extendsAll: d.extendsInterfaces ?? undefined,
+    });
+  }
+
+  // ----------------------------------------------------------
+  // Imports + symbols
+  // ----------------------------------------------------------
+  if (gt.imports) {
+    for (const imp of gt.imports) {
+      const fromFileId = fileIdByPath.get(imp.fromFile);
+      if (fromFileId === undefined) {
+        throw new Error(`Ground-truth import references missing fromFile '${imp.fromFile}'`);
+      }
+      // Resolve to_file_id with real ESM-style relative-path resolution.
+      const toFileId = resolveImportTargetFileId(fileIdByPath, imp.fromFile, imp.source);
+
+      const refId = db.files.insertReference(fromFileId, toFileId, {
+        type: imp.type,
+        source: imp.source,
+        isExternal: imp.isExternal ?? false,
+        isTypeOnly: imp.isTypeOnly ?? false,
+        imports: [],
+        position: { row: 0, column: 0 },
+      });
+
+      for (const sym of imp.symbols ?? []) {
+        // Try to find a matching exported definition in the target file (if any)
+        let definitionId: number | null = null;
+        if (toFileId !== null) {
+          const conn = db.getConnection();
+          const row = conn
+            .prepare('SELECT id FROM definitions WHERE file_id = ? AND name = ? LIMIT 1')
+            .get(toFileId, sym.name) as { id: number } | undefined;
+          definitionId = row?.id ?? null;
+        }
+        db.files.insertSymbol(refId, definitionId, {
+          name: sym.name,
+          localName: sym.localName ?? sym.name,
+          kind: sym.kind,
+          usages: [],
+        });
+      }
+    }
+  }
+
+  // ----------------------------------------------------------
+  // Usages
+  // ----------------------------------------------------------
+  if (gt.usages) {
+    const conn = db.getConnection();
+    for (const u of gt.usages) {
+      const fileId = fileIdByPath.get(u.file);
+      if (fileId === undefined) {
+        throw new Error(`Ground-truth usage references missing file '${u.file}'`);
+      }
+      // Find a symbol in this file with matching local name
+      const symRow = conn
+        .prepare(
+          `SELECT s.id AS id FROM symbols s
+           LEFT JOIN imports i ON s.reference_id = i.id
+           WHERE (i.from_file_id = ? OR s.file_id = ?) AND s.local_name = ?
+           LIMIT 1`
+        )
+        .get(fileId, fileId, u.symbolName) as { id: number } | undefined;
+      if (!symRow) {
+        throw new Error(
+          `Ground-truth usage of '${u.symbolName}' in ${u.file} has no matching imported/internal symbol`
+        );
+      }
+      db.files.insertUsage(symRow.id, {
+        position: { row: u.line - 1, column: 0 },
+        context: u.context,
+        callsite: {
+          argumentCount: 0,
+          isMethodCall: u.isMethodCall ?? false,
+          isConstructorCall: u.isConstructorCall ?? false,
+        },
+      });
+    }
+  }
+
+  // ----------------------------------------------------------
+  // Modules tree (with auto-created intermediate ancestors)
+  // ----------------------------------------------------------
+  if (gt.modules && gt.modules.length > 0) {
+    insertModuleTree(db, gt.modules);
+  }
+
+  // ----------------------------------------------------------
+  // Definition metadata
+  // ----------------------------------------------------------
+  if (gt.definitionMetadata) {
+    for (const m of gt.definitionMetadata) {
+      const defId = definitionIdByKey(db, m.defKey);
+      if (defId === null) {
+        throw new Error(`definition_metadata references unknown definition '${m.defKey}'`);
+      }
+      const value = m.exactValue ?? m.proseReference ?? '';
+      db.metadata.set(defId, m.key, value);
+    }
+  }
+
+  // ----------------------------------------------------------
+  // Relationship annotations
+  // ----------------------------------------------------------
+  if (gt.relationships) {
+    for (const r of gt.relationships) {
+      const fromId = definitionIdByKey(db, r.fromDef);
+      const toId = definitionIdByKey(db, r.toDef);
+      if (fromId === null || toId === null) {
+        throw new Error(`relationship references unknown definition: ${r.fromDef} → ${r.toDef}`);
+      }
+      db.relationships.set(fromId, toId, r.semanticReference ?? '', r.relationshipType);
+    }
+  }
+
+  // ----------------------------------------------------------
+  // Contracts + participants
+  // ----------------------------------------------------------
+  if (gt.contracts) {
+    for (const c of gt.contracts) {
+      const contractId = db.contracts.upsertContract(c.protocol, c.normalizedKey, c.normalizedKey);
+      for (const p of c.participants) {
+        const defId = definitionIdByKey(db, p.defKey);
+        if (defId === null) {
+          throw new Error(`contract participant references unknown definition '${p.defKey}'`);
+        }
+        // Find module for the definition (if assigned)
+        const conn = db.getConnection();
+        const modRow = conn
+          .prepare('SELECT module_id FROM module_members WHERE definition_id = ? LIMIT 1')
+          .get(defId) as { module_id: number } | undefined;
+        db.contracts.addParticipant(contractId, defId, modRow?.module_id ?? null, p.role);
+      }
+    }
+  }
+
+  // ----------------------------------------------------------
+  // Interactions + definition links
+  // ----------------------------------------------------------
+  if (gt.interactions) {
+    insertInteractions(db, gt.interactions);
+  }
+
+  // ----------------------------------------------------------
+  // Flows + steps
+  // ----------------------------------------------------------
+  if (gt.flows) {
+    insertFlows(db, gt.flows);
+  }
+}
+
+// ============================================================
+// Helpers
+// ============================================================
+
+/**
+ * Resolve a relative import source against the importing file's directory,
+ * using ESM-style extension swap and index-file fallback.
+ *
+ * Examples (fromFile → source → resolved):
+ *   src/a.ts → './b.js'              → src/b.ts
+ *   src/services/auth.ts → '../types.js' → src/types.ts
+ *   src/index.ts → '../lib/index.js' → lib/index.ts
+ *   src/a.ts → './folder.js'         → src/folder/index.ts (if folder.ts doesn't exist)
+ *   src/a.ts → 'express'             → null (external package)
+ */
+function resolveImportTargetFileId(fileIdByPath: Map<string, number>, fromFile: string, source: string): number | null {
+  // External (no relative or absolute prefix) → no target file
+  if (!source.startsWith('.') && !source.startsWith('/')) return null;
+
+  // Resolve the source relative to the importing file's directory.
+  // path.posix keeps separators stable across platforms; ground-truth paths
+  // are always POSIX-style (relative to fixture root).
+  const fromDir = path.posix.dirname(fromFile);
+  const resolvedNoExt = path.posix.normalize(
+    path.posix.join(fromDir, source.replace(/\.(js|ts|tsx|jsx|mjs|cjs)$/, ''))
+  );
+
+  // Try each candidate path in order: explicit extensions, then index files.
+  const candidates = [
+    `${resolvedNoExt}.ts`,
+    `${resolvedNoExt}.tsx`,
+    `${resolvedNoExt}.js`,
+    `${resolvedNoExt}.jsx`,
+    `${resolvedNoExt}/index.ts`,
+    `${resolvedNoExt}/index.tsx`,
+    `${resolvedNoExt}/index.js`,
+    `${resolvedNoExt}/index.jsx`,
+    // Last resort: the resolved path itself (already had the right extension)
+    resolvedNoExt,
+  ];
+
+  for (const candidate of candidates) {
+    const id = fileIdByPath.get(candidate);
+    if (id !== undefined) return id;
+  }
+  return null;
+}
+
+function insertModuleTree(db: IndexDatabase, gtModules: GroundTruthModule[]): void {
+  // Sort by depth (number of dots) so parents are inserted before children
+  const sorted = [...gtModules].sort((a, b) => a.fullPath.split('.').length - b.fullPath.split('.').length);
+
+  // Ensure root is created
+  db.modules.ensureRoot();
+
+  function ensureStrictAncestors(fullPath: string): void {
+    const segments = fullPath.split('.');
+    // Iterate STRICT ancestors only — skip the leaf path itself
+    for (let i = 1; i < segments.length - 1; i++) {
+      const ancestorPath = segments.slice(0, i + 1).join('.');
+      if (moduleIdByKey(db, ancestorPath) !== null) continue;
+      const parentPath = segments.slice(0, i).join('.');
+      const parentId = moduleIdByKey(db, parentPath);
+      if (parentId === null) {
+        throw new Error(`Internal: parent module '${parentPath}' not found`);
+      }
+      db.modules.insert(parentId, segments[i], segments[i]);
+    }
+  }
+
+  for (const m of sorted) {
+    ensureStrictAncestors(m.fullPath);
+    const segments = m.fullPath.split('.');
+    const parentPath = segments.slice(0, -1).join('.');
+    const slug = segments[segments.length - 1];
+
+    const existing = moduleIdByKey(db, m.fullPath);
+    if (existing === null) {
+      const parentId = parentPath ? moduleIdByKey(db, parentPath) : null;
+      if (parentId === null && parentPath) {
+        throw new Error(`Internal: parent module '${parentPath}' not found`);
+      }
+      db.modules.insert(parentId, slug, m.name, undefined, m.isTest);
+    }
+
+    // Assign members
+    if (m.members) {
+      const moduleId = moduleIdByKey(db, m.fullPath);
+      if (moduleId === null) throw new Error(`Internal: module '${m.fullPath}' missing after insert`);
+      for (const memberKey of m.members) {
+        const defId = definitionIdByKey(db, memberKey);
+        if (defId === null) {
+          throw new Error(`module '${m.fullPath}' member references unknown definition '${memberKey}'`);
+        }
+        db.modules.assignSymbol(defId, moduleId);
+      }
+    }
+  }
+}
+
+function insertInteractions(db: IndexDatabase, interactions: GroundTruthInteraction[]): void {
+  for (const i of interactions) {
+    const fromId = moduleIdByKey(db, i.fromModulePath);
+    const toId = moduleIdByKey(db, i.toModulePath);
+    if (fromId === null || toId === null) {
+      throw new Error(`interaction references unknown module: ${i.fromModulePath} → ${i.toModulePath}`);
+    }
+    const interactionId = db.interactions.insert(fromId, toId, {
+      pattern: i.pattern ?? undefined,
+      source: i.source,
+      semantic: i.semanticReference,
+    });
+
+    if (i.links) {
+      const conn = db.getConnection();
+      const insertLink = conn.prepare(
+        `INSERT OR IGNORE INTO interaction_definition_links (interaction_id, from_definition_id, to_definition_id, contract_id)
+         VALUES (?, ?, ?, ?)`
+      );
+      for (const l of i.links) {
+        const fromDefId = definitionIdByKey(db, l.fromDef);
+        const toDefId = definitionIdByKey(db, l.toDef);
+        if (fromDefId === null || toDefId === null) {
+          throw new Error(`interaction link references unknown definition: ${l.fromDef} → ${l.toDef}`);
+        }
+        const contractId = l.contractKey ? contractIdByKey(db, l.contractKey) : null;
+        insertLink.run(interactionId, fromDefId, toDefId, contractId);
+      }
+    }
+  }
+}
+
+function insertFlows(db: IndexDatabase, flows: GroundTruthFlow[]): void {
+  for (const f of flows) {
+    let entryDefId: number | undefined;
+    if (f.entryDef) {
+      const id = definitionIdByKey(db, f.entryDef);
+      if (id === null) throw new Error(`flow '${f.slug}' entryDef references unknown '${f.entryDef}'`);
+      entryDefId = id;
+    }
+    let entryModuleId: number | undefined;
+    if (f.entryModulePath) {
+      const id = moduleIdByKey(db, f.entryModulePath);
+      if (id === null) throw new Error(`flow '${f.slug}' entryModulePath references unknown '${f.entryModulePath}'`);
+      entryModuleId = id;
+    }
+
+    const flowId = db.flows.insert(f.name, f.slug, {
+      entryPointId: entryDefId,
+      entryPointModuleId: entryModuleId,
+      entryPath: f.entryPath,
+      stakeholder: f.stakeholder,
+      description: f.descriptionReference,
+    });
+
+    // Module-level steps (interactions)
+    if (f.steps && f.steps.length > 0) {
+      const interactionIds: number[] = [];
+      for (const s of f.steps) {
+        const fromId = moduleIdByKey(db, s.from);
+        const toId = moduleIdByKey(db, s.to);
+        if (fromId === null || toId === null) {
+          throw new Error(`flow '${f.slug}' step references unknown modules: ${s.from} → ${s.to}`);
+        }
+        const conn = db.getConnection();
+        const row = conn
+          .prepare('SELECT id FROM interactions WHERE from_module_id = ? AND to_module_id = ? LIMIT 1')
+          .get(fromId, toId) as { id: number } | undefined;
+        if (!row) {
+          throw new Error(
+            `flow '${f.slug}' step references interaction ${s.from} → ${s.to} that was not declared in ground truth`
+          );
+        }
+        interactionIds.push(row.id);
+      }
+      db.flows.addSteps(flowId, interactionIds);
+    }
+
+    // Definition-level steps
+    if (f.definitionSteps && f.definitionSteps.length > 0) {
+      const steps = f.definitionSteps.map((s) => {
+        const fromId = definitionIdByKey(db, s.from);
+        const toId = definitionIdByKey(db, s.to);
+        if (fromId === null || toId === null) {
+          throw new Error(`flow '${f.slug}' definitionStep references unknown definitions: ${s.from} → ${s.to}`);
+        }
+        return { fromDefinitionId: fromId, toDefinitionId: toId };
+      });
+      db.flows.addDefinitionSteps(flowId, steps);
+    }
+  }
+}
+
+// Re-export DefKey helpers for ergonomics
+export { defKey, parseDefKey };
+export type { DefKey };
diff --git a/evals/harness/comparator/index.test.ts b/evals/harness/comparator/index.test.ts
new file mode 100644
index 0000000..9472c8f
--- /dev/null
+++ b/evals/harness/comparator/index.test.ts
@@ -0,0 +1,338 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { IndexDatabase } from '../../../src/db/database-facade.js';
+import { buildGroundTruthDb } from '../builder.js';
+import { type GroundTruth, type TableName, defKey } from '../types.js';
+import { makeStubJudge } from '../types.js';
+import { compare } from './index.js';
+
+/**
+ * Top-level compare() orchestrator. It:
+ * - dispatches per-table comparators based on the requested scope
+ * - aggregates per-row diffs into a DiffSummary by severity
+ * - sets passed=false if any critical OR major diff exists (minor only → still passes)
+ */
+describe('compare (top-level orchestrator)', () => {
+  let dir: string;
+  let producedDb: IndexDatabase;
+
+  beforeEach(() => {
+    dir = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-eval-top-'));
+    producedDb = new IndexDatabase(path.join(dir, 'p.db'));
+    producedDb.initialize();
+  });
+
+  afterEach(() => {
+    producedDb.close();
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  const baseGt: GroundTruth = {
+    fixtureName: 'mini',
+    files: [
+      { path: 'src/c.ts', language: 'typescript' },
+      { path: 'src/s.ts', language: 'typescript' },
+    ],
+    definitions: [
+      { file: 'src/c.ts', name: 'ctrl', kind: 'function', isExported: true, line: 1 },
+      { file: 'src/s.ts', name: 'svc', kind: 'function', isExported: true, line: 1 },
+    ],
+    modules: [
+      { fullPath: 'project.controllers', name: 'C', members: [defKey('src/c.ts', 'ctrl')] },
+      { fullPath: 'project.services', name: 'S', members: [defKey('src/s.ts', 'svc')] },
+    ],
+    interactions: [
+      {
+        fromModulePath: 'project.controllers',
+        toModulePath: 'project.services',
+        pattern: 'business',
+        source: 'ast',
+      },
+    ],
+  };
+
+  it('passes when produced exactly matches ground truth across all tables in scope', async () => {
+    buildGroundTruthDb(producedDb, baseGt);
+    const report = await compare({
+      produced: producedDb,
+      groundTruth: baseGt,
+      scope: ['files', 'definitions', 'modules', 'module_members', 'interactions'],
+      judgeFn: async () => ({ similarity: 1, passed: true, reasoning: 'stub' }),
+    });
+    expect(report.passed).toBe(true);
+    expect(report.summary.critical).toBe(0);
+    expect(report.summary.major).toBe(0);
+    expect(report.tables.map((t) => t.table).sort()).toEqual(
+      ['definitions', 'files', 'interactions', 'module_members', 'modules'].sort()
+    );
+  });
+
+  it('fails on critical diffs, aggregates summary correctly', async () => {
+    // Build with a missing file
+    buildGroundTruthDb(producedDb, {
+      ...baseGt,
+      files: [{ path: 'src/c.ts', language: 'typescript' }],
+      definitions: [{ file: 'src/c.ts', name: 'ctrl', kind: 'function', isExported: true, line: 1 }],
+      modules: [{ fullPath: 'project.controllers', name: 'C', members: [defKey('src/c.ts', 'ctrl')] }],
+      interactions: [],
+    });
+    const report = await compare({
+      produced: producedDb,
+      groundTruth: baseGt,
+      scope: ['files', 'definitions'],
+      judgeFn: async () => ({ similarity: 1, passed: true, reasoning: 'stub' }),
+    });
+    expect(report.passed).toBe(false);
+    expect(report.summary.critical).toBeGreaterThan(0);
+  });
+
+  it('passes when only minor diffs are present', async () => {
+    // Use a different scope to avoid 'modules' producing minor extras
+    buildGroundTruthDb(producedDb, {
+      ...baseGt,
+      definitions: [
+        { file: 'src/c.ts', name: 'ctrl', kind: 'function', isExported: true, line: 4 }, // 1 → 4 (within ±2 from 2 is fine, but 1→4 is +3 → mismatch=minor in our impl)
+        { file: 'src/s.ts', name: 'svc', kind: 'function', isExported: true, line: 1 },
+      ],
+    });
+    const report = await compare({
+      produced: producedDb,
+      groundTruth: baseGt,
+      scope: ['files', 'definitions'],
+      judgeFn: async () => ({ similarity: 1, passed: true, reasoning: 'stub' }),
+    });
+    // 1 minor diff (line drift), 0 critical, 0 major → still passes
+    expect(report.summary.minor).toBe(1);
+    expect(report.summary.critical).toBe(0);
+    expect(report.summary.major).toBe(0);
+    expect(report.passed).toBe(true);
+  });
+
+  it('only runs comparators for tables in scope', async () => {
+    buildGroundTruthDb(producedDb, baseGt);
+    const report = await compare({
+      produced: producedDb,
+      groundTruth: baseGt,
+      scope: ['files'] as TableName[],
+      judgeFn: async () => ({ similarity: 1, passed: true, reasoning: 'stub' }),
+    });
+    expect(report.tables).toHaveLength(1);
+    expect(report.tables[0].table).toBe('files');
+  });
+
+  it('throws when scope includes a table with no implemented comparator', async () => {
+    buildGroundTruthDb(producedDb, baseGt);
+    await expect(
+      compare({
+        produced: producedDb,
+        groundTruth: baseGt,
+        // 'symbols' has no comparator yet — silently dropping it would mislead callers
+        scope: ['files', 'symbols'] as TableName[],
+        judgeFn: async () => ({ similarity: 1, passed: true, reasoning: 'stub' }),
+      })
+    ).rejects.toThrow(/comparator.*symbols/i);
+  });
+
+  it('dispatches relationship_annotations to its comparator (no throw)', async () => {
+    // Build a minimal fixture with one inheritance edge so the relationship_annotations
+    // table is non-empty when the dispatcher routes the call. The comparator must
+    // be wired into the COMPARATORS map for this not to throw "no comparator implemented".
+    const gt: GroundTruth = {
+      fixtureName: 'rel',
+      files: [{ path: 'src/r.ts', language: 'typescript' }],
+      definitions: [
+        { file: 'src/r.ts', name: 'BaseRepo', kind: 'class', isExported: true, line: 1 },
+        {
+          file: 'src/r.ts',
+          name: 'TaskRepo',
+          kind: 'class',
+          isExported: true,
+          line: 5,
+          extendsName: 'BaseRepo',
+        },
+      ],
+      relationships: [
+        {
+          fromDef: defKey('src/r.ts', 'TaskRepo'),
+          toDef: defKey('src/r.ts', 'BaseRepo'),
+          relationshipType: 'extends',
+          // No semanticReference → no prose check, stub judge is fine.
+        },
+      ],
+    };
+    buildGroundTruthDb(producedDb, gt);
+    const report = await compare({
+      produced: producedDb,
+      groundTruth: gt,
+      scope: ['relationship_annotations'],
+      judgeFn: makeStubJudge(),
+    });
+    expect(report.tables).toHaveLength(1);
+    expect(report.tables[0].table).toBe('relationship_annotations');
+    expect(report.passed).toBe(true);
+  });
+
+  it('records the duration in milliseconds', async () => {
+    buildGroundTruthDb(producedDb, baseGt);
+    const report = await compare({
+      produced: producedDb,
+      groundTruth: baseGt,
+      scope: ['files'],
+      judgeFn: async () => ({ similarity: 1, passed: true, reasoning: 'stub' }),
+    });
+    expect(report.durationMs).toBeGreaterThanOrEqual(0);
+    expect(typeof report.durationMs).toBe('number');
+  });
+
+  describe('stub-judge guardrail', () => {
+    it('allows stub judge when no prose-bearing tables are in scope', async () => {
+      buildGroundTruthDb(producedDb, baseGt);
+      const report = await compare({
+        produced: producedDb,
+        groundTruth: baseGt,
+        scope: ['files', 'definitions'],
+        judgeFn: makeStubJudge(),
+      });
+      expect(report.passed).toBe(true);
+    });
+
+    it('allows stub judge when prose-bearing scope has NO declared references', async () => {
+      // 'modules' is a prose-bearing table but baseGt has no descriptionReference fields,
+      // so the stub is harmless.
+      buildGroundTruthDb(producedDb, baseGt);
+      const report = await compare({
+        produced: producedDb,
+        groundTruth: baseGt,
+        scope: ['modules'],
+        judgeFn: makeStubJudge(),
+      });
+      expect(report.passed).toBe(true);
+    });
+
+    it('throws when stub judge would silently pass declared prose references', async () => {
+      // Add a prose reference to baseGt's modules
+      const gtWithProse: GroundTruth = {
+        ...baseGt,
+        modules: [
+          {
+            fullPath: 'project.controllers',
+            name: 'C',
+            members: [defKey('src/c.ts', 'ctrl')],
+            descriptionReference: 'HTTP request handlers translating requests into service calls.',
+          },
+          { fullPath: 'project.services', name: 'S', members: [defKey('src/s.ts', 'svc')] },
+        ],
+      };
+      buildGroundTruthDb(producedDb, gtWithProse);
+      await expect(
+        compare({
+          produced: producedDb,
+          groundTruth: gtWithProse,
+          scope: ['modules'],
+          judgeFn: makeStubJudge(),
+        })
+      ).rejects.toThrow(/stub judge is forbidden/i);
+    });
+
+    it('allows a real (non-stub) judge with declared prose references', async () => {
+      const gtWithProse: GroundTruth = {
+        ...baseGt,
+        modules: [
+          {
+            fullPath: 'project.controllers',
+            name: 'C',
+            members: [defKey('src/c.ts', 'ctrl')],
+            descriptionReference: 'reference text',
+          },
+          { fullPath: 'project.services', name: 'S', members: [defKey('src/s.ts', 'svc')] },
+        ],
+      };
+      buildGroundTruthDb(producedDb, gtWithProse);
+      // No STUB_JUDGE_MARKER set → treated as real
+      const realJudge = async () => ({ similarity: 1, passed: true, reasoning: 'real' });
+      const report = await compare({
+        produced: producedDb,
+        groundTruth: gtWithProse,
+        scope: ['modules'],
+        judgeFn: realJudge,
+      });
+      expect(report.passed).toBe(true);
+    });
+  });
+});
+
+describe('aggregateSummary — prose-check counting', () => {
+  // Direct unit test of the summary logic without needing a real DB.
+  // Imports the bare aggregator to verify counting rules in isolation.
+  it('a single prose-drift minor diff increments proseChecks.failed but NOT minor', async () => {
+    const { aggregateSummary } = await import('./index.js');
+    const summary = aggregateSummary([
+      {
+        table: 'definition_metadata',
+        passed: true, // table is fine; prose drift is informational
+        expectedCount: 1,
+        producedCount: 1,
+        diffs: [
+          {
+            kind: 'prose-drift',
+            severity: 'minor',
+            naturalKey: 'src/foo.ts::bar',
+            details: 'similarity 0.65 < 0.75',
+          },
+        ],
+        proseChecks: { passed: 0, failed: 1 },
+      },
+    ]);
+    expect(summary.proseChecks.failed).toBe(1);
+    expect(summary.minor).toBe(0); // ← regression: was 1 (double count)
+    expect(summary.proseChecks.passed).toBe(0);
+  });
+
+  it('passed prose checks roll up from per-table proseChecks counters', async () => {
+    const { aggregateSummary } = await import('./index.js');
+    const summary = aggregateSummary([
+      {
+        table: 'definition_metadata',
+        passed: true,
+        expectedCount: 5,
+        producedCount: 5,
+        diffs: [],
+        proseChecks: { passed: 4, failed: 1 },
+      },
+      {
+        table: 'modules',
+        passed: true,
+        expectedCount: 3,
+        producedCount: 3,
+        diffs: [],
+        proseChecks: { passed: 2, failed: 0 },
+      },
+    ]);
+    expect(summary.proseChecks.passed).toBe(6);
+    expect(summary.proseChecks.failed).toBe(1);
+  });
+
+  it('regular minor diffs still increment summary.minor', async () => {
+    const { aggregateSummary } = await import('./index.js');
+    const summary = aggregateSummary([
+      {
+        table: 'definitions',
+        passed: true,
+        expectedCount: 1,
+        producedCount: 1,
+        diffs: [
+          {
+            kind: 'mismatch',
+            severity: 'minor',
+            naturalKey: 'src/foo.ts::bar',
+            details: 'line drift',
+          },
+        ],
+      },
+    ]);
+    expect(summary.minor).toBe(1);
+    expect(summary.proseChecks.failed).toBe(0);
+  });
+});
diff --git a/evals/harness/comparator/index.ts b/evals/harness/comparator/index.ts
new file mode 100644
index 0000000..60394e9
--- /dev/null
+++ b/evals/harness/comparator/index.ts
@@ -0,0 +1,220 @@
+import type { IndexDatabase } from '../../../src/db/database-facade.js';
+import {
+  type DiffReport,
+  type DiffSummary,
+  type GroundTruth,
+  PROSE_BEARING_TABLES,
+  PROSE_REFERENCE_COUNTERS,
+  type ProseJudgeFn,
+  STUB_JUDGE_MARKER,
+  type TableDiff,
+  type TableName,
+} from '../types.js';
+import {
+  compareContracts,
+  compareDefinitionMetadata,
+  compareDefinitions,
+  compareFeatureCohesion,
+  compareFiles,
+  compareFlowRubric,
+  compareFlows,
+  compareImports,
+  compareInteractionRubric,
+  compareInteractions,
+  compareModuleCohesion,
+  compareModuleMembers,
+  compareModules,
+  compareRelationshipAnnotations,
+} from './tables/index.js';
+
+export interface CompareOptions {
+  produced: IndexDatabase;
+  groundTruth: GroundTruth;
+  /** Tables the caller wants compared. Tables not listed are skipped. */
+  scope: TableName[];
+  /**
+   * Pluggable prose-judge. Real implementation calls an LLM; tests inject a stub.
+   * Currently used by definition_metadata, relationship_annotations, modules.description,
+   * interactions.semantic, flows.description.
+   */
+  judgeFn: ProseJudgeFn;
+  /** Optional git SHA of the squint commit producing the DB, embedded in the report. */
+  squintCommit?: string;
+}
+
+/**
+ * Top-level orchestrator. Dispatches per-table comparators based on scope,
+ * aggregates per-row diffs into a DiffSummary, returns a DiffReport.
+ *
+ * Pass criteria: zero CRITICAL and zero MAJOR diffs across all in-scope tables.
+ * Minor diffs (line drift, prose drift) only warn.
+ */
+export async function compare(opts: CompareOptions): Promise<DiffReport> {
+  const start = Date.now();
+  const { produced, groundTruth, scope, judgeFn } = opts;
+
+  // Guardrail: refuse to silently pass real prose checks with a stub judge.
+  // Iteration 1 has no prose references declared, so this is a no-op then.
+  // The moment iteration 2 adds GT prose references, the harness fails loudly
+  // unless the caller injects a real LLM judge.
+  assertNoStubJudgeForProseChecks(judgeFn, scope, groundTruth);
+
+  const tables: TableDiff[] = [];
+
+  for (const tableName of scope) {
+    // Some comparators are async (those that call the LLM judge); awaited uniformly here.
+    tables.push(await runComparator(tableName, produced, groundTruth, judgeFn));
+  }
+
+  const summary = aggregateSummary(tables);
+
+  const passed = summary.critical === 0 && summary.major === 0;
+
+  return {
+    fixtureName: groundTruth.fixtureName,
+    passed,
+    scope,
+    tables,
+    summary,
+    durationMs: Date.now() - start,
+    squintCommit: opts.squintCommit,
+  };
+}
+
+/**
+ * Refuse to use a stub judge for any scope that actually contains declared
+ * prose references. Catches the bug where iteration 2+ ships and the eval
+ * file forgets to swap the stub judge for a real LLM call.
+ *
+ * When the guardrail is checked but does NOT fire (the common, healthy case),
+ * a single line is logged via console.debug so CI logs visibly confirm the
+ * guardrail is alive. Set EVAL_DEBUG=1 to see these lines locally.
+ */
+function assertNoStubJudgeForProseChecks(judgeFn: ProseJudgeFn, scope: TableName[], gt: GroundTruth): void {
+  const isStub = judgeFn[STUB_JUDGE_MARKER] === true;
+  if (!isStub) {
+    debugLog(`stub-judge guardrail: real judge in use; no check needed (scope=[${scope.join(', ')}])`);
+    return;
+  }
+
+  const proseScopes = scope.filter((s) => PROSE_BEARING_TABLES.has(s));
+  if (proseScopes.length === 0) {
+    debugLog(`stub-judge guardrail: stub OK; no prose-bearing tables in scope (scope=[${scope.join(', ')}])`);
+    return;
+  }
+
+  // Stub judge IS allowed unless GT actually declares prose references in
+  // an in-scope table. Walk the GT to check.
+  const hasProseRefs = countDeclaredProseReferences(gt, proseScopes);
+  if (hasProseRefs > 0) {
+    throw new Error(
+      `Stub judge is forbidden when prose checks are in scope and ground truth declares prose references. Scope contains ${proseScopes.length} prose-bearing table(s) (${proseScopes.join(', ')}) and ground truth declares ${hasProseRefs} prose reference(s). Inject a real LLM-backed judge instead of a stub.`
+    );
+  }
+  debugLog(
+    `stub-judge guardrail: stub OK; ${proseScopes.length} prose-bearing scope(s) but GT declares 0 prose references (proseScopes=[${proseScopes.join(', ')}])`
+  );
+}
+
+/**
+ * Single-line trace channel for the eval harness. Off by default; turn on
+ * with EVAL_DEBUG=1. Goes to stderr to avoid polluting the eval's stdout
+ * report log lines.
+ */
+function debugLog(message: string): void {
+  if (process.env.EVAL_DEBUG === '1') {
+    // eslint-disable-next-line no-console
+    console.error(`[eval debug] ${message}`);
+  }
+}
+
+function countDeclaredProseReferences(gt: GroundTruth, scopes: TableName[]): number {
+  let n = 0;
+  for (const scope of scopes) {
+    const counter = PROSE_REFERENCE_COUNTERS[scope];
+    if (counter) n += counter(gt);
+  }
+  return n;
+}
+
+/**
+ * Comparator function signature. Some comparators need the prose judge,
+ * some don't — both shapes are accepted (the dispatcher passes judgeFn
+ * unconditionally).
+ */
+type ComparatorFn = (produced: IndexDatabase, gt: GroundTruth, judgeFn: ProseJudgeFn) => TableDiff | Promise<TableDiff>;
+
+/**
+ * Single source of truth for which tables have a comparator implementation.
+ * Adding a new table = one entry here. The dispatcher and the
+ * "no comparator implemented" guard both read from this map.
+ */
+const COMPARATORS: Partial<Record<TableName, ComparatorFn>> = {
+  files: (p, g) => compareFiles(p, g),
+  definitions: (p, g) => compareDefinitions(p, g),
+  imports: (p, g) => compareImports(p, g),
+  modules: (p, g, j) => compareModules(p, g, j),
+  module_members: (p, g) => compareModuleMembers(p, g),
+  contracts: (p, g) => compareContracts(p, g),
+  interactions: (p, g) => compareInteractions(p, g),
+  flows: (p, g) => compareFlows(p, g),
+  definition_metadata: (p, g, j) => compareDefinitionMetadata(p, g, j),
+  relationship_annotations: (p, g, j) => compareRelationshipAnnotations(p, g, j),
+  module_cohesion: (p, g, j) => compareModuleCohesion(p, g, j),
+  interaction_rubric: (p, g, j) => compareInteractionRubric(p, g, j),
+  flow_rubric: (p, g, j) => compareFlowRubric(p, g, j),
+  feature_cohesion: (p, g, j) => compareFeatureCohesion(p, g, j),
+};
+
+async function runComparator(
+  table: TableName,
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const fn = COMPARATORS[table];
+  if (!fn) {
+    const implemented = (Object.keys(COMPARATORS) as TableName[]).sort().join(', ');
+    throw new Error(`No comparator implemented for table '${table}'. Implemented: [${implemented}]`);
+  }
+  return fn(produced, gt, judgeFn);
+}
+
+/**
+ * Aggregate per-table diffs into a summary.
+ *
+ * Counting rules:
+ * - Structural diffs (`missing`, `extra`, `mismatch`) increment critical/major/minor by severity.
+ * - Prose drifts (`prose-drift` kind) ONLY increment `proseChecks.failed`. They do not
+ *   double-count into `minor`. The minor counter is reserved for non-prose drifts (e.g.,
+ *   line tolerance breaches).
+ * - Passed prose checks come from each TableDiff's `proseChecks.passed` counter — they
+ *   never generate RowDiffs because there's nothing to report.
+ *
+ * Exported for unit testing in isolation.
+ */
+export function aggregateSummary(tables: TableDiff[]): DiffSummary {
+  const summary: DiffSummary = {
+    critical: 0,
+    major: 0,
+    minor: 0,
+    proseChecks: { passed: 0, failed: 0 },
+  };
+  for (const t of tables) {
+    for (const d of t.diffs) {
+      if (d.kind === 'prose-drift') {
+        // Prose drifts are tracked only via proseChecks.failed.
+        // Skip the severity counters to avoid double-counting.
+        continue;
+      }
+      if (d.severity === 'critical') summary.critical += 1;
+      else if (d.severity === 'major') summary.major += 1;
+      else if (d.severity === 'minor') summary.minor += 1;
+    }
+    if (t.proseChecks) {
+      summary.proseChecks.passed += t.proseChecks.passed;
+      summary.proseChecks.failed += t.proseChecks.failed;
+    }
+  }
+  return summary;
+}
diff --git a/evals/harness/comparator/llm-prose-judge.test.ts b/evals/harness/comparator/llm-prose-judge.test.ts
new file mode 100644
index 0000000..14005b2
--- /dev/null
+++ b/evals/harness/comparator/llm-prose-judge.test.ts
@@ -0,0 +1,220 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { STUB_JUDGE_MARKER } from '../types.js';
+import { makeLlmProseJudge } from './llm-prose-judge.js';
+
+/**
+ * Tests for the LLM-backed prose judge.
+ *
+ * Strategy: pass an injected llmCall stub instead of mocking llmist at the
+ * module level. This is simpler than vi.mock and lets us assert exact
+ * call counts without race conditions across test files.
+ */
+describe('makeLlmProseJudge', () => {
+  let cacheDir: string;
+  let cachePath: string;
+
+  beforeEach(() => {
+    cacheDir = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-judge-cache-'));
+    cachePath = path.join(cacheDir, 'judge-cache.json');
+  });
+
+  afterEach(() => {
+    fs.rmSync(cacheDir, { recursive: true, force: true });
+  });
+
+  function fakeLlmCall(responses: string[]): {
+    fn: (opts: { systemPrompt: string; userPrompt: string }) => Promise<string>;
+    callCount: () => number;
+    lastUserPrompt: () => string | undefined;
+  } {
+    let i = 0;
+    let lastUserPrompt: string | undefined;
+    const fn = vi.fn(async (opts: { systemPrompt: string; userPrompt: string }) => {
+      lastUserPrompt = opts.userPrompt;
+      if (i >= responses.length) throw new Error(`fake llm call ${i + 1} has no canned response`);
+      return responses[i++];
+    });
+    return {
+      fn: fn as unknown as (opts: { systemPrompt: string; userPrompt: string }) => Promise<string>,
+      callCount: () => fn.mock.calls.length,
+      lastUserPrompt: () => lastUserPrompt,
+    };
+  }
+
+  it('returns the LLM similarity score on the happy path', async () => {
+    const llm = fakeLlmCall(['{"similarity": 0.92, "reasoning": "very close"}']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    const result = await judge({
+      field: 'definition_metadata.purpose for src/foo.ts::bar',
+      reference: 'Authenticate a user.',
+      candidate: 'Verifies user credentials and signs a token.',
+      minSimilarity: 0.75,
+    });
+
+    expect(result.similarity).toBeCloseTo(0.92, 5);
+    expect(result.passed).toBe(true);
+    expect(result.reasoning).toBe('very close');
+    expect(llm.callCount()).toBe(1);
+  });
+
+  it('marks passed=false when similarity is below the threshold', async () => {
+    const llm = fakeLlmCall(['{"similarity": 0.5, "reasoning": "missing key concept"}']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    const result = await judge({
+      field: 'test',
+      reference: 'A',
+      candidate: 'B',
+      minSimilarity: 0.75,
+    });
+
+    expect(result.similarity).toBe(0.5);
+    expect(result.passed).toBe(false);
+  });
+
+  it('caches successful judgments — second call with same args makes no LLM call', async () => {
+    const llm = fakeLlmCall(['{"similarity": 0.85, "reasoning": "fine"}']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    const req = { field: 't', reference: 'ref', candidate: 'cand', minSimilarity: 0.7 };
+    await judge(req);
+    await judge(req);
+
+    expect(llm.callCount()).toBe(1);
+  });
+
+  it('cache key does not include minSimilarity — same (model,ref,cand) reuses across thresholds', async () => {
+    const llm = fakeLlmCall(['{"similarity": 0.8, "reasoning": "ok"}']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    const r1 = await judge({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 });
+    const r2 = await judge({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.85 });
+
+    expect(llm.callCount()).toBe(1); // single LLM call
+    expect(r1.passed).toBe(true); // 0.8 >= 0.7
+    expect(r2.passed).toBe(false); // 0.8 < 0.85
+    expect(r1.similarity).toBe(r2.similarity);
+  });
+
+  it('persists cache to disk and reads it back from a fresh judge instance', async () => {
+    const llm1 = fakeLlmCall(['{"similarity": 0.9, "reasoning": "match"}']);
+    const judge1 = makeLlmProseJudge({ cachePath, llmCall: llm1.fn });
+    await judge1({ field: 't', reference: 'X', candidate: 'Y', minSimilarity: 0.75 });
+    expect(fs.existsSync(cachePath)).toBe(true);
+
+    // Fresh instance should pick up the persisted cache and not call LLM again
+    const llm2 = fakeLlmCall([]); // no canned responses — must not be called
+    const judge2 = makeLlmProseJudge({ cachePath, llmCall: llm2.fn });
+    const result = await judge2({ field: 't', reference: 'X', candidate: 'Y', minSimilarity: 0.75 });
+
+    expect(result.similarity).toBe(0.9);
+    expect(llm2.callCount()).toBe(0);
+  });
+
+  it('different reference text causes a cache miss', async () => {
+    const llm = fakeLlmCall([
+      '{"similarity": 0.9, "reasoning": "first"}',
+      '{"similarity": 0.5, "reasoning": "second"}',
+    ]);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    await judge({ field: 't', reference: 'A', candidate: 'X', minSimilarity: 0.7 });
+    await judge({ field: 't', reference: 'B', candidate: 'X', minSimilarity: 0.7 });
+
+    expect(llm.callCount()).toBe(2);
+  });
+
+  it('different candidate text causes a cache miss', async () => {
+    const llm = fakeLlmCall([
+      '{"similarity": 0.9, "reasoning": "first"}',
+      '{"similarity": 0.5, "reasoning": "second"}',
+    ]);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    await judge({ field: 't', reference: 'A', candidate: 'X', minSimilarity: 0.7 });
+    await judge({ field: 't', reference: 'A', candidate: 'Y', minSimilarity: 0.7 });
+
+    expect(llm.callCount()).toBe(2);
+  });
+
+  it('throws on malformed LLM response (no JSON)', async () => {
+    const llm = fakeLlmCall(['not json at all']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    await expect(judge({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 })).rejects.toThrow(
+      /parse|json/i
+    );
+  });
+
+  it('throws on JSON missing similarity field', async () => {
+    const llm = fakeLlmCall(['{"reasoning": "ok but no number"}']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    await expect(judge({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 })).rejects.toThrow(
+      /similarity/i
+    );
+  });
+
+  it('throws on similarity outside [0, 1]', async () => {
+    const llm = fakeLlmCall(['{"similarity": 1.5, "reasoning": "out of range"}']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    await expect(judge({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 })).rejects.toThrow(
+      /similarity|range/i
+    );
+  });
+
+  it('extracts JSON from response wrapped in extra text', async () => {
+    // Some models prepend "Here is the JSON:" or similar before the actual object
+    const llm = fakeLlmCall(['Here is the result: {"similarity": 0.88, "reasoning": "fine"} done.']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+
+    const result = await judge({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 });
+    expect(result.similarity).toBeCloseTo(0.88, 5);
+  });
+
+  it('returned function does NOT carry STUB_JUDGE_MARKER (so the guardrail accepts it)', () => {
+    const judge = makeLlmProseJudge({ cachePath, llmCall: fakeLlmCall([]).fn });
+    expect((judge as unknown as { [k: symbol]: unknown })[STUB_JUDGE_MARKER]).toBeUndefined();
+  });
+
+  it('different judge model results in cache miss for same ref+cand', async () => {
+    const llm1 = fakeLlmCall(['{"similarity": 0.9, "reasoning": "model A"}']);
+    const judge1 = makeLlmProseJudge({ cachePath, model: 'model-a', llmCall: llm1.fn });
+    await judge1({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 });
+
+    const llm2 = fakeLlmCall(['{"similarity": 0.6, "reasoning": "model B"}']);
+    const judge2 = makeLlmProseJudge({ cachePath, model: 'model-b', llmCall: llm2.fn });
+    const r2 = await judge2({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 });
+
+    expect(r2.similarity).toBe(0.6);
+    expect(llm2.callCount()).toBe(1);
+  });
+
+  it('handles a missing cache file gracefully on first run', async () => {
+    const nonexistent = path.join(cacheDir, 'subdir', 'never-existed.json');
+    const llm = fakeLlmCall(['{"similarity": 0.8, "reasoning": "ok"}']);
+    const judge = makeLlmProseJudge({ cachePath: nonexistent, llmCall: llm.fn });
+    const result = await judge({ field: 't', reference: 'A', candidate: 'B', minSimilarity: 0.7 });
+    expect(result.similarity).toBe(0.8);
+    expect(fs.existsSync(nonexistent)).toBe(true); // cache file created
+  });
+
+  it('user prompt contains both reference and candidate', async () => {
+    const llm = fakeLlmCall(['{"similarity": 0.8, "reasoning": "ok"}']);
+    const judge = makeLlmProseJudge({ cachePath, llmCall: llm.fn });
+    await judge({
+      field: 't',
+      reference: 'AUTHENTICATE_REFERENCE',
+      candidate: 'CANDIDATE_DESC',
+      minSimilarity: 0.7,
+    });
+    const prompt = llm.lastUserPrompt() ?? '';
+    expect(prompt).toContain('AUTHENTICATE_REFERENCE');
+    expect(prompt).toContain('CANDIDATE_DESC');
+  });
+});
diff --git a/evals/harness/comparator/llm-prose-judge.ts b/evals/harness/comparator/llm-prose-judge.ts
new file mode 100644
index 0000000..805b868
--- /dev/null
+++ b/evals/harness/comparator/llm-prose-judge.ts
@@ -0,0 +1,243 @@
+import { createHash } from 'node:crypto';
+import fs from 'node:fs';
+import path from 'node:path';
+import type { Command } from '@oclif/core';
+import { completeWithLogging } from '../../../src/commands/llm/_shared/llm-utils.js';
+import type { ProseJudgeFn, ProseJudgeRequest, ProseJudgeResult } from '../types.js';
+
+/**
+ * LLM-backed prose-similarity judge for the eval harness.
+ *
+ * Wraps squint's existing `completeWithLogging()` infrastructure (retry,
+ * cost reporting, llmist client management) and adds:
+ * - A strict similarity-judging system prompt
+ * - Disk-persistent cache keyed on (model, reference, candidate, prompt-version)
+ * - Robust JSON extraction from the LLM response
+ *
+ * Returned function does NOT carry STUB_JUDGE_MARKER, so the
+ * `assertNoStubJudgeForProseChecks` guardrail accepts it for prose-bearing
+ * scopes.
+ */
+
+/**
+ * Bumped whenever a system prompt changes. Forces a cache miss for old
+ * (model, ref, cand) entries that were judged under the old instructions,
+ * since the same inputs would semantically produce a different score now.
+ *
+ * Two distinct version namespaces: prose judging (strict, full sentences)
+ * and theme judging (tolerant, prose-vs-tag-list). They live in the same
+ * cache file but never collide because the version string is part of the
+ * SHA-256 cache key.
+ */
+const PROSE_PROMPT_VERSION = 'v1';
+const THEME_PROMPT_VERSION = 'theme-v2';
+
+const PROSE_SYSTEM_PROMPT = `You are a strict semantic similarity judge for code documentation.
+
+Compare a REFERENCE description (the ground-truth expected meaning) against a CANDIDATE description (what an LLM produced). Score how well the candidate captures the same meaning as the reference, on a scale of 0.0 to 1.0.
+
+Scoring rubric:
+- 1.0 = identical meaning, even if different words/phrasing
+- 0.85-0.99 = same core meaning, minor missing nuance
+- 0.7-0.84 = same general intent but missing one important concept
+- 0.4-0.69 = related topic, missing key concepts
+- 0.0-0.39 = different meaning or wrong topic
+
+Be strict. Surface drift. Do not give credit for vague descriptions that could apply to many things. A description that says "handles requests" when the reference says "validates auth credentials and signs JWT" is missing key concepts — score around 0.5.
+
+Output ONLY a JSON object with this exact shape, no other text:
+{"similarity": <number 0..1>, "reasoning": "<one sentence>"}`;
+
+const THEME_SYSTEM_PROMPT = `You judge whether a short LLM-produced label fits a target code-element concept.
+
+The CANDIDATE is a short label produced by an LLM annotating some code element. It can be either:
+- A tag list formatted as "tags: a, b, c"
+- A name + brief description formatted as "name: brief description"
+Both are short labels, not full-prose paraphrases of anything.
+
+The REFERENCE is a one-sentence description of the target CONCEPT — what kind of code element the candidate is supposed to label. The reference is a CONCEPT, not a checklist of words the candidate must contain.
+
+Score how reasonably the candidate fits the reference concept, on a scale of 0.0 to 1.0:
+- 0.85-1.0 = the candidate clearly fits (any reasonable label for that kind of element)
+- 0.6-0.84 = the candidate is reasonable, perhaps using broader or different vocabulary
+- 0.3-0.59 = the candidate is tangentially related but doesn't clearly identify this kind of element
+- 0.0-0.29 = the candidate is unrelated, off-topic, or actively misleading
+
+Be tolerant of vocabulary choice. The annotating LLM has freedom to pick synonyms ("event-management" vs "events", "user-management" vs "auth", "task-management" vs "tasks"). Do NOT penalize the candidate for "missing concepts" or being "too generic" — short labels rarely paraphrase a full reference. Score above 0.7 unless the candidate is clearly off-topic for the reference's concept.
+
+Output ONLY a JSON object with this exact shape, no other text:
+{"similarity": <number 0..1>, "reasoning": "<one sentence>"}`;
+
+const DEFAULT_MODEL = process.env.EVAL_JUDGE_MODEL ?? 'openrouter:google/gemini-2.5-flash';
+
+/** Subset of completeWithLogging's options that the judge actually uses. */
+export interface LlmCallOptions {
+  model: string;
+  systemPrompt: string;
+  userPrompt: string;
+  temperature?: number;
+  command: Command;
+  isJson: boolean;
+}
+
+/** Pluggable LLM call signature — accepts the real `completeWithLogging` or a test stub. */
+export type LlmCallFn = (opts: LlmCallOptions) => Promise<string>;
+
+export interface MakeLlmProseJudgeOptions {
+  /** Model to use. Default: process.env.EVAL_JUDGE_MODEL ?? 'openrouter:google/gemini-2.5-flash' */
+  model?: string;
+  /** Cache file path. Default: evals/results/.judge-cache.json */
+  cachePath?: string;
+  /** LLM call site override (for tests). Default: completeWithLogging from squint. */
+  llmCall?: LlmCallFn;
+}
+
+interface CachedJudgment {
+  similarity: number;
+  reasoning: string;
+  cachedAt: string;
+}
+
+type CacheFile = Record<string, CachedJudgment>;
+
+/**
+ * Build a prose judge backed by a real LLM.
+ */
+export function makeLlmProseJudge(opts: MakeLlmProseJudgeOptions = {}): ProseJudgeFn {
+  const model = opts.model ?? DEFAULT_MODEL;
+  const cachePath = opts.cachePath ?? defaultCachePath();
+  const llmCall = opts.llmCall ?? (completeWithLogging as unknown as LlmCallFn);
+
+  // Lazy cache load — first call reads from disk if it exists.
+  let cache: CacheFile | null = null;
+
+  function loadCache(): CacheFile {
+    if (cache) return cache;
+    try {
+      const raw = fs.readFileSync(cachePath, 'utf-8');
+      cache = JSON.parse(raw) as CacheFile;
+    } catch {
+      cache = {};
+    }
+    return cache;
+  }
+
+  function saveCache(): void {
+    if (!cache) return;
+    fs.mkdirSync(path.dirname(cachePath), { recursive: true });
+    fs.writeFileSync(cachePath, JSON.stringify(cache, null, 2));
+  }
+
+  function cacheKey(version: string, reference: string, candidate: string): string {
+    // Excludes minSimilarity by design — the same (model, ref, cand) always produces the
+    // same similarity score; passed/failed is computed at request time.
+    // The version string is mode-specific so prose and theme judgments cohabit
+    // the same cache file without colliding.
+    return createHash('sha256').update(`${version}\n${model}\n${reference}\n${candidate}`).digest('hex');
+  }
+
+  return async function llmProseJudge(req: ProseJudgeRequest): Promise<ProseJudgeResult> {
+    const mode = req.mode ?? 'prose';
+    const systemPrompt = mode === 'theme' ? THEME_SYSTEM_PROMPT : PROSE_SYSTEM_PROMPT;
+    const version = mode === 'theme' ? THEME_PROMPT_VERSION : PROSE_PROMPT_VERSION;
+    const c = loadCache();
+    const key = cacheKey(version, req.reference, req.candidate);
+    const hit = c[key];
+
+    let similarity: number;
+    let reasoning: string;
+
+    if (hit) {
+      similarity = hit.similarity;
+      reasoning = hit.reasoning;
+    } else {
+      const userPrompt = `REFERENCE: ${req.reference}\nCANDIDATE: ${req.candidate}\n\nScore the similarity.`;
+      const response = await llmCall({
+        model,
+        systemPrompt,
+        userPrompt,
+        temperature: 0,
+        command: stubCommand(),
+        isJson: true, // suppress completeWithLogging's colored before/after logs
+      });
+      const parsed = parseJudgeResponse(response, req.field);
+      similarity = parsed.similarity;
+      reasoning = parsed.reasoning;
+      c[key] = { similarity, reasoning, cachedAt: new Date().toISOString() };
+      saveCache();
+    }
+
+    return {
+      similarity,
+      passed: similarity >= req.minSimilarity,
+      reasoning,
+    };
+  };
+}
+
+// ============================================================
+// Helpers
+// ============================================================
+
+function defaultCachePath(): string {
+  // evals/.judge-cache.json — sibling of `results/`, NOT inside it. Lives
+  // outside the per-run rotation directory so the rotator can never touch it.
+  // Gitignored via an explicit `.judge-cache.json` rule.
+  return path.resolve(process.cwd(), 'evals/.judge-cache.json');
+}
+
+/** Minimal mock Command for completeWithLogging — only needs a `log` method. */
+function stubCommand(): Command {
+  return {
+    log: () => undefined,
+  } as unknown as Command;
+}
+
+interface ParsedJudgment {
+  similarity: number;
+  reasoning: string;
+}
+
+/**
+ * Extract a JSON judgment object from the LLM response.
+ *
+ * Tolerates extra text around the JSON (some models prepend "Here is the result:" etc.).
+ * Throws on:
+ * - No parseable JSON object found
+ * - Missing `similarity` field
+ * - similarity outside [0, 1]
+ */
+export function parseJudgeResponse(response: string, fieldLabel: string): ParsedJudgment {
+  // Find the first {...} block. Our judge response is always a flat object, so a
+  // simple non-nested match suffices. We do NOT require the "similarity" key to
+  // appear inside the brace pair — that's the parser's job to validate, not the
+  // matcher's. This way a {"reasoning": "..."} without similarity still gets
+  // parsed and surfaces a precise "missing similarity" error.
+  const match = response.match(/\{[^{}]*\}/);
+  if (!match) {
+    throw new Error(`prose-judge: could not parse JSON from response for ${fieldLabel}: ${truncate(response, 200)}`);
+  }
+  let parsed: { similarity?: unknown; reasoning?: unknown };
+  try {
+    parsed = JSON.parse(match[0]);
+  } catch (err) {
+    throw new Error(
+      `prose-judge: invalid JSON in response for ${fieldLabel}: ${truncate(match[0], 200)} (${(err as Error).message})`
+    );
+  }
+
+  const sim = parsed.similarity;
+  if (typeof sim !== 'number') {
+    throw new Error(`prose-judge: missing or non-numeric 'similarity' in response for ${fieldLabel}`);
+  }
+  if (sim < 0 || sim > 1 || !Number.isFinite(sim)) {
+    throw new Error(`prose-judge: similarity ${sim} out of range [0, 1] for ${fieldLabel}`);
+  }
+
+  const reasoning = typeof parsed.reasoning === 'string' ? parsed.reasoning : '';
+  return { similarity: sim, reasoning };
+}
+
+function truncate(s: string, maxLen: number): string {
+  return s.length > maxLen ? `${s.slice(0, maxLen)}...` : s;
+}
diff --git a/evals/harness/comparator/natural-keys.test.ts b/evals/harness/comparator/natural-keys.test.ts
new file mode 100644
index 0000000..e2786ae
--- /dev/null
+++ b/evals/harness/comparator/natural-keys.test.ts
@@ -0,0 +1,183 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { IndexDatabase } from '../../../src/db/database-facade.js';
+import { computeHash } from '../../../src/db/schema.js';
+import {
+  contractKeyOfRow,
+  definitionKeyOf,
+  fileKeyOfRow,
+  flowKeyOfRow,
+  interactionKeyOfRow,
+  moduleKeyOfRow,
+} from './natural-keys.js';
+
+/**
+ * Natural-key extractors must be ID-agnostic. Two DBs created with different
+ * insertion orders (and therefore different IDs) for the SAME logical content
+ * must yield the SAME natural keys.
+ */
+describe('natural-keys', () => {
+  let dbPath: string;
+  let db: IndexDatabase;
+
+  beforeEach(() => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-eval-nk-'));
+    dbPath = path.join(dir, 'test.db');
+    db = new IndexDatabase(dbPath);
+    db.initialize();
+  });
+
+  afterEach(() => {
+    db.close();
+    fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
+  });
+
+  describe('fileKeyOfRow', () => {
+    it('uses the path column verbatim', () => {
+      expect(fileKeyOfRow({ path: 'src/index.ts' })).toBe('src/index.ts');
+    });
+  });
+
+  describe('definitionKeyOf', () => {
+    it('joins file path and definition name with ::', () => {
+      const fileId = db.files.insert({
+        path: 'src/foo.ts',
+        language: 'typescript',
+        contentHash: computeHash('x'),
+        sizeBytes: 1,
+        modifiedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const defId = db.files.insertDefinition(fileId, {
+        name: 'MyClass',
+        kind: 'class',
+        isExported: true,
+        isDefault: false,
+        position: { row: 4, column: 0 },
+        endPosition: { row: 10, column: 1 },
+      });
+      expect(definitionKeyOf(db, defId)).toBe('src/foo.ts::MyClass');
+    });
+
+    it('returns the same key regardless of insertion order', () => {
+      // Insert two files in order A, B then build a fresh DB inserting B, A.
+      const fileAId = db.files.insert({
+        path: 'a.ts',
+        language: 'typescript',
+        contentHash: computeHash('a'),
+        sizeBytes: 1,
+        modifiedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const fileBId = db.files.insert({
+        path: 'b.ts',
+        language: 'typescript',
+        contentHash: computeHash('b'),
+        sizeBytes: 1,
+        modifiedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const defAId = db.files.insertDefinition(fileAId, {
+        name: 'a',
+        kind: 'function',
+        isExported: true,
+        isDefault: false,
+        position: { row: 0, column: 0 },
+        endPosition: { row: 1, column: 0 },
+      });
+      const defBId = db.files.insertDefinition(fileBId, {
+        name: 'b',
+        kind: 'function',
+        isExported: true,
+        isDefault: false,
+        position: { row: 0, column: 0 },
+        endPosition: { row: 1, column: 0 },
+      });
+
+      expect(definitionKeyOf(db, defAId)).toBe('a.ts::a');
+      expect(definitionKeyOf(db, defBId)).toBe('b.ts::b');
+
+      // Reverse-order DB
+      const dir2 = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-eval-nk2-'));
+      const dbPath2 = path.join(dir2, 'test.db');
+      const db2 = new IndexDatabase(dbPath2);
+      db2.initialize();
+      const fileBId2 = db2.files.insert({
+        path: 'b.ts',
+        language: 'typescript',
+        contentHash: computeHash('b'),
+        sizeBytes: 1,
+        modifiedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const fileAId2 = db2.files.insert({
+        path: 'a.ts',
+        language: 'typescript',
+        contentHash: computeHash('a'),
+        sizeBytes: 1,
+        modifiedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const defBId2 = db2.files.insertDefinition(fileBId2, {
+        name: 'b',
+        kind: 'function',
+        isExported: true,
+        isDefault: false,
+        position: { row: 0, column: 0 },
+        endPosition: { row: 1, column: 0 },
+      });
+      const defAId2 = db2.files.insertDefinition(fileAId2, {
+        name: 'a',
+        kind: 'function',
+        isExported: true,
+        isDefault: false,
+        position: { row: 0, column: 0 },
+        endPosition: { row: 1, column: 0 },
+      });
+
+      // IDs differ but natural keys are stable
+      expect(defAId2).not.toBe(defAId);
+      expect(definitionKeyOf(db2, defAId2)).toBe('a.ts::a');
+      expect(definitionKeyOf(db2, defBId2)).toBe('b.ts::b');
+
+      db2.close();
+      fs.rmSync(dir2, { recursive: true, force: true });
+    });
+
+    it('throws on unknown definition id', () => {
+      expect(() => definitionKeyOf(db, 99999)).toThrow();
+    });
+  });
+
+  describe('moduleKeyOfRow', () => {
+    it('uses the fullPath column', () => {
+      expect(moduleKeyOfRow({ fullPath: 'project.controllers' })).toBe('project.controllers');
+    });
+  });
+
+  describe('contractKeyOfRow', () => {
+    it('joins protocol and normalizedKey with ::', () => {
+      expect(contractKeyOfRow({ protocol: 'http', normalizedKey: 'POST /api/auth/login' })).toBe(
+        'http::POST /api/auth/login'
+      );
+    });
+
+    it('handles event-style normalized keys', () => {
+      expect(contractKeyOfRow({ protocol: 'events', normalizedKey: 'task.completed' })).toBe('events::task.completed');
+    });
+  });
+
+  describe('interactionKeyOfRow', () => {
+    it('joins from and to module paths with arrow', () => {
+      expect(
+        interactionKeyOfRow({
+          fromModulePath: 'project.controllers',
+          toModulePath: 'project.services',
+        })
+      ).toBe('project.controllers->project.services');
+    });
+  });
+
+  describe('flowKeyOfRow', () => {
+    it('uses the slug', () => {
+      expect(flowKeyOfRow({ slug: 'user-login' })).toBe('user-login');
+    });
+  });
+});
diff --git a/evals/harness/comparator/natural-keys.ts b/evals/harness/comparator/natural-keys.ts
new file mode 100644
index 0000000..93b323a
--- /dev/null
+++ b/evals/harness/comparator/natural-keys.ts
@@ -0,0 +1,96 @@
+import type { IndexDatabase } from '../../../src/db/database-facade.js';
+import { type ContractKey, type DefKey, contractKey, defKey } from '../types.js';
+
+/**
+ * ID-agnostic natural-key extractors for every table the comparator handles.
+ *
+ * Why this matters: hand-authored ground truth never knows DB row IDs.
+ * Two ingestion runs of the same fixture produce different IDs (insertion
+ * order varies). Comparators must join on natural keys derived from
+ * semantically stable columns: file paths, definition names, module
+ * full_paths, etc.
+ */
+
+export function fileKeyOfRow(row: { path: string }): string {
+  return row.path;
+}
+
+export function definitionKeyOf(db: IndexDatabase, definitionId: number): DefKey {
+  const conn = db.getConnection();
+  const row = conn
+    .prepare(
+      `SELECT f.path AS path, d.name AS name
+       FROM definitions d
+       JOIN files f ON d.file_id = f.id
+       WHERE d.id = ?`
+    )
+    .get(definitionId) as { path: string; name: string } | undefined;
+  if (!row) {
+    throw new Error(`No definition with id=${definitionId}`);
+  }
+  return defKey(row.path, row.name);
+}
+
+export function moduleKeyOfRow(row: { fullPath: string }): string {
+  return row.fullPath;
+}
+
+export function contractKeyOfRow(row: { protocol: string; normalizedKey: string }): ContractKey {
+  return contractKey(row.protocol, row.normalizedKey);
+}
+
+export function interactionKeyOfRow(row: { fromModulePath: string; toModulePath: string }): string {
+  return `${row.fromModulePath}->${row.toModulePath}`;
+}
+
+export function flowKeyOfRow(row: { slug: string }): string {
+  return row.slug;
+}
+
+/**
+ * Resolve a natural definition key by looking up file path + name.
+ * Returns null if not found (used by comparators to detect "missing" rows).
+ */
+export function definitionIdByKey(db: IndexDatabase, key: DefKey): number | null {
+  const idx = key.lastIndexOf('::');
+  if (idx === -1) return null;
+  const filePath = key.slice(0, idx);
+  const name = key.slice(idx + 2);
+  const conn = db.getConnection();
+  const row = conn
+    .prepare(
+      `SELECT d.id AS id
+       FROM definitions d
+       JOIN files f ON d.file_id = f.id
+       WHERE f.path = ? AND d.name = ?
+       LIMIT 1`
+    )
+    .get(filePath, name) as { id: number } | undefined;
+  return row?.id ?? null;
+}
+
+/**
+ * Resolve a natural module key (full_path) to its DB id.
+ */
+export function moduleIdByKey(db: IndexDatabase, fullPath: string): number | null {
+  const conn = db.getConnection();
+  const row = conn.prepare('SELECT id FROM modules WHERE full_path = ? LIMIT 1').get(fullPath) as
+    | { id: number }
+    | undefined;
+  return row?.id ?? null;
+}
+
+/**
+ * Resolve a natural contract key (protocol::normalized_key) to its DB id.
+ */
+export function contractIdByKey(db: IndexDatabase, key: ContractKey): number | null {
+  const idx = key.lastIndexOf('::');
+  if (idx === -1) return null;
+  const protocol = key.slice(0, idx);
+  const normalizedKey = key.slice(idx + 2);
+  const conn = db.getConnection();
+  const row = conn
+    .prepare('SELECT id FROM contracts WHERE protocol = ? AND normalized_key = ? LIMIT 1')
+    .get(protocol, normalizedKey) as { id: number } | undefined;
+  return row?.id ?? null;
+}
diff --git a/evals/harness/comparator/severity.test.ts b/evals/harness/comparator/severity.test.ts
new file mode 100644
index 0000000..58a7514
--- /dev/null
+++ b/evals/harness/comparator/severity.test.ts
@@ -0,0 +1,54 @@
+import { describe, expect, it } from 'vitest';
+import type { RowDiff } from '../types.js';
+import { countDiffsBySeverity, tableDiffPassed } from './severity.js';
+
+const diff = (severity: RowDiff['severity'], kind: RowDiff['kind'] = 'mismatch'): RowDiff => ({
+  kind,
+  severity,
+  naturalKey: 'k',
+  details: 'd',
+});
+
+describe('countDiffsBySeverity', () => {
+  it('returns all-zeros on empty input', () => {
+    expect(countDiffsBySeverity([])).toEqual({ critical: 0, major: 0, minor: 0 });
+  });
+
+  it('counts each severity correctly', () => {
+    expect(countDiffsBySeverity([diff('critical'), diff('critical'), diff('major'), diff('minor')])).toEqual({
+      critical: 2,
+      major: 1,
+      minor: 1,
+    });
+  });
+
+  it('excludes prose-drift diffs from severity counting', () => {
+    expect(countDiffsBySeverity([diff('minor', 'prose-drift'), diff('minor'), diff('major', 'prose-drift')])).toEqual({
+      critical: 0,
+      major: 0,
+      minor: 1,
+    });
+  });
+});
+
+describe('tableDiffPassed', () => {
+  it('returns true on empty diffs', () => {
+    expect(tableDiffPassed([])).toBe(true);
+  });
+
+  it('returns true when only minor diffs are present', () => {
+    expect(tableDiffPassed([diff('minor'), diff('minor')])).toBe(true);
+  });
+
+  it('returns false on a single major diff', () => {
+    expect(tableDiffPassed([diff('major')])).toBe(false);
+  });
+
+  it('returns false on a single critical diff', () => {
+    expect(tableDiffPassed([diff('critical')])).toBe(false);
+  });
+
+  it('returns true when only prose drifts are present (they are informational)', () => {
+    expect(tableDiffPassed([diff('minor', 'prose-drift'), diff('major', 'prose-drift')])).toBe(true);
+  });
+});
diff --git a/evals/harness/comparator/severity.ts b/evals/harness/comparator/severity.ts
new file mode 100644
index 0000000..f1c8b04
--- /dev/null
+++ b/evals/harness/comparator/severity.ts
@@ -0,0 +1,34 @@
+import type { RowDiff } from '../types.js';
+
+/**
+ * Single source of truth for "how many of each severity" in a list of diffs.
+ * Used by aggregateSummary, baseline scoring, and per-table passed checks.
+ */
+export function countDiffsBySeverity(diffs: RowDiff[]): {
+  critical: number;
+  major: number;
+  minor: number;
+} {
+  let critical = 0;
+  let major = 0;
+  let minor = 0;
+  for (const d of diffs) {
+    if (d.kind === 'prose-drift') continue; // tracked separately via TableDiff.proseChecks
+    if (d.severity === 'critical') critical += 1;
+    else if (d.severity === 'major') major += 1;
+    else if (d.severity === 'minor') minor += 1;
+  }
+  return { critical, major, minor };
+}
+
+/**
+ * Single source of truth for "did this table pass?".
+ *
+ * Pass criteria: zero critical AND zero major. Minor diffs (line drift, prose
+ * drift) are informational only and do NOT flip passed. Same rule across every
+ * table — no per-comparator policy drift.
+ */
+export function tableDiffPassed(diffs: RowDiff[]): boolean {
+  const counts = countDiffsBySeverity(diffs);
+  return counts.critical === 0 && counts.major === 0;
+}
diff --git a/evals/harness/comparator/tables.test.ts b/evals/harness/comparator/tables.test.ts
new file mode 100644
index 0000000..bd9aceb
--- /dev/null
+++ b/evals/harness/comparator/tables.test.ts
@@ -0,0 +1,2464 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { IndexDatabase } from '../../../src/db/database-facade.js';
+import { buildGroundTruthDb } from '../builder.js';
+import { type GroundTruth, defKey } from '../types.js';
+import type { ProseJudgeFn } from '../types.js';
+import {
+  compareContracts,
+  compareDefinitionMetadata,
+  compareDefinitions,
+  compareFiles,
+  compareFlows,
+  compareImports,
+  compareInteractionRubric,
+  compareInteractions,
+  compareModuleCohesion,
+  compareModuleMembers,
+  compareModules,
+  compareRelationshipAnnotations,
+} from './tables/index.js';
+
+/**
+ * Per-table comparator strategies. Each comparator takes a "produced" DB
+ * (what squint emitted) and a GroundTruth, and returns a TableDiff.
+ *
+ * Tests use TWO builder-produced DBs that intentionally differ to verify
+ * the comparator detects each kind of mismatch (missing, extra, mismatch).
+ */
+describe('per-table comparators', () => {
+  let dir: string;
+  let producedDb: IndexDatabase;
+
+  beforeEach(() => {
+    dir = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-eval-cmp-'));
+    producedDb = new IndexDatabase(path.join(dir, 'produced.db'));
+    producedDb.initialize();
+  });
+
+  afterEach(() => {
+    producedDb.close();
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  // ============================================================
+  // files
+  // ============================================================
+  describe('compareFiles', () => {
+    const gt: GroundTruth = {
+      fixtureName: 't',
+      files: [
+        { path: 'src/a.ts', language: 'typescript' },
+        { path: 'src/b.ts', language: 'typescript' },
+      ],
+      definitions: [],
+    };
+
+    it('passes when produced matches ground truth', () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = compareFiles(producedDb, gt);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.expectedCount).toBe(2);
+      expect(diff.producedCount).toBe(2);
+    });
+
+    it('reports critical missing when a file is absent in produced', () => {
+      buildGroundTruthDb(producedDb, { ...gt, files: [{ path: 'src/a.ts', language: 'typescript' }] });
+      const diff = compareFiles(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({ kind: 'missing', severity: 'critical', naturalKey: 'src/b.ts' }),
+      ]);
+    });
+
+    it('reports major extra when produced has a file not in ground truth', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        files: [...gt.files, { path: 'src/c.ts', language: 'typescript' }],
+      });
+      const diff = compareFiles(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({ kind: 'extra', severity: 'major', naturalKey: 'src/c.ts' }),
+      ]);
+    });
+  });
+
+  // ============================================================
+  // definitions
+  // ============================================================
+  describe('compareDefinitions', () => {
+    const gt: GroundTruth = {
+      fixtureName: 't',
+      files: [{ path: 'src/foo.ts', language: 'typescript' }],
+      definitions: [
+        { file: 'src/foo.ts', name: 'Foo', kind: 'class', isExported: true, line: 5, extendsName: 'Base' },
+        { file: 'src/foo.ts', name: 'helper', kind: 'function', isExported: false, line: 20 },
+      ],
+    };
+
+    it('passes on exact match', () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = compareDefinitions(producedDb, gt);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+    });
+
+    it('tolerates ±2 line drift on definition lines', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        definitions: [
+          { file: 'src/foo.ts', name: 'Foo', kind: 'class', isExported: true, line: 7, extendsName: 'Base' },
+          { file: 'src/foo.ts', name: 'helper', kind: 'function', isExported: false, line: 19 },
+        ],
+      });
+      const diff = compareDefinitions(producedDb, gt);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('reports a minor mismatch when line drifts beyond tolerance (still passes — minor only)', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        definitions: [
+          { file: 'src/foo.ts', name: 'Foo', kind: 'class', isExported: true, line: 50, extendsName: 'Base' },
+          { file: 'src/foo.ts', name: 'helper', kind: 'function', isExported: false, line: 20 },
+        ],
+      });
+      const diff = compareDefinitions(producedDb, gt);
+      // Line drift is informational (minor) — should still be reported, but the table passes.
+      // Pass criteria across every comparator: zero critical AND zero major. Minor is allowed.
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            kind: 'mismatch',
+            severity: 'minor',
+            naturalKey: 'src/foo.ts::Foo',
+            details: expect.stringContaining('line'),
+          }),
+        ])
+      );
+    });
+
+    it('reports critical missing definition', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        definitions: [
+          { file: 'src/foo.ts', name: 'Foo', kind: 'class', isExported: true, line: 5, extendsName: 'Base' },
+        ],
+      });
+      const diff = compareDefinitions(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: 'src/foo.ts::helper',
+        }),
+      ]);
+    });
+
+    it('reports mismatch when extendsName differs', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        definitions: [
+          { file: 'src/foo.ts', name: 'Foo', kind: 'class', isExported: true, line: 5, extendsName: 'WrongBase' },
+          { file: 'src/foo.ts', name: 'helper', kind: 'function', isExported: false, line: 20 },
+        ],
+      });
+      const diff = compareDefinitions(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            kind: 'mismatch',
+            naturalKey: 'src/foo.ts::Foo',
+            details: expect.stringContaining('extendsName'),
+          }),
+        ])
+      );
+    });
+
+    it('reports extra definitions in produced not declared in ground truth', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        definitions: [
+          ...gt.definitions,
+          { file: 'src/foo.ts', name: 'rogue', kind: 'function', isExported: true, line: 30 },
+        ],
+      });
+      const diff = compareDefinitions(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            kind: 'extra',
+            severity: 'major',
+            naturalKey: 'src/foo.ts::rogue',
+          }),
+        ])
+      );
+    });
+
+    it('reports mismatch when implementsNames set differs (order-independent)', () => {
+      const gtWithImpl: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [
+          {
+            file: 'src/foo.ts',
+            name: 'Foo',
+            kind: 'class',
+            isExported: true,
+            line: 1,
+            implementsNames: ['IA', 'IB'],
+          },
+        ],
+      };
+      // Build with ONE interface — produced is missing IB
+      buildGroundTruthDb(producedDb, {
+        ...gtWithImpl,
+        definitions: [
+          {
+            file: 'src/foo.ts',
+            name: 'Foo',
+            kind: 'class',
+            isExported: true,
+            line: 1,
+            implementsNames: ['IA'],
+          },
+        ],
+      });
+      const diff = compareDefinitions(producedDb, gtWithImpl);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            kind: 'mismatch',
+            naturalKey: 'src/foo.ts::Foo',
+            details: expect.stringContaining('implementsNames'),
+          }),
+        ])
+      );
+    });
+
+    it('treats implementsNames as equal regardless of declaration order', () => {
+      const expected: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [
+          {
+            file: 'src/foo.ts',
+            name: 'Foo',
+            kind: 'class',
+            isExported: true,
+            line: 1,
+            implementsNames: ['IA', 'IB'],
+          },
+        ],
+      };
+      buildGroundTruthDb(producedDb, {
+        ...expected,
+        definitions: [
+          {
+            file: 'src/foo.ts',
+            name: 'Foo',
+            kind: 'class',
+            isExported: true,
+            line: 1,
+            implementsNames: ['IB', 'IA'], // reversed
+          },
+        ],
+      });
+      const diff = compareDefinitions(producedDb, expected);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('reports mismatch when isDefault differs', () => {
+      const gtDefault: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'Foo', kind: 'class', isExported: true, isDefault: true, line: 1 }],
+      };
+      // Build without isDefault
+      buildGroundTruthDb(producedDb, {
+        ...gtDefault,
+        definitions: [{ file: 'src/foo.ts', name: 'Foo', kind: 'class', isExported: true, isDefault: false, line: 1 }],
+      });
+      const diff = compareDefinitions(producedDb, gtDefault);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            kind: 'mismatch',
+            details: expect.stringContaining('isDefault'),
+          }),
+        ])
+      );
+    });
+  });
+
+  // ============================================================
+  // imports
+  // ============================================================
+  describe('compareImports', () => {
+    const gt: GroundTruth = {
+      fixtureName: 't',
+      files: [
+        { path: 'src/a.ts', language: 'typescript' },
+        { path: 'src/b.ts', language: 'typescript' },
+      ],
+      definitions: [{ file: 'src/b.ts', name: 'helper', kind: 'function', isExported: true, line: 1 }],
+      imports: [
+        {
+          fromFile: 'src/a.ts',
+          source: './b.js',
+          type: 'import',
+          symbols: [{ name: 'helper', kind: 'named' }],
+        },
+      ],
+    };
+
+    it('passes when imports match', () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = compareImports(producedDb, gt);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('reports missing when ground-truth import is absent', () => {
+      buildGroundTruthDb(producedDb, { ...gt, imports: [] });
+      const diff = compareImports(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([expect.objectContaining({ kind: 'missing', severity: 'major' })]);
+    });
+  });
+
+  // ============================================================
+  // modules + module_members
+  // ============================================================
+  describe('compareModules + compareModuleMembers', () => {
+    /** Stub judge keyed on `${reference}|${candidate}`. */
+    function stubJudge(scores: Record<string, number>): ProseJudgeFn {
+      return async (req) => {
+        const score = scores[`${req.reference}|${req.candidate}`] ?? 0;
+        return {
+          similarity: score,
+          passed: score >= req.minSimilarity,
+          reasoning: `stub score ${score}`,
+        };
+      };
+    }
+
+    /** Set the description column for a module in the produced DB (post-build). */
+    function setProducedDescription(fullPath: string, description: string): void {
+      producedDb
+        .getConnection()
+        .prepare('UPDATE modules SET description = ? WHERE full_path = ?')
+        .run(description, fullPath);
+    }
+
+    const gt: GroundTruth = {
+      fixtureName: 't',
+      files: [{ path: 'src/auth.ts', language: 'typescript' }],
+      definitions: [{ file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 }],
+      modules: [
+        {
+          fullPath: 'project.services.auth',
+          name: 'Auth',
+          members: [defKey('src/auth.ts', 'AuthService')],
+        },
+      ],
+    };
+
+    it('compareModules passes on exact tree match (ignoring auto-created ancestors)', async () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = await compareModules(producedDb, gt, stubJudge({}));
+      expect(diff.passed).toBe(true);
+    });
+
+    it('compareModules reports missing module', async () => {
+      buildGroundTruthDb(producedDb, { ...gt, modules: [] });
+      const diff = await compareModules(producedDb, gt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'major',
+          naturalKey: 'project.services.auth',
+        }),
+      ]);
+    });
+
+    it('compareModuleMembers passes when each definition lands in its expected module', () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = compareModuleMembers(producedDb, gt);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('compareModuleMembers reports definitions assigned to the wrong module', () => {
+      // Build with member assigned to a DIFFERENT module than expected
+      const wrongGt: GroundTruth = {
+        ...gt,
+        modules: [
+          {
+            fullPath: 'project.utils', // wrong module
+            name: 'Utils',
+            members: [defKey('src/auth.ts', 'AuthService')],
+          },
+        ],
+      };
+      buildGroundTruthDb(producedDb, wrongGt);
+      const diff = compareModuleMembers(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: 'src/auth.ts::AuthService',
+          details: expect.stringContaining('project.services.auth'),
+        }),
+      ]);
+    });
+
+    // --- description prose check (new in iteration 4) ---
+
+    it('compareModules passes prose check when judge approves the description', async () => {
+      buildGroundTruthDb(producedDb, gt);
+      setProducedDescription('project.services.auth', 'Authentication services for users.');
+
+      const expectedGt: GroundTruth = {
+        ...gt,
+        modules: [
+          {
+            fullPath: 'project.services.auth',
+            name: 'Auth',
+            members: [defKey('src/auth.ts', 'AuthService')],
+            descriptionReference: 'Authentication services for users.',
+          },
+        ],
+      };
+      const judge = stubJudge({
+        'Authentication services for users.|Authentication services for users.': 0.95,
+      });
+
+      const diff = await compareModules(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('compareModules records prose-drift minor when judge score is below threshold', async () => {
+      buildGroundTruthDb(producedDb, gt);
+      setProducedDescription('project.services.auth', 'Sends email newsletters.');
+
+      const expectedGt: GroundTruth = {
+        ...gt,
+        modules: [
+          {
+            fullPath: 'project.services.auth',
+            name: 'Auth',
+            members: [defKey('src/auth.ts', 'AuthService')],
+            descriptionReference: 'Authentication services for users.',
+            minSimilarity: 0.6,
+          },
+        ],
+      };
+      const judge = stubJudge({
+        'Authentication services for users.|Sends email newsletters.': 0.2,
+      });
+
+      const diff = await compareModules(producedDb, expectedGt, judge);
+      // Minor only — table still passes (no critical/major)
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: 'project.services.auth',
+        }),
+      ]);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+
+    it('compareModules skips judge call when GT entry has no descriptionReference', async () => {
+      buildGroundTruthDb(producedDb, gt);
+      setProducedDescription('project.services.auth', 'whatever the LLM said');
+
+      // GT module has no descriptionReference → existence-only check
+      const judge: ProseJudgeFn = async () => {
+        throw new Error('judge should not be called when there is no descriptionReference');
+      };
+
+      const diff = await compareModules(producedDb, gt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 0 });
+    });
+
+    it('compareModules uses default min similarity 0.6 when not specified', async () => {
+      buildGroundTruthDb(producedDb, gt);
+      setProducedDescription('project.services.auth', 'cand');
+
+      const expectedGt: GroundTruth = {
+        ...gt,
+        modules: [
+          {
+            fullPath: 'project.services.auth',
+            name: 'Auth',
+            members: [defKey('src/auth.ts', 'AuthService')],
+            descriptionReference: 'ref',
+            // no minSimilarity → default 0.6
+          },
+        ],
+      };
+      // 0.59 < 0.6 → fail
+      const judge = stubJudge({ 'ref|cand': 0.59 });
+      const diff = await compareModules(producedDb, expectedGt, judge);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+
+      // 0.6 == 0.6 → pass (boundary)
+      const judge2 = stubJudge({ 'ref|cand': 0.6 });
+      const diff2 = await compareModules(producedDb, expectedGt, judge2);
+      expect(diff2.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('compareModules treats NULL produced description as a failed prose check', async () => {
+      // Builder writes description=NULL by default; if GT declares a reference,
+      // the LLM is expected to have produced something. NULL = drop = fail.
+      buildGroundTruthDb(producedDb, gt);
+      // intentionally NOT setting a description — it stays NULL
+
+      const expectedGt: GroundTruth = {
+        ...gt,
+        modules: [
+          {
+            fullPath: 'project.services.auth',
+            name: 'Auth',
+            members: [defKey('src/auth.ts', 'AuthService')],
+            descriptionReference: 'Authentication services for users.',
+          },
+        ],
+      };
+      // The judge will never be called because the description is null;
+      // throw if it is.
+      const judge: ProseJudgeFn = async () => {
+        throw new Error('judge must not be called when produced description is NULL');
+      };
+
+      const diff = await compareModules(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true); // minor only, gate not flipped
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: 'project.services.auth',
+          details: expect.stringContaining('null'),
+        }),
+      ]);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+  });
+
+  // ============================================================
+  // contracts
+  // ============================================================
+  describe('compareContracts', () => {
+    const gt: GroundTruth = {
+      fixtureName: 't',
+      files: [{ path: 'src/auth.ts', language: 'typescript' }],
+      definitions: [{ file: 'src/auth.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+      contracts: [
+        {
+          protocol: 'http',
+          normalizedKey: 'POST /api/auth/login',
+          participants: [{ defKey: defKey('src/auth.ts', 'login'), role: 'server' }],
+        },
+      ],
+    };
+
+    it('passes on exact match', () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = compareContracts(producedDb, gt);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('reports critical missing contract (required)', () => {
+      buildGroundTruthDb(producedDb, { ...gt, contracts: [] });
+      const diff = compareContracts(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: 'http::POST /api/auth/login',
+        }),
+      ]);
+    });
+
+    it('reports MINOR missing for optional contracts (LLM may legitimately skip)', () => {
+      const optGt: GroundTruth = {
+        ...gt,
+        contracts: [
+          {
+            protocol: 'http',
+            normalizedKey: 'POST /api/auth/login',
+            participants: [{ defKey: defKey('src/auth.ts', 'login'), role: 'server' }],
+            optional: true,
+          },
+        ],
+      };
+      buildGroundTruthDb(producedDb, { ...gt, contracts: [] });
+      const diff = compareContracts(producedDb, optGt);
+      // Minor only — gate stays open
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'minor',
+          naturalKey: 'http::POST /api/auth/login',
+          details: expect.stringContaining('optional'),
+        }),
+      ]);
+    });
+
+    it('reports MINOR (not major) for extra produced contracts', () => {
+      const extraGt: GroundTruth = {
+        ...gt,
+        contracts: [
+          ...gt.contracts!,
+          {
+            protocol: 'event',
+            normalizedKey: 'task.completed',
+            participants: [{ defKey: defKey('src/auth.ts', 'login'), role: 'producer' }],
+          },
+        ],
+      };
+      buildGroundTruthDb(producedDb, extraGt);
+      // Compare against the smaller GT — the event contract becomes "extra"
+      const diff = compareContracts(producedDb, gt);
+      expect(diff.passed).toBe(true); // minor only
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'extra',
+          severity: 'minor',
+          naturalKey: 'event::task.completed',
+        }),
+      ]);
+    });
+  });
+
+  // ============================================================
+  // interactions
+  // ============================================================
+  describe('compareInteractions', () => {
+    const gt: GroundTruth = {
+      fixtureName: 't',
+      files: [
+        { path: 'src/c.ts', language: 'typescript' },
+        { path: 'src/s.ts', language: 'typescript' },
+      ],
+      definitions: [
+        { file: 'src/c.ts', name: 'ctrl', kind: 'function', isExported: true, line: 1 },
+        { file: 'src/s.ts', name: 'svc', kind: 'function', isExported: true, line: 1 },
+      ],
+      modules: [
+        { fullPath: 'project.controllers', name: 'C', members: [defKey('src/c.ts', 'ctrl')] },
+        { fullPath: 'project.services', name: 'S', members: [defKey('src/s.ts', 'svc')] },
+      ],
+      interactions: [
+        {
+          fromModulePath: 'project.controllers',
+          toModulePath: 'project.services',
+          pattern: 'business',
+          source: 'ast',
+        },
+      ],
+    };
+
+    it('passes on exact match', () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = compareInteractions(producedDb, gt);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('reports missing interaction', () => {
+      buildGroundTruthDb(producedDb, { ...gt, interactions: [] });
+      const diff = compareInteractions(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'major',
+          naturalKey: 'project.controllers->project.services',
+        }),
+      ]);
+    });
+
+    it('reports mismatch on wrong source', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        interactions: [
+          {
+            fromModulePath: 'project.controllers',
+            toModulePath: 'project.services',
+            pattern: 'business',
+            source: 'llm-inferred', // wrong
+          },
+        ],
+      });
+      const diff = compareInteractions(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          details: expect.stringContaining('source'),
+        }),
+      ]);
+    });
+  });
+
+  // ============================================================
+  // ID-agnosticism: comparators must join on natural keys, not row IDs
+  // ============================================================
+  describe('id-agnosticism — built in reverse order', () => {
+    it('compareDefinitions matches when produced DB has reversed insertion order', () => {
+      // Build the EXPECTED ground truth in normal order...
+      const gt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/a.ts', language: 'typescript' },
+          { path: 'src/b.ts', language: 'typescript' },
+          { path: 'src/c.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/a.ts', name: 'alpha', kind: 'function', isExported: true, line: 1 },
+          { file: 'src/b.ts', name: 'beta', kind: 'function', isExported: true, line: 1 },
+          { file: 'src/c.ts', name: 'gamma', kind: 'function', isExported: true, line: 1 },
+        ],
+      };
+
+      // ...but build the PRODUCED DB with files inserted in REVERSE order. This
+      // gives every row a different DB id than a fresh natural-order build would,
+      // proving the comparator joins on file_path/name/kind instead of IDs.
+      const reversedGt: GroundTruth = {
+        ...gt,
+        files: [...gt.files].reverse(),
+        definitions: [...gt.definitions].reverse(),
+      };
+      buildGroundTruthDb(producedDb, reversedGt);
+
+      // Sanity check: row IDs really did come out in reverse insertion order
+      const conn = producedDb.getConnection();
+      const idRows = conn.prepare('SELECT id, path FROM files ORDER BY id').all() as Array<{
+        id: number;
+        path: string;
+      }>;
+      expect(idRows.map((r) => r.path)).toEqual(['src/c.ts', 'src/b.ts', 'src/a.ts']);
+
+      // Now compare against the natural-order ground truth — should match exactly.
+      const fileDiff = compareFiles(producedDb, gt);
+      const defDiff = compareDefinitions(producedDb, gt);
+      expect(fileDiff.passed).toBe(true);
+      expect(fileDiff.diffs).toHaveLength(0);
+      expect(defDiff.passed).toBe(true);
+      expect(defDiff.diffs).toHaveLength(0);
+    });
+
+    it('compareModuleMembers matches when modules are inserted in different order than ground truth declares', () => {
+      const gt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/a.ts', language: 'typescript' },
+          { path: 'src/b.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/a.ts', name: 'A', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/b.ts', name: 'B', kind: 'class', isExported: true, line: 1 },
+        ],
+        modules: [
+          { fullPath: 'project.alpha', name: 'Alpha', members: [defKey('src/a.ts', 'A')] },
+          { fullPath: 'project.beta', name: 'Beta', members: [defKey('src/b.ts', 'B')] },
+        ],
+      };
+
+      // Reverse module insertion order
+      buildGroundTruthDb(producedDb, { ...gt, modules: [...gt.modules!].reverse() });
+
+      const diff = compareModuleMembers(producedDb, gt);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+    });
+  });
+
+  // ============================================================
+  // flows
+  // ============================================================
+  describe('compareFlows', () => {
+    const gt: GroundTruth = {
+      fixtureName: 't',
+      files: [{ path: 'src/c.ts', language: 'typescript' }],
+      definitions: [{ file: 'src/c.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+      modules: [{ fullPath: 'project.controllers', name: 'C', members: [defKey('src/c.ts', 'login')] }],
+      flows: [
+        {
+          slug: 'user-login',
+          name: 'Login',
+          stakeholder: 'user',
+          entryDef: defKey('src/c.ts', 'login'),
+          entryPath: 'POST /api/auth/login',
+        },
+      ],
+    };
+
+    it('passes on exact match', () => {
+      buildGroundTruthDb(producedDb, gt);
+      const diff = compareFlows(producedDb, gt);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('reports critical missing flow', () => {
+      buildGroundTruthDb(producedDb, { ...gt, flows: [] });
+      const diff = compareFlows(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: 'user-login',
+        }),
+      ]);
+    });
+
+    it('reports mismatch on wrong stakeholder', () => {
+      buildGroundTruthDb(producedDb, {
+        ...gt,
+        flows: [
+          {
+            slug: 'user-login',
+            name: 'Login',
+            stakeholder: 'admin', // wrong
+            entryDef: defKey('src/c.ts', 'login'),
+            entryPath: 'POST /api/auth/login',
+          },
+        ],
+      });
+      const diff = compareFlows(producedDb, gt);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          details: expect.stringContaining('stakeholder'),
+        }),
+      ]);
+    });
+  });
+
+  // ============================================================
+  // definition_metadata
+  // ============================================================
+  describe('compareDefinitionMetadata', () => {
+    /** Builds a stub judge that returns canned scores per (reference, candidate) pair. */
+    function stubJudge(scores: Record<string, number>): ProseJudgeFn {
+      return async (req) => {
+        const score = scores[`${req.reference}|${req.candidate}`] ?? 0;
+        return {
+          similarity: score,
+          passed: score >= req.minSimilarity,
+          reasoning: `stub score ${score}`,
+        };
+      };
+    }
+
+    /** Build a fixture with one definition and pre-populated metadata in the produced DB. */
+    function buildWithMetadata(metadata: Array<{ key: string; value: string }>): void {
+      const gt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: metadata.map((m) => ({
+          defKey: defKey('src/foo.ts', 'login'),
+          key: m.key,
+          exactValue: m.value,
+        })),
+      };
+      buildGroundTruthDb(producedDb, gt);
+    }
+
+    it('passes when all expected metadata is present and matches exactly', async () => {
+      buildWithMetadata([
+        { key: 'purpose', value: 'Authenticates a user.' },
+        { key: 'pure', value: 'false' },
+      ]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'purpose',
+            exactValue: 'Authenticates a user.',
+          },
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'pure',
+            exactValue: 'false',
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.expectedCount).toBe(2);
+    });
+
+    it('reports critical when GT references a definition that does not exist in produced', async () => {
+      // Build a DB with one def, but GT metadata references a non-existent def
+      buildWithMetadata([{ key: 'purpose', value: 'whatever' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/missing.ts', 'ghost'),
+            key: 'purpose',
+            exactValue: 'should not match anything',
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: expect.stringContaining('src/missing.ts::ghost'),
+        }),
+      ]);
+    });
+
+    it('reports major when an aspect is not annotated for an existing definition', async () => {
+      buildWithMetadata([
+        { key: 'purpose', value: 'Authenticates a user.' },
+        // pure NOT annotated
+      ]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'purpose',
+            exactValue: 'Authenticates a user.',
+          },
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'pure',
+            exactValue: 'false',
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'major',
+          naturalKey: expect.stringContaining('src/foo.ts::login'),
+          details: expect.stringContaining('pure'),
+        }),
+      ]);
+    });
+
+    it('reports major mismatch when pure value differs (exact match)', async () => {
+      buildWithMetadata([{ key: 'pure', value: 'true' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'pure',
+            exactValue: 'false',
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'major',
+          details: expect.stringContaining('pure'),
+        }),
+      ]);
+    });
+
+    it('reports MINOR (not major) when domain set differs (vocabulary drift)', async () => {
+      buildWithMetadata([{ key: 'domain', value: '["http"]' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            acceptableSet: ['authentication', 'security'],
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      // Minor diff present, but table still passes (no critical/major)
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'minor',
+          details: expect.stringContaining('domain'),
+        }),
+      ]);
+    });
+
+    it('domain set match is order-independent', async () => {
+      buildWithMetadata([{ key: 'domain', value: '["http","authentication"]' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            acceptableSet: ['authentication', 'http'], // reversed
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+    });
+
+    it('domain subset semantics: produced is a strict subset of acceptableSet → pass', async () => {
+      // LLM picked just one tag from a vocabulary of three; that's still acceptable
+      buildWithMetadata([{ key: 'domain', value: '["authentication"]' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            acceptableSet: ['authentication', 'auth', 'http', 'security'],
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+    });
+
+    it('domain subset semantics: outlier tag in produced → minor mismatch', async () => {
+      // LLM picked one OK tag and one out-of-vocabulary tag
+      buildWithMetadata([{ key: 'domain', value: '["authentication","payments"]' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            acceptableSet: ['authentication', 'auth', 'http', 'security'],
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true); // minor only
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'minor',
+          details: expect.stringContaining('payments'),
+        }),
+      ]);
+    });
+
+    it('domain subset semantics: empty produced array → minor mismatch', async () => {
+      buildWithMetadata([{ key: 'domain', value: '[]' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            acceptableSet: ['authentication'],
+          },
+        ],
+      };
+
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true); // minor only
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'minor',
+        }),
+      ]);
+    });
+
+    it('records prose-drift minor diff when judge score < threshold', async () => {
+      buildWithMetadata([{ key: 'purpose', value: 'Sends emails to nobody.' }]);
+
+      const reference = 'Authenticates a user by verifying credentials.';
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'purpose',
+            proseReference: reference,
+            minSimilarity: 0.75,
+          },
+        ],
+      };
+
+      const judge = stubJudge({ [`${reference}|Sends emails to nobody.`]: 0.2 });
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, judge);
+
+      // Minor prose drift → does NOT flip passed
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'prose-drift',
+          severity: 'minor',
+        }),
+      ]);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+
+    it('bumps proseChecks.passed when judge approves', async () => {
+      buildWithMetadata([{ key: 'purpose', value: 'Verifies user identity and signs an auth token.' }]);
+
+      const reference = 'Authenticates a user by verifying credentials and returning a JWT.';
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'purpose',
+            proseReference: reference,
+          },
+        ],
+      };
+
+      const judge = stubJudge({
+        [`${reference}|Verifies user identity and signs an auth token.`]: 0.9,
+      });
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, judge);
+
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('uses default min similarity 0.75 when not specified', async () => {
+      buildWithMetadata([{ key: 'purpose', value: 'cand' }]);
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'purpose',
+            proseReference: 'ref',
+            // no minSimilarity → default 0.75
+          },
+        ],
+      };
+      // 0.74 < 0.75 → fail
+      const judge = stubJudge({ 'ref|cand': 0.74 });
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, judge);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+
+    // --- themeReference strategy (Phase 1: replaces acceptableSet vocab spaghetti) ---
+
+    it('themeReference: passes when judge approves the produced tag list', async () => {
+      buildWithMetadata([{ key: 'domain', value: '["security","user-management"]' }]);
+
+      const themeRef = 'tags should reflect that this function hashes a password during user registration';
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            themeReference: themeRef,
+          },
+        ],
+      };
+
+      // The candidate is formatted as readable prose: "tags: security, user-management"
+      const judge = stubJudge({ [`${themeRef}|tags: security, user-management`]: 0.85 });
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('themeReference: minor prose-drift when judge score below threshold', async () => {
+      buildWithMetadata([{ key: 'domain', value: '["unrelated","off-topic"]' }]);
+
+      const themeRef = 'tags should reflect a password hashing function';
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            themeReference: themeRef,
+          },
+        ],
+      };
+
+      const judge = stubJudge({ [`${themeRef}|tags: unrelated, off-topic`]: 0.2 });
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true); // minor only — gate not flipped
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: expect.stringContaining('domain'),
+        }),
+      ]);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+
+    it('themeReference: minor mismatch when produced array is below minTagsRequired floor', async () => {
+      buildWithMetadata([{ key: 'domain', value: '[]' }]); // empty array
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            themeReference: 'tags should reflect anything',
+            minTagsRequired: 1, // floor
+          },
+        ],
+      };
+
+      // The judge should NOT be called when the floor fails — throw if it is.
+      const failingJudge: ProseJudgeFn = async () => {
+        throw new Error('judge must not be called when produced tags fail the floor check');
+      };
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, failingJudge);
+      expect(diff.passed).toBe(true); // minor only
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'minor',
+          details: expect.stringContaining('minTagsRequired'),
+        }),
+      ]);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 0 });
+    });
+
+    it('themeReference: default min similarity is 0.6 (not 0.75)', async () => {
+      buildWithMetadata([{ key: 'domain', value: '["a"]' }]);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            themeReference: 'ref',
+            // no minSimilarity → default 0.6 for theme refs
+          },
+        ],
+      };
+
+      // 0.59 < 0.6 → fail
+      const failJudge = stubJudge({ 'ref|tags: a': 0.59 });
+      const diffFail = await compareDefinitionMetadata(producedDb, expectedGt, failJudge);
+      expect(diffFail.proseChecks).toEqual({ passed: 0, failed: 1 });
+
+      // 0.6 == 0.6 → pass (boundary inclusive)
+      const passJudge = stubJudge({ 'ref|tags: a': 0.6 });
+      const diffPass = await compareDefinitionMetadata(producedDb, expectedGt, passJudge);
+      expect(diffPass.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('themeReference: minor mismatch when produced value is not a JSON array', async () => {
+      buildWithMetadata([{ key: 'domain', value: 'not-json' }]); // builder writes the literal string
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/foo.ts', language: 'typescript' }],
+        definitions: [{ file: 'src/foo.ts', name: 'login', kind: 'function', isExported: true, line: 1 }],
+        definitionMetadata: [
+          {
+            defKey: defKey('src/foo.ts', 'login'),
+            key: 'domain',
+            themeReference: 'ref',
+          },
+        ],
+      };
+
+      const noJudgeCalls: ProseJudgeFn = async () => {
+        throw new Error('judge must not be called when produced value is not a JSON array');
+      };
+      const diff = await compareDefinitionMetadata(producedDb, expectedGt, noJudgeCalls);
+      expect(diff.passed).toBe(true); // minor only
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'minor',
+          details: expect.stringMatching(/JSON.*array|themeReference|parse/i),
+        }),
+      ]);
+    });
+  });
+
+  // ============================================================
+  // relationship_annotations
+  // ============================================================
+  describe('compareRelationshipAnnotations', () => {
+    /** Stub judge keyed on `${reference}|${candidate}`. */
+    function stubJudge(scores: Record<string, number>): ProseJudgeFn {
+      return async (req) => {
+        const score = scores[`${req.reference}|${req.candidate}`] ?? 0;
+        return {
+          similarity: score,
+          passed: score >= req.minSimilarity,
+          reasoning: `stub score ${score}`,
+        };
+      };
+    }
+
+    /**
+     * Two-file fixture with one inheritance edge (TasksRepository → BaseRepository)
+     * and one "uses" edge (TasksService → tasksRepository). The shape mirrors the
+     * real todo-api relationships well enough to validate the comparator end-to-end.
+     */
+    const baseFixture: GroundTruth = {
+      fixtureName: 't',
+      files: [
+        { path: 'src/repo.ts', language: 'typescript' },
+        { path: 'src/svc.ts', language: 'typescript' },
+      ],
+      definitions: [
+        { file: 'src/repo.ts', name: 'BaseRepository', kind: 'class', isExported: true, line: 1 },
+        {
+          file: 'src/repo.ts',
+          name: 'TasksRepository',
+          kind: 'class',
+          isExported: true,
+          line: 5,
+          extendsName: 'BaseRepository',
+        },
+        { file: 'src/repo.ts', name: 'tasksRepository', kind: 'const', isExported: true, line: 10 },
+        { file: 'src/svc.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+      ],
+    };
+
+    /**
+     * Build the produced DB with the given relationship rows. Each row's
+     * semanticReference is stored as the produced `semantic` value (the builder
+     * does no validation), so this is the easiest way to inject a
+     * 'PENDING_LLM_ANNOTATION' placeholder into a fake produced DB.
+     */
+    function buildWithRelationships(rows: GroundTruth['relationships']): void {
+      buildGroundTruthDb(producedDb, { ...baseFixture, relationships: rows });
+    }
+
+    it('passes when every GT relationship is present with matching type and approved prose', async () => {
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/repo.ts', 'TasksRepository'),
+          toDef: defKey('src/repo.ts', 'BaseRepository'),
+          relationshipType: 'extends',
+          semanticReference: 'TasksRepository inherits from BaseRepository.',
+        },
+        {
+          fromDef: defKey('src/svc.ts', 'TasksService'),
+          toDef: defKey('src/repo.ts', 'tasksRepository'),
+          relationshipType: 'uses',
+          semanticReference: 'Calls the repository to read and write tasks.',
+        },
+      ]);
+
+      const judge = stubJudge({
+        'TasksRepository inherits from BaseRepository.|TasksRepository inherits from BaseRepository.': 0.95,
+        'Calls the repository to read and write tasks.|Calls the repository to read and write tasks.': 0.9,
+      });
+
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/repo.ts', 'TasksRepository'),
+            toDef: defKey('src/repo.ts', 'BaseRepository'),
+            relationshipType: 'extends',
+            semanticReference: 'TasksRepository inherits from BaseRepository.',
+          },
+          {
+            fromDef: defKey('src/svc.ts', 'TasksService'),
+            toDef: defKey('src/repo.ts', 'tasksRepository'),
+            relationshipType: 'uses',
+            semanticReference: 'Calls the repository to read and write tasks.',
+          },
+        ],
+      };
+
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.proseChecks).toEqual({ passed: 2, failed: 0 });
+    });
+
+    it('reports critical when a GT relationship is missing in produced', async () => {
+      // Build only the inheritance edge — the "uses" edge is missing.
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/repo.ts', 'TasksRepository'),
+          toDef: defKey('src/repo.ts', 'BaseRepository'),
+          relationshipType: 'extends',
+          semanticReference: 'inherits',
+        },
+      ]);
+
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/repo.ts', 'TasksRepository'),
+            toDef: defKey('src/repo.ts', 'BaseRepository'),
+            relationshipType: 'extends',
+            semanticReference: 'inherits',
+          },
+          {
+            fromDef: defKey('src/svc.ts', 'TasksService'),
+            toDef: defKey('src/repo.ts', 'tasksRepository'),
+            relationshipType: 'uses',
+            semanticReference: 'calls',
+          },
+        ],
+      };
+
+      const judge = stubJudge({ 'inherits|inherits': 0.95 });
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: 'src/svc.ts::TasksService->src/repo.ts::tasksRepository',
+        }),
+      ]);
+    });
+
+    it('reports critical when GT references a definition that does not exist in produced', async () => {
+      buildWithRelationships([]);
+
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/missing.ts', 'Ghost'),
+            toDef: defKey('src/repo.ts', 'BaseRepository'),
+            relationshipType: 'extends',
+            semanticReference: 'should not match anything',
+          },
+        ],
+      };
+
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: expect.stringContaining('src/missing.ts::Ghost'),
+        }),
+      ]);
+    });
+
+    it('reports major when relationship_type differs (extends vs uses)', async () => {
+      // Builder uses set() with 'uses', so we need to bypass the inheritance-stickiness
+      // by writing the row directly. Easiest path: build via the GT helper but
+      // pass relationshipType:'uses' so the produced row stores 'uses' for an
+      // edge GT expects to be 'extends'.
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/repo.ts', 'TasksRepository'),
+          toDef: defKey('src/repo.ts', 'BaseRepository'),
+          relationshipType: 'uses', // ← wrong type
+          semanticReference: 'TasksRepository uses BaseRepository.',
+        },
+      ]);
+
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/repo.ts', 'TasksRepository'),
+            toDef: defKey('src/repo.ts', 'BaseRepository'),
+            relationshipType: 'extends', // ← GT says extends
+            semanticReference: 'TasksRepository inherits from BaseRepository.',
+          },
+        ],
+      };
+
+      const judge = stubJudge({
+        'TasksRepository inherits from BaseRepository.|TasksRepository uses BaseRepository.': 0.9,
+      });
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            kind: 'mismatch',
+            severity: 'major',
+            naturalKey: 'src/repo.ts::TasksRepository->src/repo.ts::BaseRepository',
+            details: expect.stringContaining('relationship_type'),
+          }),
+        ])
+      );
+    });
+
+    it('reports major when produced semantic equals PENDING_LLM_ANNOTATION', async () => {
+      // The placeholder semantic is what parse-time inheritance edges start as
+      // before the relationships LLM stage replaces them. If the LLM drops the
+      // edge, the placeholder leaks through — this is exactly the bug class
+      // iteration 3 wants to catch.
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/repo.ts', 'TasksRepository'),
+          toDef: defKey('src/repo.ts', 'BaseRepository'),
+          relationshipType: 'extends',
+          semanticReference: 'PENDING_LLM_ANNOTATION',
+        },
+      ]);
+
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/repo.ts', 'TasksRepository'),
+            toDef: defKey('src/repo.ts', 'BaseRepository'),
+            relationshipType: 'extends',
+            semanticReference: 'TasksRepository inherits from BaseRepository.',
+          },
+        ],
+      };
+
+      // Even if the judge would happily approve the placeholder, the comparator
+      // should refuse to forward to the judge and report a major diff first.
+      const generousJudge = stubJudge({
+        'TasksRepository inherits from BaseRepository.|PENDING_LLM_ANNOTATION': 1.0,
+      });
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, generousJudge);
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: 'src/repo.ts::TasksRepository->src/repo.ts::BaseRepository',
+          details: expect.stringContaining('PENDING_LLM_ANNOTATION'),
+        }),
+      ]);
+      // The placeholder must NOT have been counted as a passed prose check.
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 0 });
+    });
+
+    it('records prose-drift minor diff when judge score < threshold', async () => {
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/svc.ts', 'TasksService'),
+          toDef: defKey('src/repo.ts', 'tasksRepository'),
+          relationshipType: 'uses',
+          semanticReference: 'Sends marketing emails.',
+        },
+      ]);
+
+      const reference = 'Reads and writes tasks via the repository.';
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/svc.ts', 'TasksService'),
+            toDef: defKey('src/repo.ts', 'tasksRepository'),
+            relationshipType: 'uses',
+            semanticReference: reference,
+            minSimilarity: 0.75,
+          },
+        ],
+      };
+
+      const judge = stubJudge({ [`${reference}|Sends marketing emails.`]: 0.2 });
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+
+      expect(diff.passed).toBe(true); // minor only
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: 'src/svc.ts::TasksService->src/repo.ts::tasksRepository',
+        }),
+      ]);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+
+    it('bumps proseChecks.passed when judge approves and produces no diff', async () => {
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/svc.ts', 'TasksService'),
+          toDef: defKey('src/repo.ts', 'tasksRepository'),
+          relationshipType: 'uses',
+          semanticReference: 'Reads and writes tasks via the repository.',
+        },
+      ]);
+
+      const reference = 'Reads and writes tasks via the repository.';
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/svc.ts', 'TasksService'),
+            toDef: defKey('src/repo.ts', 'tasksRepository'),
+            relationshipType: 'uses',
+            semanticReference: reference,
+          },
+        ],
+      };
+
+      const judge = stubJudge({ [`${reference}|Reads and writes tasks via the repository.`]: 0.95 });
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('ignores extra produced relationships not declared in ground truth', async () => {
+      // Produced has an extra "uses" edge the GT does not enumerate. The eval
+      // should NOT flag this — the GT is an existence claim ("at least these
+      // edges exist"), not a strict-equality claim. Symbols stage routinely
+      // produces more edges than we manually catalog.
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/repo.ts', 'TasksRepository'),
+          toDef: defKey('src/repo.ts', 'BaseRepository'),
+          relationshipType: 'extends',
+          semanticReference: 'inherits',
+        },
+        {
+          fromDef: defKey('src/svc.ts', 'TasksService'),
+          toDef: defKey('src/repo.ts', 'tasksRepository'),
+          relationshipType: 'uses',
+          semanticReference: 'extra-not-in-gt',
+        },
+      ]);
+
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/repo.ts', 'TasksRepository'),
+            toDef: defKey('src/repo.ts', 'BaseRepository'),
+            relationshipType: 'extends',
+            semanticReference: 'inherits',
+          },
+        ],
+      };
+
+      const judge = stubJudge({ 'inherits|inherits': 0.95 });
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      // expectedCount counts the GT, producedCount counts everything in the table.
+      expect(diff.expectedCount).toBe(1);
+      expect(diff.producedCount).toBe(2);
+    });
+
+    it('uses default min similarity 0.75 when not specified', async () => {
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/svc.ts', 'TasksService'),
+          toDef: defKey('src/repo.ts', 'tasksRepository'),
+          relationshipType: 'uses',
+          semanticReference: 'cand',
+        },
+      ]);
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/svc.ts', 'TasksService'),
+            toDef: defKey('src/repo.ts', 'tasksRepository'),
+            relationshipType: 'uses',
+            semanticReference: 'ref',
+            // no minSimilarity → default 0.75
+          },
+        ],
+      };
+      const judge = stubJudge({ 'ref|cand': 0.74 });
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+
+    it('skips judge call when GT entry has no semanticReference (existence-only check)', async () => {
+      buildWithRelationships([
+        {
+          fromDef: defKey('src/repo.ts', 'TasksRepository'),
+          toDef: defKey('src/repo.ts', 'BaseRepository'),
+          relationshipType: 'extends',
+          semanticReference: 'whatever the LLM said',
+        },
+      ]);
+      const expectedGt: GroundTruth = {
+        ...baseFixture,
+        relationships: [
+          {
+            fromDef: defKey('src/repo.ts', 'TasksRepository'),
+            toDef: defKey('src/repo.ts', 'BaseRepository'),
+            relationshipType: 'extends',
+            // no semanticReference → existence + type only
+          },
+        ],
+      };
+      // A judge that throws if called — proves we never invoked it.
+      const judge: ProseJudgeFn = async () => {
+        throw new Error('judge should not be called when there is no semanticReference');
+      };
+      const diff = await compareRelationshipAnnotations(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 0 });
+    });
+  });
+
+  // ============================================================
+  // module_cohesion (Phase 1: rubric-based modules verification)
+  // ============================================================
+  describe('compareModuleCohesion', () => {
+    /** Stub judge keyed on `${reference}|${candidate}`. */
+    function stubJudge(scores: Record<string, number>): ProseJudgeFn {
+      return async (req) => {
+        const score = scores[`${req.reference}|${req.candidate}`] ?? 0;
+        return {
+          similarity: score,
+          passed: score >= req.minSimilarity,
+          reasoning: `stub score ${score}`,
+        };
+      };
+    }
+
+    /**
+     * Build a small fixture with two modules and four definitions, where the
+     * builder assigns the definitions to specific modules. We then compare
+     * against a different ground truth that uses moduleCohesion claims.
+     */
+    function buildTwoModuleFixture(
+      defAssignments: Array<{ defName: string; moduleFullPath: string }>,
+      moduleDescriptions: Record<string, string>
+    ): void {
+      const buildGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        modules: [],
+      };
+      // Build modules implied by the assignments
+      const modulePaths = Array.from(new Set(defAssignments.map((a) => a.moduleFullPath)));
+      buildGt.modules = modulePaths.map((p) => ({
+        fullPath: p,
+        name: p.split('.').pop() ?? p,
+        members: defAssignments
+          .filter((a) => a.moduleFullPath === p)
+          .map((a) => {
+            const file = a.defName === 'AuthService' || a.defName === 'authService' ? 'src/auth.ts' : 'src/tasks.ts';
+            return defKey(file, a.defName);
+          }),
+      }));
+      buildGroundTruthDb(producedDb, buildGt);
+
+      // Set descriptions on the produced modules (the builder writes undefined)
+      const conn = producedDb.getConnection();
+      for (const [path, desc] of Object.entries(moduleDescriptions)) {
+        conn.prepare('UPDATE modules SET description = ? WHERE full_path = ?').run(desc, path);
+      }
+    }
+
+    it('strict cohesion passes when all members are in one module and the role judge approves', async () => {
+      buildTwoModuleFixture(
+        [
+          { defName: 'AuthService', moduleFullPath: 'project.services.auth' },
+          { defName: 'authService', moduleFullPath: 'project.services.auth' },
+        ],
+        { 'project.services.auth': 'Authentication service' }
+      );
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'auth-service-bundle',
+            members: [defKey('src/auth.ts', 'AuthService'), defKey('src/auth.ts', 'authService')],
+            expectedRole: 'authentication service module',
+          },
+        ],
+      };
+
+      const judge = stubJudge({ 'authentication service module|auth: Authentication service': 0.9 });
+      const diff = await compareModuleCohesion(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.expectedCount).toBe(1);
+      expect(diff.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('strict cohesion: MAJOR when members are scattered across modules', async () => {
+      buildTwoModuleFixture(
+        [
+          { defName: 'AuthService', moduleFullPath: 'project.services.auth' },
+          { defName: 'authService', moduleFullPath: 'project.services.tasks' }, // wrong!
+        ],
+        {}
+      );
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'auth-bundle',
+            members: [defKey('src/auth.ts', 'AuthService'), defKey('src/auth.ts', 'authService')],
+            expectedRole: 'auth service',
+            cohesion: 'strict',
+          },
+        ],
+      };
+
+      const diff = await compareModuleCohesion(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: 'auth-bundle',
+          details: expect.stringContaining('cohesion'),
+        }),
+      ]);
+    });
+
+    it('majority cohesion passes when >=50% share a module (boundary inclusive)', async () => {
+      buildTwoModuleFixture(
+        [
+          { defName: 'AuthService', moduleFullPath: 'project.services.auth' },
+          { defName: 'authService', moduleFullPath: 'project.services.auth' },
+          { defName: 'TasksService', moduleFullPath: 'project.services.tasks' }, // odd one out
+        ],
+        { 'project.services.auth': 'Authentication service' }
+      );
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'auth-bundle',
+            members: [
+              defKey('src/auth.ts', 'AuthService'),
+              defKey('src/auth.ts', 'authService'),
+              defKey('src/tasks.ts', 'TasksService'),
+            ],
+            expectedRole: 'auth service module',
+            cohesion: 'majority', // 2/3 in one module is OK
+          },
+        ],
+      };
+
+      const judge = stubJudge({ 'auth service module|auth: Authentication service': 0.9 });
+      const diff = await compareModuleCohesion(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+    });
+
+    it('CRITICAL when a member is unassigned to any module', async () => {
+      // Build with only one of the two members assigned
+      buildTwoModuleFixture([{ defName: 'AuthService', moduleFullPath: 'project.services.auth' }], {
+        'project.services.auth': 'Authentication service',
+      });
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'auth-bundle',
+            members: [defKey('src/auth.ts', 'AuthService'), defKey('src/auth.ts', 'authService')],
+            expectedRole: 'auth service',
+          },
+        ],
+      };
+
+      const diff = await compareModuleCohesion(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: 'auth-bundle',
+          details: expect.stringContaining('unassigned'),
+        }),
+      ]);
+    });
+
+    it('CRITICAL when GT references a definition that does not exist in produced', async () => {
+      buildTwoModuleFixture([{ defName: 'AuthService', moduleFullPath: 'project.services.auth' }], {
+        'project.services.auth': 'Authentication service',
+      });
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'ghost-group',
+            members: [defKey('src/missing.ts', 'Ghost')],
+            expectedRole: 'something',
+          },
+        ],
+      };
+
+      const diff = await compareModuleCohesion(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: 'ghost-group',
+          details: expect.stringContaining('unknown definition'),
+        }),
+      ]);
+    });
+
+    it('role judge fail produces MINOR prose-drift, gate stays open', async () => {
+      buildTwoModuleFixture(
+        [
+          { defName: 'AuthService', moduleFullPath: 'project.misc' },
+          { defName: 'authService', moduleFullPath: 'project.misc' },
+        ],
+        { 'project.misc': 'Miscellaneous stuff' }
+      );
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'auth-bundle',
+            members: [defKey('src/auth.ts', 'AuthService'), defKey('src/auth.ts', 'authService')],
+            expectedRole: 'authentication service module',
+          },
+        ],
+      };
+
+      const judge = stubJudge({ 'authentication service module|misc: Miscellaneous stuff': 0.2 });
+      const diff = await compareModuleCohesion(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true); // minor only
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: 'auth-bundle',
+        }),
+      ]);
+      expect(diff.proseChecks).toEqual({ passed: 0, failed: 1 });
+    });
+
+    it('default minRoleSimilarity is 0.6', async () => {
+      buildTwoModuleFixture(
+        [
+          { defName: 'AuthService', moduleFullPath: 'project.services.auth' },
+          { defName: 'authService', moduleFullPath: 'project.services.auth' },
+        ],
+        { 'project.services.auth': 'cand' }
+      );
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'auth-bundle',
+            members: [defKey('src/auth.ts', 'AuthService'), defKey('src/auth.ts', 'authService')],
+            expectedRole: 'ref',
+            // no minRoleSimilarity → default 0.6
+          },
+        ],
+      };
+
+      // 0.59 < 0.6 → fail
+      const failJudge = stubJudge({ 'ref|auth: cand': 0.59 });
+      const diffFail = await compareModuleCohesion(producedDb, expectedGt, failJudge);
+      expect(diffFail.proseChecks).toEqual({ passed: 0, failed: 1 });
+
+      // 0.6 == 0.6 → pass
+      const passJudge = stubJudge({ 'ref|auth: cand': 0.6 });
+      const diffPass = await compareModuleCohesion(producedDb, expectedGt, passJudge);
+      expect(diffPass.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('handles a winner module with NULL description gracefully', async () => {
+      buildTwoModuleFixture(
+        [
+          { defName: 'AuthService', moduleFullPath: 'project.services.auth' },
+          { defName: 'authService', moduleFullPath: 'project.services.auth' },
+        ],
+        {} // no description set
+      );
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/auth.ts', language: 'typescript' },
+          { path: 'src/tasks.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/auth.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/auth.ts', name: 'authService', kind: 'const', isExported: true, line: 2 },
+          { file: 'src/tasks.ts', name: 'TasksService', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/tasks.ts', name: 'tasksService', kind: 'const', isExported: true, line: 2 },
+        ],
+        moduleCohesion: [
+          {
+            label: 'auth-bundle',
+            members: [defKey('src/auth.ts', 'AuthService'), defKey('src/auth.ts', 'authService')],
+            expectedRole: 'auth service',
+          },
+        ],
+      };
+
+      // The candidate format should fall back to "(no description)" when description is null
+      const judge = stubJudge({ 'auth service|auth: (no description)': 0.7 });
+      const diff = await compareModuleCohesion(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+  });
+
+  // ============================================================
+  // interaction_rubric (Phase 2: anchor-based interactions verification)
+  // ============================================================
+  describe('compareInteractionRubric', () => {
+    /** Stub judge keyed on `${reference}|${candidate}`. */
+    function stubJudge(scores: Record<string, number>): ProseJudgeFn {
+      return async (req) => {
+        const score = scores[`${req.reference}|${req.candidate}`] ?? 0;
+        return {
+          similarity: score,
+          passed: score >= req.minSimilarity,
+          reasoning: `stub score ${score}`,
+        };
+      };
+    }
+
+    /**
+     * Build a fixture with two modules each containing one definition,
+     * connected by an interaction edge. Returns the GroundTruth used to
+     * build (so tests can pass it OR a different one for comparison).
+     */
+    function buildTwoModFixture(
+      interactionSource: 'ast' | 'ast-import' | 'llm-inferred' | 'contract-matched',
+      interactionSemantic: string | null
+    ): GroundTruth {
+      const buildGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/c.ts', language: 'typescript' },
+          { path: 'src/s.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/c.ts', name: 'AuthController', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/s.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+        ],
+        modules: [
+          { fullPath: 'project.api.auth', name: 'AuthAPI', members: [defKey('src/c.ts', 'AuthController')] },
+          { fullPath: 'project.services.auth', name: 'AuthService', members: [defKey('src/s.ts', 'AuthService')] },
+        ],
+        interactions: [
+          {
+            fromModulePath: 'project.api.auth',
+            toModulePath: 'project.services.auth',
+            pattern: 'business',
+            source: interactionSource,
+            ...(interactionSemantic !== null && { semanticReference: interactionSemantic }),
+          },
+        ],
+      };
+      buildGroundTruthDb(producedDb, buildGt);
+      // The builder doesn't write the semantic field for interactions; set it
+      // directly via raw SQL so tests can exercise the prose path.
+      if (interactionSemantic !== null) {
+        producedDb.getConnection().prepare('UPDATE interactions SET semantic = ?').run(interactionSemantic);
+      }
+      return buildGt;
+    }
+
+    it('passes when anchors resolve to modules connected by an acceptable interaction', async () => {
+      buildTwoModFixture('ast', null);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/c.ts', language: 'typescript' },
+          { path: 'src/s.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/c.ts', name: 'AuthController', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/s.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+        ],
+        interactionRubric: [
+          {
+            label: 'auth-controller-uses-auth-service',
+            fromAnchor: defKey('src/c.ts', 'AuthController'),
+            toAnchor: defKey('src/s.ts', 'AuthService'),
+          },
+        ],
+      };
+
+      const diff = await compareInteractionRubric(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true);
+      expect(diff.diffs).toHaveLength(0);
+      expect(diff.expectedCount).toBe(1);
+    });
+
+    it('CRITICAL when an anchor def does not exist', async () => {
+      buildTwoModFixture('ast', null);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/c.ts', language: 'typescript' },
+          { path: 'src/s.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/c.ts', name: 'AuthController', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/s.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+        ],
+        interactionRubric: [
+          {
+            label: 'ghost',
+            fromAnchor: defKey('src/missing.ts', 'Ghost'),
+            toAnchor: defKey('src/s.ts', 'AuthService'),
+          },
+        ],
+      };
+
+      const diff = await compareInteractionRubric(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'critical',
+          naturalKey: 'ghost',
+          details: expect.stringContaining('unknown FROM anchor'),
+        }),
+      ]);
+    });
+
+    it('MAJOR when no interaction edge exists between resolved modules', async () => {
+      // Build with a self-loop interaction (api.auth → api.auth) that doesn't
+      // match any cross-module rubric.
+      const buildGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/c.ts', language: 'typescript' },
+          { path: 'src/s.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/c.ts', name: 'AuthController', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/s.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+        ],
+        modules: [
+          { fullPath: 'project.api.auth', name: 'AuthAPI', members: [defKey('src/c.ts', 'AuthController')] },
+          { fullPath: 'project.services.auth', name: 'AuthService', members: [defKey('src/s.ts', 'AuthService')] },
+        ],
+        // Note: NO interactions
+      };
+      buildGroundTruthDb(producedDb, buildGt);
+
+      const expectedGt: GroundTruth = {
+        ...buildGt,
+        interactionRubric: [
+          {
+            label: 'auth-pair',
+            fromAnchor: defKey('src/c.ts', 'AuthController'),
+            toAnchor: defKey('src/s.ts', 'AuthService'),
+          },
+        ],
+      };
+
+      const diff = await compareInteractionRubric(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'missing',
+          severity: 'major',
+          naturalKey: 'auth-pair',
+          details: expect.stringContaining('no interaction edge'),
+        }),
+      ]);
+    });
+
+    it("MAJOR when interaction source isn't in the acceptable set", async () => {
+      buildTwoModFixture('llm-inferred', null);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/c.ts', language: 'typescript' },
+          { path: 'src/s.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/c.ts', name: 'AuthController', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/s.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+        ],
+        interactionRubric: [
+          {
+            label: 'auth-pair',
+            fromAnchor: defKey('src/c.ts', 'AuthController'),
+            toAnchor: defKey('src/s.ts', 'AuthService'),
+            // Default acceptableSources excludes 'llm-inferred'
+          },
+        ],
+      };
+
+      const diff = await compareInteractionRubric(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(false);
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: 'auth-pair',
+          details: expect.stringContaining("source 'llm-inferred'"),
+        }),
+      ]);
+    });
+
+    it('passes when llm-inferred is in the acceptable set explicitly', async () => {
+      buildTwoModFixture('llm-inferred', null);
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/c.ts', language: 'typescript' },
+          { path: 'src/s.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/c.ts', name: 'AuthController', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/s.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+        ],
+        interactionRubric: [
+          {
+            label: 'auth-pair',
+            fromAnchor: defKey('src/c.ts', 'AuthController'),
+            toAnchor: defKey('src/s.ts', 'AuthService'),
+            acceptableSources: ['ast', 'ast-import', 'llm-inferred', 'contract-matched'],
+          },
+        ],
+      };
+
+      const diff = await compareInteractionRubric(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true);
+    });
+
+    it('semantic prose check passes when judge approves (theme mode)', async () => {
+      buildTwoModFixture('ast', 'authenticates user credentials before forwarding the request');
+
+      const expectedGt: GroundTruth = {
+        fixtureName: 't',
+        files: [
+          { path: 'src/c.ts', language: 'typescript' },
+          { path: 'src/s.ts', language: 'typescript' },
+        ],
+        definitions: [
+          { file: 'src/c.ts', name: 'AuthController', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/s.ts', name: 'AuthService', kind: 'class', isExported: true, line: 1 },
+        ],
+        interactionRubric: [
+          {
+            label: 'auth-pair',
+            fromAnchor: defKey('src/c.ts', 'AuthController'),
+            toAnchor: defKey('src/s.ts', 'AuthService'),
+            semanticReference: 'authentication delegation from controller to service',
+          },
+        ],
+      };
+
+      const judge = stubJudge({
+        'authentication delegation from controller to service|authenticates user credentials before forwarding the request': 0.85,
+      });
+      const diff = await compareInteractionRubric(producedDb, expectedGt, judge);
+      expect(diff.passed).toBe(true);
+      expect(diff.proseChecks).toEqual({ passed: 1, failed: 0 });
+    });
+
+    it('MINOR when both anchors resolve to the same module (self-loop, gate stays open)', async () => {
+      const buildGt: GroundTruth = {
+        fixtureName: 't',
+        files: [{ path: 'src/c.ts', language: 'typescript' }],
+        definitions: [
+          { file: 'src/c.ts', name: 'A', kind: 'class', isExported: true, line: 1 },
+          { file: 'src/c.ts', name: 'B', kind: 'class', isExported: true, line: 2 },
+        ],
+        modules: [
+          {
+            fullPath: 'project.module',
+            name: 'Module',
+            members: [defKey('src/c.ts', 'A'), defKey('src/c.ts', 'B')],
+          },
+        ],
+      };
+      buildGroundTruthDb(producedDb, buildGt);
+
+      const expectedGt: GroundTruth = {
+        ...buildGt,
+        interactionRubric: [
+          {
+            label: 'self-loop',
+            fromAnchor: defKey('src/c.ts', 'A'),
+            toAnchor: defKey('src/c.ts', 'B'),
+          },
+        ],
+      };
+
+      const diff = await compareInteractionRubric(producedDb, expectedGt, stubJudge({}));
+      expect(diff.passed).toBe(true); // minor only — gate stays open
+      expect(diff.diffs).toEqual([
+        expect.objectContaining({
+          kind: 'mismatch',
+          severity: 'minor',
+          naturalKey: 'self-loop',
+          details: expect.stringContaining('same module'),
+        }),
+      ]);
+    });
+  });
+});
diff --git a/evals/harness/comparator/tables/contracts.ts b/evals/harness/comparator/tables/contracts.ts
new file mode 100644
index 0000000..d8118ce
--- /dev/null
+++ b/evals/harness/comparator/tables/contracts.ts
@@ -0,0 +1,63 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+/**
+ * Compare the `contracts` table.
+ *
+ * Natural key: `(protocol, normalized_key)`.
+ *
+ * Severity matrix:
+ *   - Missing GT contract (required) → CRITICAL
+ *   - Missing GT contract (optional)  → MINOR (LLM legitimately misses some)
+ *   - Extra produced contract         → MINOR (the LLM may detect more than
+ *                                       we enumerate; the GT is an existence
+ *                                       claim, not strict equality)
+ *
+ * Contract participants are not yet checked; they're a separate concern.
+ */
+export function compareContracts(produced: IndexDatabase, gt: GroundTruth): TableDiff {
+  const conn = produced.getConnection();
+  const producedRows = conn.prepare('SELECT protocol, normalized_key AS normalizedKey FROM contracts').all() as Array<{
+    protocol: string;
+    normalizedKey: string;
+  }>;
+  const producedKeys = new Set(producedRows.map((r) => `${r.protocol}::${r.normalizedKey}`));
+  const expected = gt.contracts ?? [];
+
+  // Build map keyed on natural key → optional flag
+  const expectedMap = new Map<string, { optional: boolean }>();
+  for (const c of expected) {
+    expectedMap.set(`${c.protocol}::${c.normalizedKey}`, { optional: c.optional === true });
+  }
+
+  const diffs: RowDiff[] = [];
+  for (const [key, meta] of expectedMap) {
+    if (!producedKeys.has(key)) {
+      diffs.push({
+        kind: 'missing',
+        severity: meta.optional ? 'minor' : 'critical',
+        naturalKey: key,
+        details: `Contract '${key}' is in ground truth but missing from produced DB${meta.optional ? ' (optional)' : ''}`,
+      });
+    }
+  }
+  for (const p of producedKeys) {
+    if (!expectedMap.has(p)) {
+      diffs.push({
+        kind: 'extra',
+        severity: 'minor',
+        naturalKey: p,
+        details: `Produced DB has contract '${p}' not declared in ground truth`,
+      });
+    }
+  }
+
+  return {
+    table: 'contracts',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expected.length,
+    producedCount: producedRows.length,
+    diffs,
+  };
+}
diff --git a/evals/harness/comparator/tables/definition-metadata.ts b/evals/harness/comparator/tables/definition-metadata.ts
new file mode 100644
index 0000000..78d8474
--- /dev/null
+++ b/evals/harness/comparator/tables/definition-metadata.ts
@@ -0,0 +1,236 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, GroundTruthDefinitionMetadata, ProseJudgeFn, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+import { DEFAULT_PROSE_MIN_SIMILARITY, parseJsonStringArray } from './shared.js';
+
+interface ProducedMetadataRow {
+  defKey: string; // file::name
+  key: string;
+  value: string;
+}
+
+/**
+ * Compare the `definition_metadata` table. Async because prose-bearing entries
+ * call the LLM judge.
+ *
+ * Comparison policy per entry — chosen by which field of GroundTruthDefinitionMetadata is set:
+ *   - exactValue   → byte-for-byte string match. Mismatch = MAJOR.
+ *   - acceptableSet → JSON parse + non-empty subset check. Outliers = MINOR (vocabulary drift).
+ *   - proseReference → judgeFn(reference, candidate). Below threshold = MINOR prose-drift.
+ *
+ * Missing definition (def itself absent in produced) = CRITICAL.
+ * Missing aspect (def exists, aspect not annotated) = MAJOR.
+ */
+export async function compareDefinitionMetadata(
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const conn = produced.getConnection();
+  const rows = conn
+    .prepare(
+      `SELECT (f.path || '::' || d.name) AS defKey, dm.key AS key, dm.value AS value
+       FROM definition_metadata dm
+       JOIN definitions d ON dm.definition_id = d.id
+       JOIN files f ON d.file_id = f.id`
+    )
+    .all() as ProducedMetadataRow[];
+
+  // Map: defKey -> Map<aspectKey, value>
+  const producedByDef = new Map<string, Map<string, string>>();
+  for (const r of rows) {
+    let aspectMap = producedByDef.get(r.defKey);
+    if (!aspectMap) {
+      aspectMap = new Map();
+      producedByDef.set(r.defKey, aspectMap);
+    }
+    aspectMap.set(r.key, r.value);
+  }
+
+  // Set of all defKeys present in produced (for the "def missing" check)
+  const producedDefKeys = new Set<string>(
+    (
+      conn
+        .prepare("SELECT (f.path || '::' || d.name) AS defKey FROM definitions d JOIN files f ON d.file_id = f.id")
+        .all() as Array<{ defKey: string }>
+    ).map((r) => r.defKey)
+  );
+
+  const expected = gt.definitionMetadata ?? [];
+  const diffs: RowDiff[] = [];
+  let proseChecksPassed = 0;
+  let proseChecksFailed = 0;
+
+  for (const entry of expected) {
+    const defKey = entry.defKey as unknown as string;
+
+    // Critical: GT references a definition that doesn't exist in produced
+    if (!producedDefKeys.has(defKey)) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: `${defKey}.${entry.key}`,
+        details: `Ground truth references unknown definition '${defKey}' for metadata key '${entry.key}'`,
+      });
+      continue;
+    }
+
+    const aspectMap = producedByDef.get(defKey);
+    const actualValue = aspectMap?.get(entry.key);
+
+    // Major: definition exists but the LLM did not annotate this aspect
+    if (actualValue === undefined) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'major',
+        naturalKey: `${defKey}.${entry.key}`,
+        details: `Definition '${defKey}' exists but aspect '${entry.key}' is not annotated`,
+      });
+      continue;
+    }
+
+    // Apply the right strategy based on which GT field is set
+    const result = compareSingleMetadataEntry(entry, actualValue);
+    if (result.kind === 'exact-mismatch') {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: `${defKey}.${entry.key}`,
+        details: `${entry.key}: expected '${result.expected}', produced '${result.actual}'`,
+      });
+    } else if (result.kind === 'set-mismatch') {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'minor',
+        naturalKey: `${defKey}.${entry.key}`,
+        details: `${entry.key}: expected set [${result.expected.join(', ')}], produced [${result.actual.join(', ')}]`,
+      });
+    } else if (result.kind === 'tags-floor-fail') {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'minor',
+        naturalKey: `${defKey}.${entry.key}`,
+        details: `${entry.key}: produced ${result.actualLength} tag(s), but minTagsRequired=${result.required}`,
+      });
+    } else if (result.kind === 'tags-parse-fail') {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'minor',
+        naturalKey: `${defKey}.${entry.key}`,
+        details: `${entry.key}: themeReference set but produced value is not a JSON string array (got ${truncate(actualValue, 60)})`,
+      });
+    } else if (result.kind === 'prose' || result.kind === 'theme') {
+      // Async judge call. Theme strategy uses a tolerant tag-list judging
+      // prompt; prose strategy uses the strict similarity prompt.
+      const defaultMinSim = result.kind === 'theme' ? DEFAULT_THEME_MIN_SIMILARITY : DEFAULT_PROSE_MIN_SIMILARITY;
+      const minSim = entry.minSimilarity ?? defaultMinSim;
+      const judgment = await judgeFn({
+        field: `definition_metadata.${entry.key} for ${defKey}`,
+        reference: result.reference,
+        candidate: result.candidate,
+        minSimilarity: minSim,
+        mode: result.kind === 'theme' ? 'theme' : 'prose',
+      });
+      if (judgment.passed) {
+        proseChecksPassed += 1;
+      } else {
+        proseChecksFailed += 1;
+        diffs.push({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: `${defKey}.${entry.key}`,
+          details: `prose drift: similarity ${judgment.similarity.toFixed(2)} < ${minSim} — ${judgment.reasoning}`,
+        });
+      }
+    }
+    // 'exact-match' and 'set-match' produce no diff
+  }
+
+  return {
+    table: 'definition_metadata',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expected.length,
+    producedCount: rows.length,
+    diffs,
+    proseChecks: { passed: proseChecksPassed, failed: proseChecksFailed },
+  };
+}
+
+/**
+ * Default minimum similarity for `themeReference` tag-array judging.
+ * Lower than the prose default (0.75) because the candidate is a short
+ * comma-separated tag list rather than a full sentence — the judge has
+ * less surface area to score against.
+ */
+const DEFAULT_THEME_MIN_SIMILARITY = 0.6;
+
+type SingleEntryResult =
+  | { kind: 'exact-match' }
+  | { kind: 'exact-mismatch'; expected: string; actual: string }
+  | { kind: 'set-match' }
+  | { kind: 'set-mismatch'; expected: string[]; actual: string[] }
+  | { kind: 'prose'; reference: string; candidate: string }
+  | { kind: 'theme'; reference: string; candidate: string }
+  | { kind: 'tags-floor-fail'; actualLength: number; required: number }
+  | { kind: 'tags-parse-fail' };
+
+/**
+ * Apply the right comparison strategy for a single GT metadata entry.
+ * Pure synchronous function — the async judge call happens in the caller.
+ *
+ * Strategy precedence (first match wins): exactValue → acceptableSet →
+ * themeReference → proseReference. The GT type encourages exactly one to be
+ * set, but defining a precedence keeps the function total.
+ */
+function compareSingleMetadataEntry(entry: GroundTruthDefinitionMetadata, actualValue: string): SingleEntryResult {
+  if (entry.exactValue !== undefined) {
+    return entry.exactValue === actualValue
+      ? { kind: 'exact-match' }
+      : { kind: 'exact-mismatch', expected: entry.exactValue, actual: actualValue };
+  }
+  if (entry.acceptableSet !== undefined) {
+    const actualSet = parseJsonStringArray(actualValue) ?? [];
+    // Subset check: actualSet must be (a) non-empty AND (b) a subset of acceptableSet.
+    // Outliers in actualSet (tags not in the vocabulary) trigger a mismatch.
+    if (actualSet.length === 0) {
+      return { kind: 'set-mismatch', expected: [...entry.acceptableSet].sort(), actual: [] };
+    }
+    const acceptableHash = new Set(entry.acceptableSet);
+    const outliers = actualSet.filter((t) => !acceptableHash.has(t));
+    if (outliers.length === 0) {
+      return { kind: 'set-match' };
+    }
+    return {
+      kind: 'set-mismatch',
+      expected: [...entry.acceptableSet].sort(),
+      actual: [...actualSet].sort(),
+    };
+  }
+  if (entry.themeReference !== undefined) {
+    const tags = parseJsonStringArray(actualValue);
+    if (tags === null) {
+      return { kind: 'tags-parse-fail' };
+    }
+    const floor = entry.minTagsRequired ?? 1;
+    if (tags.length < floor) {
+      return { kind: 'tags-floor-fail', actualLength: tags.length, required: floor };
+    }
+    // Format candidate as readable prose for the judge: "tags: a, b, c"
+    return {
+      kind: 'theme',
+      reference: entry.themeReference,
+      candidate: `tags: ${tags.join(', ')}`,
+    };
+  }
+  if (entry.proseReference !== undefined) {
+    return { kind: 'prose', reference: entry.proseReference, candidate: actualValue };
+  }
+  // None of the strategy fields set — programmer error.
+  throw new Error(
+    `Ground truth metadata entry for ${entry.defKey}.${entry.key} has none of exactValue/acceptableSet/themeReference/proseReference set`
+  );
+}
+
+function truncate(s: string, n: number): string {
+  return s.length <= n ? s : `${s.slice(0, n - 1)}…`;
+}
diff --git a/evals/harness/comparator/tables/definitions.ts b/evals/harness/comparator/tables/definitions.ts
new file mode 100644
index 0000000..5e787e5
--- /dev/null
+++ b/evals/harness/comparator/tables/definitions.ts
@@ -0,0 +1,171 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+import { LINE_TOLERANCE, arraysEqualSorted, parseJsonStringArray } from './shared.js';
+
+interface ProducedDefRow {
+  path: string;
+  name: string;
+  kind: string;
+  isExported: number;
+  isDefault: number;
+  line: number;
+  endLine: number;
+  extendsName: string | null;
+  implementsNames: string | null; // JSON
+  extendsInterfaces: string | null; // JSON
+}
+
+/**
+ * Compare the `definitions` table.
+ *
+ * Natural key: `(file_path, name)`. Checks (in order, with their severity):
+ * - missing/extra → critical / major
+ * - kind mismatch → major
+ * - line drift > tolerance → minor
+ * - endLine drift > tolerance → minor (only when GT declares endLine)
+ * - extendsName → major
+ * - implementsNames (set) → major (only when GT declares it)
+ * - extendsInterfaces (set) → major (only when GT declares it)
+ * - isExported → major
+ * - isDefault → major
+ */
+export function compareDefinitions(produced: IndexDatabase, gt: GroundTruth): TableDiff {
+  const conn = produced.getConnection();
+  const producedRows = conn
+    .prepare(
+      `SELECT f.path AS path, d.name AS name, d.kind AS kind,
+              d.is_exported AS isExported, d.is_default AS isDefault,
+              d.line AS line, d.end_line AS endLine,
+              d.extends_name AS extendsName,
+              d.implements_names AS implementsNames,
+              d.extends_interfaces AS extendsInterfaces
+       FROM definitions d
+       JOIN files f ON d.file_id = f.id`
+    )
+    .all() as ProducedDefRow[];
+
+  const producedByKey = new Map<string, ProducedDefRow>();
+  for (const r of producedRows) {
+    producedByKey.set(`${r.path}::${r.name}`, r);
+  }
+
+  const expectedByKey = new Map(gt.definitions.map((d) => [`${d.file}::${d.name}`, d]));
+
+  const diffs: RowDiff[] = [];
+
+  for (const [key, expected] of expectedByKey) {
+    const actual = producedByKey.get(key);
+    if (!actual) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: key,
+        details: `Definition '${expected.name}' (${expected.kind}) is in ground truth but missing from produced DB`,
+      });
+      continue;
+    }
+
+    if (actual.kind !== expected.kind) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: key,
+        details: `kind: expected '${expected.kind}', produced '${actual.kind}'`,
+      });
+    }
+
+    if (Math.abs(actual.line - expected.line) > LINE_TOLERANCE) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'minor',
+        naturalKey: key,
+        details: `line: expected ${expected.line} (±${LINE_TOLERANCE}), produced ${actual.line}`,
+      });
+    }
+
+    if (expected.endLine != null && Math.abs(actual.endLine - expected.endLine) > LINE_TOLERANCE) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'minor',
+        naturalKey: key,
+        details: `endLine: expected ${expected.endLine} (±${LINE_TOLERANCE}), produced ${actual.endLine}`,
+      });
+    }
+
+    const expectedExtends = expected.extendsName ?? null;
+    const actualExtends = actual.extendsName ?? null;
+    if (expectedExtends !== actualExtends) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: key,
+        details: `extendsName: expected ${JSON.stringify(expectedExtends)}, produced ${JSON.stringify(actualExtends)}`,
+      });
+    }
+
+    if (expected.implementsNames !== undefined) {
+      const actualImpl = parseJsonStringArray(actual.implementsNames);
+      const expectedImpl = expected.implementsNames;
+      if (!arraysEqualSorted(actualImpl, expectedImpl)) {
+        diffs.push({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: key,
+          details: `implementsNames: expected ${JSON.stringify(expectedImpl)}, produced ${JSON.stringify(actualImpl)}`,
+        });
+      }
+    }
+
+    if (expected.extendsInterfaces !== undefined) {
+      const actualExt = parseJsonStringArray(actual.extendsInterfaces);
+      const expectedExt = expected.extendsInterfaces;
+      if (!arraysEqualSorted(actualExt, expectedExt)) {
+        diffs.push({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: key,
+          details: `extendsInterfaces: expected ${JSON.stringify(expectedExt)}, produced ${JSON.stringify(actualExt)}`,
+        });
+      }
+    }
+
+    if ((actual.isExported === 1) !== expected.isExported) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: key,
+        details: `isExported: expected ${expected.isExported}, produced ${actual.isExported === 1}`,
+      });
+    }
+
+    const expectedDefault = expected.isDefault ?? false;
+    if ((actual.isDefault === 1) !== expectedDefault) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: key,
+        details: `isDefault: expected ${expectedDefault}, produced ${actual.isDefault === 1}`,
+      });
+    }
+  }
+
+  for (const [key] of producedByKey) {
+    if (!expectedByKey.has(key)) {
+      diffs.push({
+        kind: 'extra',
+        severity: 'major',
+        naturalKey: key,
+        details: `Produced DB has definition '${key}' not declared in ground truth`,
+      });
+    }
+  }
+
+  return {
+    table: 'definitions',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expectedByKey.size,
+    producedCount: producedByKey.size,
+    diffs,
+  };
+}
diff --git a/evals/harness/comparator/tables/feature-cohesion.ts b/evals/harness/comparator/tables/feature-cohesion.ts
new file mode 100644
index 0000000..2289023
--- /dev/null
+++ b/evals/harness/comparator/tables/feature-cohesion.ts
@@ -0,0 +1,88 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, ProseJudgeFn, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+const DEFAULT_FEATURE_ROLE_MIN_SIMILARITY = 0.6;
+
+interface ProducedFeatureRow {
+  id: number;
+  slug: string;
+  name: string;
+  description: string | null;
+}
+
+/**
+ * Compare LLM-driven features via a theme-search rubric.
+ *
+ * Each rubric entry describes a target feature concept (e.g.,
+ * "User authentication and identity"). The comparator iterates ALL produced
+ * features, theme-judges each name+description against the expected role,
+ * and picks the best match. Critical if no feature scores above threshold.
+ *
+ * Severity:
+ *   - No feature matches expected theme → CRITICAL
+ *
+ * No cohesion / flow-assignment check: squint's flow→feature assignment is
+ * non-deterministic and the flow entry anchors are unreliable. Theme-only
+ * matching keeps the rubric robust to LLM variance.
+ */
+export async function compareFeatureCohesion(
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const conn = produced.getConnection();
+
+  const featureRows = conn.prepare('SELECT id, slug, name, description FROM features').all() as ProducedFeatureRow[];
+
+  const groups = gt.featureCohesion ?? [];
+  const diffs: RowDiff[] = [];
+  let proseChecksPassed = 0;
+  let proseChecksFailed = 0;
+
+  for (const entry of groups) {
+    const minSim = entry.minRoleSimilarity ?? DEFAULT_FEATURE_ROLE_MIN_SIMILARITY;
+
+    let bestFeature: ProducedFeatureRow | null = null;
+    let bestScore = -1;
+    let bestReasoning = '';
+
+    for (const feature of featureRows) {
+      const candidate = `${feature.name}: ${feature.description ?? '(no description)'}`;
+      const judgment = await judgeFn({
+        field: `feature_cohesion.${entry.label} (candidate: ${feature.slug})`,
+        reference: entry.expectedRole,
+        candidate,
+        minSimilarity: minSim,
+        mode: 'theme',
+      });
+      if (judgment.similarity > bestScore) {
+        bestScore = judgment.similarity;
+        bestFeature = feature;
+        bestReasoning = judgment.reasoning;
+      }
+    }
+
+    if (bestFeature === null || bestScore < minSim) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: entry.label,
+        details: `feature cohesion '${entry.label}': no feature matches the expected role (best score ${bestScore.toFixed(2)} < ${minSim}${bestFeature ? `, best candidate '${bestFeature.slug}': ${bestReasoning}` : ', no features at all'})`,
+      });
+      proseChecksFailed += 1;
+      continue;
+    }
+
+    proseChecksPassed += 1;
+  }
+
+  return {
+    table: 'feature_cohesion',
+    passed: tableDiffPassed(diffs),
+    expectedCount: groups.length,
+    producedCount: featureRows.length,
+    diffs,
+    proseChecks: { passed: proseChecksPassed, failed: proseChecksFailed },
+  };
+}
diff --git a/evals/harness/comparator/tables/files.ts b/evals/harness/comparator/tables/files.ts
new file mode 100644
index 0000000..dab549a
--- /dev/null
+++ b/evals/harness/comparator/tables/files.ts
@@ -0,0 +1,44 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+/**
+ * Compare the `files` table.
+ * Natural key: `path`. Mismatch policy: missing = critical, extra = major.
+ */
+export function compareFiles(produced: IndexDatabase, gt: GroundTruth): TableDiff {
+  const conn = produced.getConnection();
+  const producedRows = conn.prepare('SELECT path FROM files').all() as Array<{ path: string }>;
+  const producedSet = new Set(producedRows.map((r) => r.path));
+  const expectedSet = new Set(gt.files.map((f) => f.path));
+
+  const diffs: RowDiff[] = [];
+  for (const expected of expectedSet) {
+    if (!producedSet.has(expected)) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: expected,
+        details: `File '${expected}' is in ground truth but missing from produced DB`,
+      });
+    }
+  }
+  for (const producedPath of producedSet) {
+    if (!expectedSet.has(producedPath)) {
+      diffs.push({
+        kind: 'extra',
+        severity: 'major',
+        naturalKey: producedPath,
+        details: `Produced DB has file '${producedPath}' not declared in ground truth`,
+      });
+    }
+  }
+
+  return {
+    table: 'files',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expectedSet.size,
+    producedCount: producedSet.size,
+    diffs,
+  };
+}
diff --git a/evals/harness/comparator/tables/flow-rubric.ts b/evals/harness/comparator/tables/flow-rubric.ts
new file mode 100644
index 0000000..528ede0
--- /dev/null
+++ b/evals/harness/comparator/tables/flow-rubric.ts
@@ -0,0 +1,114 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { FlowStakeholder, GroundTruth, ProseJudgeFn, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+/**
+ * Default minimum similarity for the flow role check. Uses theme-judge mode
+ * for tolerance — flow names + descriptions are short and the LLM picks
+ * different vocab across runs.
+ */
+const DEFAULT_FLOW_ROLE_MIN_SIMILARITY = 0.6;
+
+interface ProducedFlowRow {
+  id: number;
+  slug: string;
+  name: string;
+  description: string | null;
+  stakeholder: string;
+}
+
+/**
+ * Compare LLM-driven flows via a theme-search rubric.
+ *
+ * Each rubric entry describes a thematic concept ("User logs in with
+ * credentials") plus an acceptable stakeholder set. The comparator iterates
+ * ALL produced flows, scores each candidate's name+description against the
+ * expected role via the theme judge, and picks the best match. The match
+ * passes if:
+ *   1. At least one flow scores >= minRoleSimilarity, AND
+ *   2. Its stakeholder is in acceptableStakeholders (when set).
+ *
+ * Severity:
+ *   - No flow scores >= threshold (no thematic match)  → CRITICAL
+ *   - Best match's stakeholder not in acceptable set   → MAJOR
+ *
+ * The rubric is intentionally tolerant — squint's flows stage produces a
+ * small number of high-level journeys with LLM-picked names/slugs/paths,
+ * none of which are deterministic. Theme search decouples the GT from
+ * those LLM choices entirely.
+ */
+export async function compareFlowRubric(
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const conn = produced.getConnection();
+
+  const flowRows = conn
+    .prepare('SELECT id, slug, name, description, stakeholder FROM flows')
+    .all() as ProducedFlowRow[];
+
+  const rubric = gt.flowRubric ?? [];
+  const diffs: RowDiff[] = [];
+  let proseChecksPassed = 0;
+  let proseChecksFailed = 0;
+
+  for (const entry of rubric) {
+    const minSim = entry.minRoleSimilarity ?? DEFAULT_FLOW_ROLE_MIN_SIMILARITY;
+
+    // Theme-judge every flow against the expected role; track the best match
+    let bestFlow: ProducedFlowRow | null = null;
+    let bestScore = -1;
+    let bestReasoning = '';
+
+    for (const flow of flowRows) {
+      const candidate = `${flow.name}: ${flow.description ?? '(no description)'}`;
+      const judgment = await judgeFn({
+        field: `flow_rubric.${entry.label} (candidate: ${flow.slug})`,
+        reference: entry.expectedRole,
+        candidate,
+        minSimilarity: minSim,
+        mode: 'theme',
+      });
+      if (judgment.similarity > bestScore) {
+        bestScore = judgment.similarity;
+        bestFlow = flow;
+        bestReasoning = judgment.reasoning;
+      }
+    }
+
+    if (bestFlow === null || bestScore < minSim) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: entry.label,
+        details: `flow rubric '${entry.label}': no flow matches the expected role (best score ${bestScore.toFixed(2)} < ${minSim}${bestFlow ? `, best candidate '${bestFlow.slug}': ${bestReasoning}` : ', no flows at all'})`,
+      });
+      proseChecksFailed += 1;
+      continue;
+    }
+
+    proseChecksPassed += 1;
+
+    // Stakeholder check on the best-matching flow
+    if (entry.acceptableStakeholders && entry.acceptableStakeholders.length > 0) {
+      if (!entry.acceptableStakeholders.includes(bestFlow.stakeholder as FlowStakeholder)) {
+        diffs.push({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: entry.label,
+          details: `flow rubric '${entry.label}': matched flow '${bestFlow.slug}' has stakeholder '${bestFlow.stakeholder}' not in acceptable set [${entry.acceptableStakeholders.join(', ')}]`,
+        });
+      }
+    }
+  }
+
+  return {
+    table: 'flow_rubric',
+    passed: tableDiffPassed(diffs),
+    expectedCount: rubric.length,
+    producedCount: flowRows.length,
+    diffs,
+    proseChecks: { passed: proseChecksPassed, failed: proseChecksFailed },
+  };
+}
diff --git a/evals/harness/comparator/tables/flows.ts b/evals/harness/comparator/tables/flows.ts
new file mode 100644
index 0000000..36a26b2
--- /dev/null
+++ b/evals/harness/comparator/tables/flows.ts
@@ -0,0 +1,77 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+interface ProducedFlowRow {
+  slug: string;
+  name: string;
+  stakeholder: string | null;
+  entryPath: string | null;
+}
+
+/**
+ * Compare the `flows` table.
+ *
+ * Natural key: `slug`. Missing flow = critical. Wrong stakeholder or entryPath
+ * = major. (flow_steps and flow_definition_steps are separate tables.)
+ */
+export function compareFlows(produced: IndexDatabase, gt: GroundTruth): TableDiff {
+  const conn = produced.getConnection();
+  const producedRows = conn
+    .prepare('SELECT slug, name, stakeholder, entry_path AS entryPath FROM flows')
+    .all() as ProducedFlowRow[];
+
+  const producedMap = new Map(producedRows.map((r) => [r.slug, r]));
+  const expected = gt.flows ?? [];
+  const expectedMap = new Map(expected.map((f) => [f.slug, f]));
+
+  const diffs: RowDiff[] = [];
+
+  for (const [slug, e] of expectedMap) {
+    const a = producedMap.get(slug);
+    if (!a) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: slug,
+        details: `Flow '${slug}' is in ground truth but missing from produced DB`,
+      });
+      continue;
+    }
+    if (a.stakeholder !== e.stakeholder) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: slug,
+        details: `stakeholder: expected '${e.stakeholder}', produced '${a.stakeholder}'`,
+      });
+    }
+    if (e.entryPath != null && a.entryPath !== e.entryPath) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: slug,
+        details: `entryPath: expected '${e.entryPath}', produced '${a.entryPath}'`,
+      });
+    }
+  }
+
+  for (const [slug] of producedMap) {
+    if (!expectedMap.has(slug)) {
+      diffs.push({
+        kind: 'extra',
+        severity: 'major',
+        naturalKey: slug,
+        details: `Produced DB has flow '${slug}' not declared in ground truth`,
+      });
+    }
+  }
+
+  return {
+    table: 'flows',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expected.length,
+    producedCount: producedRows.length,
+    diffs,
+  };
+}
diff --git a/evals/harness/comparator/tables/imports.ts b/evals/harness/comparator/tables/imports.ts
new file mode 100644
index 0000000..efe78cf
--- /dev/null
+++ b/evals/harness/comparator/tables/imports.ts
@@ -0,0 +1,145 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+interface ProducedImportRow {
+  importId: number;
+  fromPath: string;
+  source: string;
+  type: string;
+  isExternal: number;
+  isTypeOnly: number;
+  /** Pipe-joined sorted symbol names from the symbols table. */
+  symbolNames: string;
+}
+
+/**
+ * Compare the `imports` table together with its symbol child rows.
+ *
+ * Natural key: `(fromFile, type, source)`. Joins to `symbols` to verify the
+ * imported symbol set matches when the GT declares it. Checks isTypeOnly and
+ * isExternal flags. All mismatches are major.
+ */
+export function compareImports(produced: IndexDatabase, gt: GroundTruth): TableDiff {
+  const conn = produced.getConnection();
+  const rows = conn
+    .prepare(
+      `SELECT i.id AS importId, f.path AS fromPath, i.source AS source, i.type AS type,
+              i.is_external AS isExternal, i.is_type_only AS isTypeOnly,
+              s.name AS symbolName
+       FROM imports i
+       JOIN files f ON i.from_file_id = f.id
+       LEFT JOIN symbols s ON s.reference_id = i.id
+       ORDER BY i.id`
+    )
+    .all() as Array<{
+    importId: number;
+    fromPath: string;
+    source: string;
+    type: string;
+    isExternal: number;
+    isTypeOnly: number;
+    symbolName: string | null;
+  }>;
+
+  // Group symbol rows by their parent import (LEFT JOIN explodes 1 import × N symbols).
+  const grouped = new Map<number, ProducedImportRow>();
+  for (const r of rows) {
+    let entry = grouped.get(r.importId);
+    if (!entry) {
+      entry = {
+        importId: r.importId,
+        fromPath: r.fromPath,
+        source: r.source,
+        type: r.type,
+        isExternal: r.isExternal,
+        isTypeOnly: r.isTypeOnly,
+        symbolNames: '',
+      };
+      grouped.set(r.importId, entry);
+    }
+    if (r.symbolName) {
+      entry.symbolNames = entry.symbolNames ? `${entry.symbolNames}|${r.symbolName}` : r.symbolName;
+    }
+  }
+  const producedRows = Array.from(grouped.values()).map((r) => ({
+    ...r,
+    // Sort symbol names so equality is order-independent
+    symbolNames: r.symbolNames.split('|').filter(Boolean).sort().join('|'),
+  }));
+
+  const importKey = (r: { fromPath: string; type: string; source: string }) => `${r.fromPath}|${r.type}|${r.source}`;
+
+  const producedByKey = new Map(producedRows.map((r) => [importKey(r), r]));
+  const expected = gt.imports ?? [];
+
+  const diffs: RowDiff[] = [];
+
+  for (const e of expected) {
+    const k = importKey({ fromPath: e.fromFile, type: e.type, source: e.source });
+    const a = producedByKey.get(k);
+    if (!a) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'major',
+        naturalKey: k,
+        details: `Import '${e.source}' (${e.type}) from '${e.fromFile}' is in ground truth but missing from produced DB`,
+      });
+      continue;
+    }
+
+    const expectedTypeOnly = e.isTypeOnly === true;
+    if (expectedTypeOnly !== (a.isTypeOnly === 1)) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: k,
+        details: `isTypeOnly: expected ${expectedTypeOnly}, produced ${a.isTypeOnly === 1}`,
+      });
+    }
+
+    const expectedExternal = e.isExternal === true;
+    if (expectedExternal !== (a.isExternal === 1)) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: k,
+        details: `isExternal: expected ${expectedExternal}, produced ${a.isExternal === 1}`,
+      });
+    }
+
+    if (e.symbols && e.symbols.length > 0) {
+      const expectedSymbols = e.symbols
+        .map((s) => s.name)
+        .sort()
+        .join('|');
+      if (expectedSymbols !== a.symbolNames) {
+        diffs.push({
+          kind: 'mismatch',
+          severity: 'major',
+          naturalKey: k,
+          details: `symbols: expected [${expectedSymbols}], produced [${a.symbolNames}]`,
+        });
+      }
+    }
+  }
+
+  for (const [k] of producedByKey) {
+    if (!expected.some((e) => importKey({ fromPath: e.fromFile, type: e.type, source: e.source }) === k)) {
+      diffs.push({
+        kind: 'extra',
+        severity: 'major',
+        naturalKey: k,
+        details: `Produced DB has import '${k}' not declared in ground truth`,
+      });
+    }
+  }
+
+  return {
+    table: 'imports',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expected.length,
+    producedCount: producedRows.length,
+    diffs,
+  };
+}
diff --git a/evals/harness/comparator/tables/index.ts b/evals/harness/comparator/tables/index.ts
new file mode 100644
index 0000000..c38ad85
--- /dev/null
+++ b/evals/harness/comparator/tables/index.ts
@@ -0,0 +1,29 @@
+/**
+ * Per-table comparator strategies.
+ *
+ * Each comparator returns a TableDiff with structural diffs only — prose-judged
+ * fields are handled inline by the per-table comparator that needs them, using
+ * the ProseJudgeFn injected via the dispatcher.
+ *
+ * Key invariant: comparisons are ID-agnostic. Joins use natural keys (file
+ * paths, definition names, module full_paths, contract protocol+key, etc.) so
+ * that two DBs built with different insertion orders still match.
+ *
+ * Adding a new comparator: create a new file in this directory, then re-export
+ * it here AND wire it into the COMPARATORS map in `comparator/index.ts`.
+ */
+
+export { compareContracts } from './contracts.js';
+export { compareDefinitionMetadata } from './definition-metadata.js';
+export { compareDefinitions } from './definitions.js';
+export { compareFeatureCohesion } from './feature-cohesion.js';
+export { compareFiles } from './files.js';
+export { compareFlowRubric } from './flow-rubric.js';
+export { compareFlows } from './flows.js';
+export { compareImports } from './imports.js';
+export { compareInteractionRubric } from './interaction-rubric.js';
+export { compareInteractions } from './interactions.js';
+export { compareModuleCohesion } from './module-cohesion.js';
+export { compareModuleMembers } from './module-members.js';
+export { compareModules } from './modules.js';
+export { compareRelationshipAnnotations } from './relationship-annotations.js';
diff --git a/evals/harness/comparator/tables/interaction-rubric.ts b/evals/harness/comparator/tables/interaction-rubric.ts
new file mode 100644
index 0000000..25cdace
--- /dev/null
+++ b/evals/harness/comparator/tables/interaction-rubric.ts
@@ -0,0 +1,237 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, InteractionSource, ProseJudgeFn, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+/**
+ * Default minimum similarity for the semantic prose check. Lower than the
+ * prose default (0.75) because LLM-generated semantic prose for interactions
+ * is short ("validates auth credentials before forwarding the request") and
+ * the theme judge mode is more tolerant.
+ */
+const DEFAULT_SEMANTIC_MIN_SIMILARITY = 0.6;
+
+/**
+ * Default acceptable sources when the rubric entry omits `acceptableSources`.
+ * Excludes 'llm-inferred' because it's the most variance-prone source — the
+ * cross-process inference step in iter 6 generates speculative edges that
+ * may or may not appear across runs.
+ */
+const DEFAULT_ACCEPTABLE_SOURCES: InteractionSource[] = ['ast', 'ast-import', 'contract-matched'];
+
+interface ProducedInteractionRow {
+  fromModuleId: number;
+  toModuleId: number;
+  fromPath: string;
+  toPath: string;
+  source: string;
+  semantic: string | null;
+}
+
+/**
+ * Compare LLM-driven interactions via an anchor-based rubric.
+ *
+ * Each rubric entry names a "from anchor" definition and a "to anchor"
+ * definition. The comparator looks up the modules those defs are assigned
+ * to (via `module_members`) and then verifies an interaction edge exists
+ * between those modules with an acceptable `source` and (optionally) a
+ * semantic prose that the theme judge approves.
+ *
+ * Severity matrix:
+ *   - Anchor def doesn't exist in produced       → CRITICAL
+ *   - Anchor def has no module assignment        → CRITICAL
+ *   - Both anchors resolve to the same module    → MAJOR (no cross-module edge)
+ *   - No interaction edge between resolved mods  → MAJOR
+ *   - Interaction `source` not in acceptableSet  → MAJOR
+ *   - Semantic prose drift below threshold       → MINOR (prose-drift)
+ */
+export async function compareInteractionRubric(
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const conn = produced.getConnection();
+
+  // defKey → moduleId map (from module_members JOIN)
+  const memberRows = conn
+    .prepare(
+      `SELECT (f.path || '::' || d.name) AS defKey,
+              mm.module_id AS moduleId,
+              m.full_path AS fullPath
+       FROM module_members mm
+       JOIN definitions d ON mm.definition_id = d.id
+       JOIN files f ON d.file_id = f.id
+       JOIN modules m ON mm.module_id = m.id`
+    )
+    .all() as Array<{ defKey: string; moduleId: number; fullPath: string }>;
+  const defToModule = new Map<string, { moduleId: number; fullPath: string }>();
+  for (const r of memberRows) {
+    defToModule.set(r.defKey, { moduleId: r.moduleId, fullPath: r.fullPath });
+  }
+
+  // Set of all defKeys present in produced
+  const producedDefKeys = new Set<string>(
+    (
+      conn
+        .prepare("SELECT (f.path || '::' || d.name) AS defKey FROM definitions d JOIN files f ON d.file_id = f.id")
+        .all() as Array<{ defKey: string }>
+    ).map((r) => r.defKey)
+  );
+
+  // Index interactions by (fromModuleId, toModuleId)
+  const interactionRows = conn
+    .prepare(
+      `SELECT i.from_module_id AS fromModuleId,
+              i.to_module_id AS toModuleId,
+              fm.full_path AS fromPath,
+              tm.full_path AS toPath,
+              i.source AS source,
+              i.semantic AS semantic
+       FROM interactions i
+       JOIN modules fm ON i.from_module_id = fm.id
+       JOIN modules tm ON i.to_module_id = tm.id`
+    )
+    .all() as ProducedInteractionRow[];
+  const interactionByModulePair = new Map<string, ProducedInteractionRow>();
+  for (const i of interactionRows) {
+    interactionByModulePair.set(`${i.fromModuleId}->${i.toModuleId}`, i);
+  }
+
+  const rubric = gt.interactionRubric ?? [];
+  const diffs: RowDiff[] = [];
+  let proseChecksPassed = 0;
+  let proseChecksFailed = 0;
+
+  for (const entry of rubric) {
+    const fromKey = entry.fromAnchor as unknown as string;
+    const toKey = entry.toAnchor as unknown as string;
+
+    // Critical: anchor def not in produced
+    if (!producedDefKeys.has(fromKey)) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: entry.label,
+        details: `interaction rubric '${entry.label}' references unknown FROM anchor '${fromKey}'`,
+      });
+      continue;
+    }
+    if (!producedDefKeys.has(toKey)) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: entry.label,
+        details: `interaction rubric '${entry.label}' references unknown TO anchor '${toKey}'`,
+      });
+      continue;
+    }
+
+    // Critical: anchor def is unassigned to any module
+    const fromAssign = defToModule.get(fromKey);
+    const toAssign = defToModule.get(toKey);
+    if (!fromAssign) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: entry.label,
+        details: `interaction rubric '${entry.label}': FROM anchor '${fromKey}' is unassigned to any module`,
+      });
+      continue;
+    }
+    if (!toAssign) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: entry.label,
+        details: `interaction rubric '${entry.label}': TO anchor '${toKey}' is unassigned to any module`,
+      });
+      continue;
+    }
+
+    // Self-loop: from and to resolve to the same module. The interactions
+    // table only stores cross-module edges, so a self-loop rubric entry
+    // can never match. Treat as MINOR (not major) — the LLM legitimately
+    // groups semantically related defs into one module on some runs (good
+    // cohesion). The "missing" cross-module edge isn't a quality regression,
+    // it's a structural consequence of tight grouping.
+    if (fromAssign.moduleId === toAssign.moduleId) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'minor',
+        naturalKey: entry.label,
+        details: `interaction rubric '${entry.label}': both anchors resolve to the same module '${fromAssign.fullPath}', no cross-module edge to verify (LLM grouped tightly)`,
+      });
+      continue;
+    }
+
+    // Look up the interaction edge between the two resolved modules
+    const interaction = interactionByModulePair.get(`${fromAssign.moduleId}->${toAssign.moduleId}`);
+    if (!interaction) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'major',
+        naturalKey: entry.label,
+        details: `interaction rubric '${entry.label}': no interaction edge between '${fromAssign.fullPath}' (containing ${fromKey}) and '${toAssign.fullPath}' (containing ${toKey})`,
+      });
+      continue;
+    }
+
+    // Source check
+    const acceptable = entry.acceptableSources ?? DEFAULT_ACCEPTABLE_SOURCES;
+    if (!acceptable.includes(interaction.source as InteractionSource)) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: entry.label,
+        details: `interaction rubric '${entry.label}': source '${interaction.source}' not in acceptable set [${acceptable.join(', ')}]`,
+      });
+      continue;
+    }
+
+    // Optional semantic prose check
+    if (entry.semanticReference != null) {
+      if (interaction.semantic == null) {
+        diffs.push({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: entry.label,
+          details: `interaction rubric '${entry.label}': semantic is null in produced DB; expected prose matching '${truncate(entry.semanticReference)}'`,
+        });
+        proseChecksFailed += 1;
+        continue;
+      }
+
+      const minSim = entry.minSimilarity ?? DEFAULT_SEMANTIC_MIN_SIMILARITY;
+      const judgment = await judgeFn({
+        field: `interaction_rubric.${entry.label} semantic check`,
+        reference: entry.semanticReference,
+        candidate: interaction.semantic,
+        minSimilarity: minSim,
+        mode: 'theme',
+      });
+      if (judgment.passed) {
+        proseChecksPassed += 1;
+      } else {
+        diffs.push({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: entry.label,
+          details: `interaction rubric '${entry.label}': semantic drift ${judgment.similarity.toFixed(2)} < ${minSim} — ${judgment.reasoning}`,
+        });
+        proseChecksFailed += 1;
+      }
+    }
+  }
+
+  return {
+    table: 'interaction_rubric',
+    passed: tableDiffPassed(diffs),
+    expectedCount: rubric.length,
+    producedCount: interactionRows.length,
+    diffs,
+    proseChecks: { passed: proseChecksPassed, failed: proseChecksFailed },
+  };
+}
+
+function truncate(s: string, n = 60): string {
+  return s.length <= n ? s : `${s.slice(0, n - 1)}…`;
+}
diff --git a/evals/harness/comparator/tables/interactions.ts b/evals/harness/comparator/tables/interactions.ts
new file mode 100644
index 0000000..054eb88
--- /dev/null
+++ b/evals/harness/comparator/tables/interactions.ts
@@ -0,0 +1,87 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+interface ProducedInteractionRow {
+  fromPath: string;
+  toPath: string;
+  pattern: string | null;
+  source: string;
+}
+
+/**
+ * Compare the `interactions` table.
+ *
+ * Natural key: `(fromModulePath, toModulePath)`. Checks `source` and `pattern`
+ * exactly. Missing or extra interactions and any field mismatch are major.
+ */
+export function compareInteractions(produced: IndexDatabase, gt: GroundTruth): TableDiff {
+  const conn = produced.getConnection();
+  const producedRows = conn
+    .prepare(
+      `SELECT from_m.full_path AS fromPath, to_m.full_path AS toPath,
+              i.pattern AS pattern, i.source AS source
+       FROM interactions i
+       JOIN modules from_m ON i.from_module_id = from_m.id
+       JOIN modules to_m ON i.to_module_id = to_m.id`
+    )
+    .all() as ProducedInteractionRow[];
+
+  const producedMap = new Map<string, ProducedInteractionRow>();
+  for (const r of producedRows) {
+    producedMap.set(`${r.fromPath}->${r.toPath}`, r);
+  }
+
+  const expected = gt.interactions ?? [];
+  const expectedMap = new Map(expected.map((i) => [`${i.fromModulePath}->${i.toModulePath}`, i]));
+
+  const diffs: RowDiff[] = [];
+
+  for (const [key, e] of expectedMap) {
+    const a = producedMap.get(key);
+    if (!a) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'major',
+        naturalKey: key,
+        details: `Interaction '${key}' is in ground truth but missing from produced DB`,
+      });
+      continue;
+    }
+    if (a.source !== e.source) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: key,
+        details: `source: expected '${e.source}', produced '${a.source}'`,
+      });
+    }
+    if ((e.pattern ?? null) !== (a.pattern ?? null)) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: key,
+        details: `pattern: expected ${JSON.stringify(e.pattern)}, produced ${JSON.stringify(a.pattern)}`,
+      });
+    }
+  }
+
+  for (const [key] of producedMap) {
+    if (!expectedMap.has(key)) {
+      diffs.push({
+        kind: 'extra',
+        severity: 'major',
+        naturalKey: key,
+        details: `Produced DB has interaction '${key}' not declared in ground truth`,
+      });
+    }
+  }
+
+  return {
+    table: 'interactions',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expected.length,
+    producedCount: producedRows.length,
+    diffs,
+  };
+}
diff --git a/evals/harness/comparator/tables/module-cohesion.ts b/evals/harness/comparator/tables/module-cohesion.ts
new file mode 100644
index 0000000..edb60c2
--- /dev/null
+++ b/evals/harness/comparator/tables/module-cohesion.ts
@@ -0,0 +1,291 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, ModuleCohesionGroup, ProseJudgeFn, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+/**
+ * Default minimum similarity for the role-judge call. Lower than the prose
+ * default (0.75) because module names + descriptions are short and the
+ * candidate is mechanically formatted ("name: description"). Iter 4's prose
+ * checks already use 0.6 for the same reason.
+ */
+const DEFAULT_ROLE_MIN_SIMILARITY = 0.6;
+
+interface MemberAssignment {
+  defKey: string;
+  moduleId: number | null;
+  moduleFullPath: string | null;
+}
+
+interface ProducedModuleRow {
+  id: number;
+  fullPath: string;
+  name: string;
+  description: string | null;
+}
+
+/**
+ * Compare LLM-driven module assignments via a cohesion + role rubric.
+ *
+ * Replaces the strict `compareModules` + `compareModuleMembers` exact-matching
+ * for LLM-driven module-stage iterations. Verifies the *property* that
+ * semantically related definitions live in the same module that plays the
+ * expected role, rather than the *spelling* of the LLM's slug choices.
+ *
+ * Severity matrix:
+ *   GT references unknown definition       → CRITICAL
+ *   Any group member is unassigned         → CRITICAL
+ *   Strict cohesion violated               → MAJOR
+ *   Majority cohesion violated             → MAJOR
+ *   Role judge below threshold             → MINOR (prose-drift)
+ *
+ * The "winner" module is the one containing all members (strict) or the
+ * largest share (majority). Its name+description is sent to the prose judge
+ * with `expectedRole` as the reference.
+ */
+export async function compareModuleCohesion(
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const conn = produced.getConnection();
+
+  // Build defKey → { moduleId, fullPath } map for produced assignments
+  const memberRows = conn
+    .prepare(
+      `SELECT (f.path || '::' || d.name) AS defKey,
+              m.id AS moduleId,
+              m.full_path AS fullPath
+       FROM module_members mm
+       JOIN definitions d ON mm.definition_id = d.id
+       JOIN files f ON d.file_id = f.id
+       JOIN modules m ON mm.module_id = m.id`
+    )
+    .all() as Array<{ defKey: string; moduleId: number; fullPath: string }>;
+  const assignmentByDef = new Map<string, { moduleId: number; fullPath: string }>();
+  for (const r of memberRows) {
+    assignmentByDef.set(r.defKey, { moduleId: r.moduleId, fullPath: r.fullPath });
+  }
+
+  // Set of defKeys present in produced — for the "GT references unknown def" check
+  const producedDefKeys = new Set<string>(
+    (
+      conn
+        .prepare("SELECT (f.path || '::' || d.name) AS defKey FROM definitions d JOIN files f ON d.file_id = f.id")
+        .all() as Array<{ defKey: string }>
+    ).map((r) => r.defKey)
+  );
+
+  // Module lookup by id (for fetching name + description after we pick a winner)
+  const moduleRows = conn
+    .prepare('SELECT id, full_path AS fullPath, name, description FROM modules')
+    .all() as ProducedModuleRow[];
+  const moduleById = new Map<number, ProducedModuleRow>();
+  for (const m of moduleRows) {
+    moduleById.set(m.id, m);
+  }
+
+  const groups = gt.moduleCohesion ?? [];
+  const diffs: RowDiff[] = [];
+  let proseChecksPassed = 0;
+  let proseChecksFailed = 0;
+
+  for (const group of groups) {
+    const groupResult = await evaluateGroup(group, assignmentByDef, producedDefKeys, moduleById, judgeFn);
+    diffs.push(...groupResult.diffs);
+    proseChecksPassed += groupResult.proseChecksPassed;
+    proseChecksFailed += groupResult.proseChecksFailed;
+  }
+
+  return {
+    table: 'module_cohesion',
+    passed: tableDiffPassed(diffs),
+    expectedCount: groups.length,
+    producedCount: memberRows.length,
+    diffs,
+    proseChecks: { passed: proseChecksPassed, failed: proseChecksFailed },
+  };
+}
+
+interface GroupEvalResult {
+  diffs: RowDiff[];
+  proseChecksPassed: number;
+  proseChecksFailed: number;
+}
+
+async function evaluateGroup(
+  group: ModuleCohesionGroup,
+  assignmentByDef: Map<string, { moduleId: number; fullPath: string }>,
+  producedDefKeys: Set<string>,
+  moduleById: Map<number, ProducedModuleRow>,
+  judgeFn: ProseJudgeFn
+): Promise<GroupEvalResult> {
+  const diffs: RowDiff[] = [];
+
+  // Resolve member assignments + check for unknown defs
+  const assignments: MemberAssignment[] = [];
+  for (const member of group.members) {
+    const memberKey = member as unknown as string;
+    if (!producedDefKeys.has(memberKey)) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey: group.label,
+        details: `cohesion group '${group.label}' references unknown definition '${memberKey}'`,
+      });
+      // Stop processing this group — there's no useful comparison after a missing def
+      return { diffs, proseChecksPassed: 0, proseChecksFailed: 0 };
+    }
+    const assigned = assignmentByDef.get(memberKey);
+    assignments.push({
+      defKey: memberKey,
+      moduleId: assigned?.moduleId ?? null,
+      moduleFullPath: assigned?.fullPath ?? null,
+    });
+  }
+
+  // Critical: any member completely unassigned to any module
+  const unassigned = assignments.filter((a) => a.moduleId === null);
+  if (unassigned.length > 0) {
+    diffs.push({
+      kind: 'missing',
+      severity: 'critical',
+      naturalKey: group.label,
+      details: `cohesion group '${group.label}' has ${unassigned.length} unassigned member(s): ${unassigned
+        .map((a) => a.defKey)
+        .join(', ')}`,
+    });
+    return { diffs, proseChecksPassed: 0, proseChecksFailed: 0 };
+  }
+
+  // Bucket assigned members by their containing module
+  const buckets = new Map<number, MemberAssignment[]>();
+  for (const a of assignments) {
+    if (a.moduleId === null) continue;
+    let bucket = buckets.get(a.moduleId);
+    if (!bucket) {
+      bucket = [];
+      buckets.set(a.moduleId, bucket);
+    }
+    bucket.push(a);
+  }
+
+  // Pick the winning module: the one with the most members
+  let winnerModuleId: number | null = null;
+  let winnerCount = 0;
+  for (const [moduleId, bucket] of buckets) {
+    if (bucket.length > winnerCount) {
+      winnerCount = bucket.length;
+      winnerModuleId = moduleId;
+    }
+  }
+
+  // Cohesion check
+  const cohesionMode = group.cohesion ?? 'strict';
+  if (cohesionMode === 'strict') {
+    if (winnerCount !== assignments.length) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: group.label,
+        details: `cohesion(strict) failed for '${group.label}': members scattered across ${buckets.size} modules — ${formatBuckets(buckets, moduleById)}`,
+      });
+      return { diffs, proseChecksPassed: 0, proseChecksFailed: 0 };
+    }
+  } else {
+    // 'majority': winner must contain at least 50% of members.
+    // Boundary inclusive: 6/12 passes (the LLM may legitimately split a group
+    // like the 12-member frontend client across an internal/auth/tasks subtree
+    // and the largest leaf might hold exactly half). Strictly less than half
+    // still fails — that's a real scatter.
+    const totalMembers = assignments.length;
+    if (winnerCount * 2 < totalMembers) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: group.label,
+        details: `cohesion(majority) failed for '${group.label}': winning module has ${winnerCount}/${totalMembers} members — ${formatBuckets(buckets, moduleById)}`,
+      });
+      return { diffs, proseChecksPassed: 0, proseChecksFailed: 0 };
+    }
+  }
+
+  // Role judge: send the winning module's name + description to the LLM
+  if (winnerModuleId === null) {
+    // Should be unreachable given the assignment checks above, but keep total
+    diffs.push({
+      kind: 'mismatch',
+      severity: 'major',
+      naturalKey: group.label,
+      details: `cohesion '${group.label}': internal — could not pick a winner module`,
+    });
+    return { diffs, proseChecksPassed: 0, proseChecksFailed: 0 };
+  }
+  const winnerModule = moduleById.get(winnerModuleId);
+  if (!winnerModule) {
+    diffs.push({
+      kind: 'mismatch',
+      severity: 'major',
+      naturalKey: group.label,
+      details: `cohesion '${group.label}': winning module id ${winnerModuleId} not found in modules table`,
+    });
+    return { diffs, proseChecksPassed: 0, proseChecksFailed: 0 };
+  }
+
+  const candidate = formatModuleAsCandidate(winnerModule);
+  const minSim = group.minRoleSimilarity ?? DEFAULT_ROLE_MIN_SIMILARITY;
+  // Use the tolerant 'theme' judge mode for role checks: the candidate is a
+  // short LLM-produced label (name + brief description), conceptually the
+  // same kind of input as the tag-list theme strategy. The strict prose
+  // mode is too harsh for this — it scores around 0.4 because the short
+  // label can't paraphrase every detail in the rubric's expectedRole.
+  const judgment = await judgeFn({
+    field: `module_cohesion.${group.label} role check`,
+    reference: group.expectedRole,
+    candidate,
+    minSimilarity: minSim,
+    mode: 'theme',
+  });
+
+  if (judgment.passed) {
+    return { diffs, proseChecksPassed: 1, proseChecksFailed: 0 };
+  }
+  diffs.push({
+    kind: 'prose-drift',
+    severity: 'minor',
+    naturalKey: group.label,
+    details: `role drift: similarity ${judgment.similarity.toFixed(2)} < ${minSim} — ${judgment.reasoning}`,
+  });
+  return { diffs, proseChecksPassed: 0, proseChecksFailed: 1 };
+}
+
+/**
+ * Format the winning module's name + description as a single short string
+ * that the prose judge can compare against the rubric's `expectedRole`.
+ *
+ * Uses the LEAF NAME of the module (last segment of full_path), not the
+ * `name` column, because the LLM-picked `name` is sometimes a more verbose
+ * "Authentication API" while the slug stays compact ("auth"). The leaf is
+ * what an end user sees; the description carries the semantic detail.
+ *
+ * Falls back to "(no description)" if the description column is null —
+ * tested against this exact string in the unit suite.
+ */
+function formatModuleAsCandidate(module: ProducedModuleRow): string {
+  const segments = module.fullPath.split('.');
+  const leaf = segments[segments.length - 1] ?? module.fullPath;
+  const description = module.description ?? '(no description)';
+  return `${leaf}: ${description}`;
+}
+
+/**
+ * Format a per-module bucket count for human-readable diff details.
+ * "moduleA(3), moduleB(1)"
+ */
+function formatBuckets(buckets: Map<number, MemberAssignment[]>, moduleById: Map<number, ProducedModuleRow>): string {
+  const parts: string[] = [];
+  for (const [moduleId, members] of buckets) {
+    const path = moduleById.get(moduleId)?.fullPath ?? `id-${moduleId}`;
+    parts.push(`${path}(${members.length})`);
+  }
+  return parts.join(', ');
+}
diff --git a/evals/harness/comparator/tables/module-members.ts b/evals/harness/comparator/tables/module-members.ts
new file mode 100644
index 0000000..299810e
--- /dev/null
+++ b/evals/harness/comparator/tables/module-members.ts
@@ -0,0 +1,65 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+/**
+ * Compare the `module_members` table.
+ *
+ * Natural key: definition `defKey` (file::name). Each definition must be
+ * assigned to its expected module. Missing assignment = major. Wrong module = major.
+ */
+export function compareModuleMembers(produced: IndexDatabase, gt: GroundTruth): TableDiff {
+  const conn = produced.getConnection();
+  // Map: defKey -> module fullPath assigned in produced DB
+  const producedMap = new Map<string, string>();
+  const rows = conn
+    .prepare(
+      `SELECT f.path || '::' || d.name AS defKey, m.full_path AS fullPath
+       FROM module_members mm
+       JOIN definitions d ON mm.definition_id = d.id
+       JOIN files f ON d.file_id = f.id
+       JOIN modules m ON mm.module_id = m.id`
+    )
+    .all() as Array<{ defKey: string; fullPath: string }>;
+  for (const r of rows) {
+    producedMap.set(r.defKey, r.fullPath);
+  }
+
+  // Build expected map from gt.modules
+  const expectedMap = new Map<string, string>();
+  for (const m of gt.modules ?? []) {
+    for (const memberKey of m.members ?? []) {
+      expectedMap.set(memberKey, m.fullPath);
+    }
+  }
+
+  const diffs: RowDiff[] = [];
+  for (const [key, expectedPath] of expectedMap) {
+    const actualPath = producedMap.get(key);
+    if (!actualPath) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'major',
+        naturalKey: key,
+        details: `Definition '${key}' is unassigned in produced DB; expected module '${expectedPath}'`,
+      });
+      continue;
+    }
+    if (actualPath !== expectedPath) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey: key,
+        details: `module assignment: expected '${expectedPath}', produced '${actualPath}'`,
+      });
+    }
+  }
+
+  return {
+    table: 'module_members',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expectedMap.size,
+    producedCount: producedMap.size,
+    diffs,
+  };
+}
diff --git a/evals/harness/comparator/tables/modules.ts b/evals/harness/comparator/tables/modules.ts
new file mode 100644
index 0000000..24475f1
--- /dev/null
+++ b/evals/harness/comparator/tables/modules.ts
@@ -0,0 +1,134 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import type { GroundTruth, ProseJudgeFn, RowDiff, TableDiff } from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+
+/**
+ * Lower default threshold for module descriptions vs definition_metadata.
+ * The tree-phase prompt asks for a single short sentence per module
+ * (`buildTreeSystemPrompt` examples are ~5–10 words), which gives the
+ * judge less surface area to score → cosine drifts naturally lower.
+ *
+ * Iteration 4 starts at 0.6 — the same floor we found necessary for
+ * iteration 3's terse relationship semantics. Per-entry overrides via
+ * `GroundTruthModule.minSimilarity` remain available for borderline cases.
+ */
+const DEFAULT_MODULE_PROSE_MIN_SIMILARITY = 0.6;
+
+interface ProducedModuleRow {
+  fullPath: string;
+  description: string | null;
+}
+
+/**
+ * Compare the `modules` table.
+ *
+ * Natural key: `full_path`. Async because module descriptions are LLM prose
+ * and need to be judged when GT declares a `descriptionReference`.
+ *
+ * Severity matrix:
+ *   GT module missing in produced       → MAJOR
+ *   Extra produced module               → MINOR (suppressed if it's an
+ *                                          ancestor of any GT module — those
+ *                                          are auto-created scaffolding rows)
+ *   Description prose drift             → MINOR (prose-drift kind)
+ *   Produced description NULL when GT
+ *     declared a reference              → MINOR (prose-drift kind, distinct
+ *                                          from "judge said no" — no judge call)
+ *   Module 'project' root               → IGNORED (always present)
+ */
+export async function compareModules(
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const conn = produced.getConnection();
+  const producedRows = conn
+    .prepare('SELECT full_path AS fullPath, description FROM modules')
+    .all() as ProducedModuleRow[];
+  const producedByPath = new Map<string, ProducedModuleRow>();
+  for (const r of producedRows) {
+    producedByPath.set(r.fullPath, r);
+  }
+
+  const expected = gt.modules ?? [];
+  const expectedSet = new Set(expected.map((m) => m.fullPath));
+
+  const diffs: RowDiff[] = [];
+  let proseChecksPassed = 0;
+  let proseChecksFailed = 0;
+
+  for (const e of expected) {
+    const producedRow = producedByPath.get(e.fullPath);
+    if (!producedRow) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'major',
+        naturalKey: e.fullPath,
+        details: `Module '${e.fullPath}' is in ground truth but missing from produced DB`,
+      });
+      continue;
+    }
+
+    // Optional prose check on description (only when GT declares a reference)
+    if (e.descriptionReference != null) {
+      if (producedRow.description == null) {
+        // Distinct case: the LLM never wrote a description for this module.
+        // Judge can't compare against null, so flag it directly.
+        diffs.push({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey: e.fullPath,
+          details: `module description is null in produced DB; expected prose matching: '${truncate(e.descriptionReference)}'`,
+        });
+        proseChecksFailed += 1;
+      } else {
+        const minSim = e.minSimilarity ?? DEFAULT_MODULE_PROSE_MIN_SIMILARITY;
+        const judgment = await judgeFn({
+          field: `modules.description for ${e.fullPath}`,
+          reference: e.descriptionReference,
+          candidate: producedRow.description,
+          minSimilarity: minSim,
+        });
+        if (judgment.passed) {
+          proseChecksPassed += 1;
+        } else {
+          proseChecksFailed += 1;
+          diffs.push({
+            kind: 'prose-drift',
+            severity: 'minor',
+            naturalKey: e.fullPath,
+            details: `prose drift: similarity ${judgment.similarity.toFixed(2)} < ${minSim} — ${judgment.reasoning}`,
+          });
+        }
+      }
+    }
+  }
+
+  // Produced DB will always have auto-created intermediate ancestors and the
+  // 'project' root. Don't report those — only report extras with no descendants.
+  for (const p of producedRows) {
+    if (expectedSet.has(p.fullPath)) continue;
+    if (p.fullPath === 'project') continue;
+    const isAncestor = expected.some((e) => e.fullPath.startsWith(`${p.fullPath}.`));
+    if (isAncestor) continue;
+    diffs.push({
+      kind: 'extra',
+      severity: 'minor',
+      naturalKey: p.fullPath,
+      details: `Produced DB has module '${p.fullPath}' not declared in ground truth`,
+    });
+  }
+
+  return {
+    table: 'modules',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expected.length,
+    producedCount: producedRows.length,
+    diffs,
+    proseChecks: { passed: proseChecksPassed, failed: proseChecksFailed },
+  };
+}
+
+function truncate(s: string, n = 60): string {
+  return s.length <= n ? s : `${s.slice(0, n - 1)}…`;
+}
diff --git a/evals/harness/comparator/tables/relationship-annotations.ts b/evals/harness/comparator/tables/relationship-annotations.ts
new file mode 100644
index 0000000..0b76c52
--- /dev/null
+++ b/evals/harness/comparator/tables/relationship-annotations.ts
@@ -0,0 +1,194 @@
+import type { IndexDatabase } from '../../../../src/db/database-facade.js';
+import {
+  type GroundTruth,
+  type GroundTruthRelationship,
+  type ProseJudgeFn,
+  type RowDiff,
+  type TableDiff,
+  parseDefKey,
+} from '../../types.js';
+import { tableDiffPassed } from '../severity.js';
+import { DEFAULT_PROSE_MIN_SIMILARITY } from './shared.js';
+
+interface ProducedRelationshipRow {
+  fromKey: string; // file::name
+  toKey: string;
+  relationshipType: string;
+  semantic: string;
+}
+
+/**
+ * The exact placeholder string parse-time inheritance edges start as
+ * (`graph-repository.ts:createInheritanceRelationships`). The relationships
+ * LLM stage is supposed to replace it with real prose; if it leaks through to
+ * the produced DB, the LLM dropped the annotation and we report it as MAJOR.
+ */
+const PENDING_LLM_ANNOTATION = 'PENDING_LLM_ANNOTATION';
+
+/**
+ * Compare the `relationship_annotations` table. Async because semantic-bearing
+ * entries call the LLM judge.
+ *
+ * Severity matrix:
+ *   GT relationship missing in produced  → CRITICAL
+ *   relationship_type mismatch           → MAJOR
+ *   semantic === PENDING_LLM_ANNOTATION  → MAJOR (LLM dropped this annotation)
+ *   prose drift below similarity         → MINOR (prose-drift kind)
+ *   extra produced relationships         → IGNORED (intentional — see below)
+ *
+ * Why extras are ignored: squint's symbols stage produces many "uses" edges
+ * from the call graph that we don't enumerate in GT. The eval claim is "all
+ * GT-declared edges exist with valid semantic", not strict equality. This
+ * matches the iteration 3 plan and prevents flaky drift on benign extras.
+ */
+export async function compareRelationshipAnnotations(
+  produced: IndexDatabase,
+  gt: GroundTruth,
+  judgeFn: ProseJudgeFn
+): Promise<TableDiff> {
+  const conn = produced.getConnection();
+  const rows = conn
+    .prepare(
+      `SELECT
+         (ff.path || '::' || fd.name) AS fromKey,
+         (tf.path || '::' || td.name) AS toKey,
+         ra.relationship_type AS relationshipType,
+         ra.semantic AS semantic
+       FROM relationship_annotations ra
+       JOIN definitions fd ON ra.from_definition_id = fd.id
+       JOIN files ff ON fd.file_id = ff.id
+       JOIN definitions td ON ra.to_definition_id = td.id
+       JOIN files tf ON td.file_id = tf.id`
+    )
+    .all() as ProducedRelationshipRow[];
+
+  // Map by edge key `${fromKey}->${toKey}` for O(1) GT lookup.
+  const producedByEdge = new Map<string, ProducedRelationshipRow>();
+  for (const r of rows) {
+    producedByEdge.set(edgeKey(r.fromKey, r.toKey), r);
+  }
+
+  // Set of all definition keys present in produced (for the "GT references
+  // unknown definition" critical case). Same join the dispatcher uses for
+  // definition_metadata.
+  const producedDefKeys = new Set<string>(
+    (
+      conn
+        .prepare("SELECT (f.path || '::' || d.name) AS defKey FROM definitions d JOIN files f ON d.file_id = f.id")
+        .all() as Array<{ defKey: string }>
+    ).map((r) => r.defKey)
+  );
+
+  const expected = gt.relationships ?? [];
+  const diffs: RowDiff[] = [];
+  let proseChecksPassed = 0;
+  let proseChecksFailed = 0;
+
+  for (const entry of expected) {
+    const fromKey = entry.fromDef as unknown as string;
+    const toKey = entry.toDef as unknown as string;
+    const naturalKey = `${fromKey}->${toKey}`;
+
+    // Critical: GT references a definition the produced DB doesn't even have.
+    // Distinguishes "the LLM dropped this edge" from "your GT has a typo".
+    const missingDef = !producedDefKeys.has(fromKey) ? fromKey : !producedDefKeys.has(toKey) ? toKey : null;
+    if (missingDef !== null) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey,
+        details: `Ground truth references unknown definition '${missingDef}' (parsed from ${describeEntry(entry)})`,
+      });
+      continue;
+    }
+
+    const producedRow = producedByEdge.get(edgeKey(fromKey, toKey));
+
+    // Critical: GT-declared edge does not exist in produced.
+    if (!producedRow) {
+      diffs.push({
+        kind: 'missing',
+        severity: 'critical',
+        naturalKey,
+        details: `Relationship ${naturalKey} (${entry.relationshipType}) missing in produced relationship_annotations`,
+      });
+      continue;
+    }
+
+    // Major: relationship_type mismatch (e.g. GT says extends, produced says uses).
+    if (producedRow.relationshipType !== entry.relationshipType) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey,
+        details: `relationship_type: expected '${entry.relationshipType}', produced '${producedRow.relationshipType}'`,
+      });
+      // Don't run prose check or PENDING check for a wrong-type edge — the
+      // type mismatch already trumps everything else for this edge.
+      continue;
+    }
+
+    // Major: the parse-time placeholder leaked through. The relationships
+    // LLM stage was supposed to replace it; the LLM dropped this annotation.
+    if (producedRow.semantic === PENDING_LLM_ANNOTATION) {
+      diffs.push({
+        kind: 'mismatch',
+        severity: 'major',
+        naturalKey,
+        details: `semantic is still '${PENDING_LLM_ANNOTATION}' — relationships annotate stage failed to replace the parse-time placeholder for this edge`,
+      });
+      continue;
+    }
+
+    // Minor (prose-drift): semantic disagrees with the GT reference text.
+    // Skip the judge call if the GT didn't declare a reference — this is an
+    // existence-and-type-only check.
+    if (entry.semanticReference != null) {
+      const minSim = entry.minSimilarity ?? DEFAULT_PROSE_MIN_SIMILARITY;
+      const judgment = await judgeFn({
+        field: `relationship_annotations.semantic for ${naturalKey}`,
+        reference: entry.semanticReference,
+        candidate: producedRow.semantic,
+        minSimilarity: minSim,
+      });
+      if (judgment.passed) {
+        proseChecksPassed += 1;
+      } else {
+        proseChecksFailed += 1;
+        diffs.push({
+          kind: 'prose-drift',
+          severity: 'minor',
+          naturalKey,
+          details: `prose drift: similarity ${judgment.similarity.toFixed(2)} < ${minSim} — ${judgment.reasoning}`,
+        });
+      }
+    }
+  }
+
+  return {
+    table: 'relationship_annotations',
+    passed: tableDiffPassed(diffs),
+    expectedCount: expected.length,
+    producedCount: rows.length,
+    diffs,
+    proseChecks: { passed: proseChecksPassed, failed: proseChecksFailed },
+  };
+}
+
+function edgeKey(fromKey: string, toKey: string): string {
+  return `${fromKey}->${toKey}`;
+}
+
+/**
+ * Pretty-print a GT entry for an error message. Falls back to JSON if the
+ * keys can't be parsed (e.g. caller passed a malformed defKey).
+ */
+function describeEntry(entry: GroundTruthRelationship): string {
+  try {
+    const from = parseDefKey(entry.fromDef);
+    const to = parseDefKey(entry.toDef);
+    return `${from.file}::${from.name} → ${to.file}::${to.name} [${entry.relationshipType}]`;
+  } catch {
+    return JSON.stringify({ from: entry.fromDef, to: entry.toDef, type: entry.relationshipType });
+  }
+}
diff --git a/evals/harness/comparator/tables/shared.ts b/evals/harness/comparator/tables/shared.ts
new file mode 100644
index 0000000..a7c0350
--- /dev/null
+++ b/evals/harness/comparator/tables/shared.ts
@@ -0,0 +1,40 @@
+/**
+ * Shared helpers used by multiple per-table comparators.
+ *
+ * Kept tiny on purpose — anything specific to a single table belongs in that
+ * table's file.
+ */
+
+/** Definition `line` field tolerance: ground truth declares approximate lines. */
+export const LINE_TOLERANCE = 2;
+
+/** Default minimum LLM-judged similarity score for a `proseReference` to pass. */
+export const DEFAULT_PROSE_MIN_SIMILARITY = 0.75;
+
+/**
+ * Parse a SQLite TEXT column that holds a JSON array of strings.
+ * Returns null on missing column or malformed JSON. Used for `domain`,
+ * `implementsNames`, `extendsInterfaces`, and `interactions.symbols`.
+ */
+export function parseJsonStringArray(value: string | null): string[] | null {
+  if (value == null) return null;
+  try {
+    const parsed = JSON.parse(value);
+    return Array.isArray(parsed) ? parsed.map(String) : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Order-independent string-array equality. Used by definition comparators
+ * to compare implementsNames / extendsInterfaces sets.
+ */
+export function arraysEqualSorted(a: readonly string[] | null, b: readonly string[] | null): boolean {
+  if (a == null && b == null) return true;
+  if (a == null || b == null) return false;
+  if (a.length !== b.length) return false;
+  const sa = [...a].sort();
+  const sb = [...b].sort();
+  return sa.every((v, i) => v === sb[i]);
+}
diff --git a/evals/harness/fixture-config.ts b/evals/harness/fixture-config.ts
new file mode 100644
index 0000000..bb794e1
--- /dev/null
+++ b/evals/harness/fixture-config.ts
@@ -0,0 +1,57 @@
+import { execSync } from 'node:child_process';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+/**
+ * Per-fixture path layout. One `defineFixture()` call replaces ~10 hardcoded
+ * path constants in each eval test file. New fixtures get the same layout for free.
+ */
+export interface FixtureConfig {
+  /** Short name (matches fixture directory and baseline filename). */
+  name: string;
+  /** Absolute path to the squint repo root. */
+  repoRoot: string;
+  /** Absolute path to the fixture sources (evals/fixtures/<name>). */
+  fixtureDir: string;
+  /** Absolute path to the per-run results directory (evals/results). */
+  resultsRoot: string;
+  /** Absolute path to the persisted baseline JSON (evals/baselines/<name>.json). */
+  baselinePath: string;
+  /** Absolute path to the squint dev binary. */
+  squintBin: string;
+  /**
+   * Absolute path to the LLM judge cache. Lives OUTSIDE evals/results/ so the
+   * results-rotation cleanup cannot delete it. Gitignored.
+   */
+  judgeCachePath: string;
+  /** Resolve the current squint git short SHA, or 'unknown' on failure. */
+  squintCommit: () => string;
+}
+
+export function defineFixture(name: string): FixtureConfig {
+  // __dirname for this file is evals/harness/. Repo root is two levels up.
+  const __dirname = path.dirname(fileURLToPath(import.meta.url));
+  const repoRoot = path.resolve(__dirname, '..', '..');
+
+  return {
+    name,
+    repoRoot,
+    fixtureDir: path.resolve(repoRoot, 'evals/fixtures', name),
+    resultsRoot: path.resolve(repoRoot, 'evals/results'),
+    baselinePath: path.resolve(repoRoot, 'evals/baselines', `${name}.json`),
+    // Use bin/run.js (compiled) instead of bin/dev.js (TS loader). bin/dev.js
+    // breaks when tsx is in devDependencies because oclif's dev-mode TS loader
+    // detection fails on @oclif/core 4.8 + tsx 4.21. Compiled mode is also
+    // closer to how end users invoke squint, so eval runs are more
+    // production-realistic. Requires `pnpm run build:server` first.
+    squintBin: path.resolve(repoRoot, 'bin/run.js'),
+    judgeCachePath: path.resolve(repoRoot, 'evals/.judge-cache.json'),
+    squintCommit: () => {
+      try {
+        return execSync('git rev-parse --short HEAD', { cwd: repoRoot }).toString().trim();
+      } catch {
+        return 'unknown';
+      }
+    },
+  };
+}
diff --git a/evals/harness/iteration.ts b/evals/harness/iteration.ts
new file mode 100644
index 0000000..527ca93
--- /dev/null
+++ b/evals/harness/iteration.ts
@@ -0,0 +1,161 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { IndexDatabase } from '../../src/db/database-facade.js';
+import { compare } from './comparator/index.js';
+import type { FixtureConfig } from './fixture-config.js';
+import { updateBaseline } from './reporter/baseline.js';
+import { renderJsonReport, renderMarkdownReport } from './reporter/index.js';
+import { rotateResults } from './results-rotation.js';
+import { type RunResult, type StageId, runIngest } from './runner.js';
+import { type ProseJudgeFn, type TableName, makeStubJudge } from './types.js';
+import type { DiffReport, GroundTruth } from './types.js';
+
+/**
+ * One end-to-end iteration of the eval loop:
+ *   1. Spawn `squint ingest --to-stage <stage>` against the fixture
+ *   2. Cost guardrail (refuses to run if estimated cost exceeds budget)
+ *   3. Open the produced DB and call compare()
+ *   4. Persist diff.md + diff.json + baseline + rotate
+ *   5. Echo a one-line summary to stdout
+ *   6. Throw on critical/major diffs (test framework picks it up)
+ *
+ * Replaces the ~80 LOC of boilerplate that was duplicated between
+ * iteration 1 and 2 blocks in todo-api.eval.ts. New iterations are now
+ * ~10 lines.
+ */
+
+export interface IterationStepOptions {
+  /** Fixture paths and metadata. */
+  fixture: FixtureConfig;
+  /** Ground truth for this fixture (the same object across iterations). */
+  groundTruth: GroundTruth;
+  /** Human-readable label for logging (e.g. "parse", "symbols"). */
+  label: string;
+  /** Last pipeline stage to run via `squint ingest --to-stage`. */
+  toStage: StageId;
+  /** Tables to compare against ground truth. */
+  scope: TableName[];
+  /**
+   * Prose judge. Default: makeStubJudge() — fine for parse-only iterations.
+   * For LLM stages with prose references, pass `makeLlmProseJudge({...})`.
+   */
+  judgeFn?: ProseJudgeFn;
+  /** Per-stage timeout in ms. Default 60s. */
+  timeoutMs?: number;
+  /**
+   * Cost budget in USD. Default reads EVAL_COST_BUDGET_USD env var or 0.10.
+   * If the squint subprocess reports a higher running cost, the eval throws.
+   */
+  costBudgetUsd?: number;
+  /**
+   * Inject `runIngest` (for tests). Defaults to the real subprocess runner.
+   */
+  runIngestFn?: typeof runIngest;
+}
+
+export interface IterationStepResult {
+  report: DiffReport;
+  runResult: RunResult;
+  runDir: string;
+}
+
+export async function runIterationStep(opts: IterationStepOptions): Promise<IterationStepResult> {
+  const { fixture, groundTruth, label, toStage, scope } = opts;
+  const judgeFn = opts.judgeFn ?? makeStubJudge();
+  const timeoutMs = opts.timeoutMs ?? 60_000;
+  const budget = opts.costBudgetUsd ?? Number(process.env.EVAL_COST_BUDGET_USD ?? '0.10');
+  const runIngestImpl = opts.runIngestFn ?? runIngest;
+
+  // ----------------------------------------------------------
+  // 1. Per-run results directory
+  // ----------------------------------------------------------
+  const ts = new Date().toISOString().replace(/[:.]/g, '-');
+  const runDir = path.join(fixture.resultsRoot, ts);
+  fs.mkdirSync(runDir, { recursive: true });
+  const producedDbPath = path.join(runDir, 'produced.db');
+
+  // ----------------------------------------------------------
+  // 2. Run squint ingest --to-stage <stage>
+  // ----------------------------------------------------------
+  const runResult = await runIngestImpl({
+    fixtureDir: fixture.fixtureDir,
+    outputDb: producedDbPath,
+    toStage,
+    timeoutMs,
+    stdoutPath: path.join(runDir, 'stdout.log'),
+    stderrPath: path.join(runDir, 'stderr.log'),
+    squintBin: fixture.squintBin,
+  });
+
+  if (runResult.exitCode !== 0) {
+    throw new Error(
+      `squint ingest --to-stage ${toStage} failed (exit ${runResult.exitCode}); see ${runResult.stderrPath}`
+    );
+  }
+  if (!fs.existsSync(producedDbPath)) {
+    throw new Error(`squint ingest succeeded but produced DB is missing at ${producedDbPath}`);
+  }
+
+  // Cost guardrail — only enforces when squint actually reported a cost.
+  // (Stages with no LLM calls return undefined; that's fine.)
+  if (runResult.costEstimate != null && runResult.costEstimate > budget) {
+    throw new Error(
+      `squint ingest cost $${runResult.costEstimate.toFixed(4)} exceeded budget $${budget.toFixed(2)} (override via EVAL_COST_BUDGET_USD)`
+    );
+  }
+
+  // ----------------------------------------------------------
+  // 3. Compare produced vs ground truth
+  // ----------------------------------------------------------
+  const produced = new IndexDatabase(producedDbPath);
+  let report: DiffReport;
+  try {
+    report = await compare({
+      produced,
+      groundTruth,
+      scope,
+      judgeFn,
+      squintCommit: fixture.squintCommit(),
+    });
+  } finally {
+    produced.close();
+  }
+
+  // ----------------------------------------------------------
+  // 4. Persist diff report + update baseline + rotate
+  // ----------------------------------------------------------
+  fs.writeFileSync(path.join(runDir, 'diff.md'), renderMarkdownReport(report));
+  fs.writeFileSync(path.join(runDir, 'diff.json'), renderJsonReport(report));
+  const baselineUpdate = updateBaseline(fixture.baselinePath, report);
+  rotateResults(fixture.resultsRoot, 10);
+
+  // ----------------------------------------------------------
+  // 5. Echo summary
+  // ----------------------------------------------------------
+  const proseTotal = report.summary.proseChecks.passed + report.summary.proseChecks.failed;
+  const proseStr = proseTotal > 0 ? ` prose=${report.summary.proseChecks.passed}/${proseTotal}` : '';
+  const costStr = runResult.costEstimate != null ? ` cost=$${runResult.costEstimate.toFixed(4)}` : '';
+  // eslint-disable-next-line no-console
+  console.log(
+    `[eval] ${fixture.name} ${label} → critical=${report.summary.critical} major=${report.summary.major} minor=${report.summary.minor}${proseStr}${costStr} (report: ${path.relative(fixture.repoRoot, runDir)})`
+  );
+  for (const reg of baselineUpdate.regressions) {
+    // eslint-disable-next-line no-console
+    console.log(`[eval] regression: ${reg}`);
+  }
+  for (const imp of baselineUpdate.improvements) {
+    // eslint-disable-next-line no-console
+    console.log(`[eval] improvement: ${imp}`);
+  }
+
+  // ----------------------------------------------------------
+  // 6. Throw on critical/major diffs (test framework picks up)
+  // ----------------------------------------------------------
+  if (!report.passed) {
+    throw new Error(
+      `Iteration '${label}' failed: see ${path.relative(fixture.repoRoot, path.join(runDir, 'diff.md'))}`
+    );
+  }
+
+  return { report, runResult, runDir };
+}
diff --git a/evals/harness/reporter/baseline.test.ts b/evals/harness/reporter/baseline.test.ts
new file mode 100644
index 0000000..fb9256a
--- /dev/null
+++ b/evals/harness/reporter/baseline.test.ts
@@ -0,0 +1,151 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import type { DiffReport } from '../types.js';
+import { computeBaselineFromReport, loadBaseline, updateBaseline } from './baseline.js';
+
+/**
+ * The baseline scoreboard at evals/baselines/<fixture>.json tracks
+ * pass-rate per stage across iterations. The reporter computes a delta
+ * (improvements vs regressions) when updating it so PR review can see
+ * progress at a glance.
+ */
+describe('baseline scoreboard', () => {
+  let dir: string;
+  let baselinePath: string;
+
+  beforeEach(() => {
+    dir = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-eval-base-'));
+    baselinePath = path.join(dir, 'todo-api.json');
+  });
+
+  afterEach(() => {
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  const sampleReport: DiffReport = {
+    fixtureName: 'todo-api',
+    passed: true,
+    scope: ['files', 'definitions'],
+    tables: [
+      { table: 'files', passed: true, expectedCount: 13, producedCount: 13, diffs: [] },
+      { table: 'definitions', passed: true, expectedCount: 42, producedCount: 42, diffs: [] },
+    ],
+    summary: { critical: 0, major: 0, minor: 0, proseChecks: { passed: 0, failed: 0 } },
+    durationMs: 1000,
+    squintCommit: 'abc123',
+  };
+
+  describe('computeBaselineFromReport', () => {
+    it('extracts a stage scorecard from the report', () => {
+      const baseline = computeBaselineFromReport(sampleReport);
+      expect(baseline.fixture).toBe('todo-api');
+      expect(baseline.squintCommit).toBe('abc123');
+      expect(baseline.tableScores).toEqual({
+        files: { passed: true, expected: 13, produced: 13, critical: 0, major: 0, minor: 0 },
+        definitions: { passed: true, expected: 42, produced: 42, critical: 0, major: 0, minor: 0 },
+      });
+    });
+
+    it('counts diffs by severity per table', () => {
+      const failingReport: DiffReport = {
+        ...sampleReport,
+        passed: false,
+        tables: [
+          {
+            table: 'definitions',
+            passed: false,
+            expectedCount: 42,
+            producedCount: 40,
+            diffs: [
+              { kind: 'missing', severity: 'critical', naturalKey: 'a', details: '' },
+              { kind: 'mismatch', severity: 'major', naturalKey: 'b', details: '' },
+              { kind: 'mismatch', severity: 'minor', naturalKey: 'c', details: '' },
+              { kind: 'mismatch', severity: 'minor', naturalKey: 'd', details: '' },
+            ],
+          },
+        ],
+        summary: { critical: 1, major: 1, minor: 2, proseChecks: { passed: 0, failed: 0 } },
+      };
+      const baseline = computeBaselineFromReport(failingReport);
+      expect(baseline.tableScores.definitions).toEqual({
+        passed: false,
+        expected: 42,
+        produced: 40,
+        critical: 1,
+        major: 1,
+        minor: 2,
+      });
+    });
+  });
+
+  describe('loadBaseline', () => {
+    it('returns null if no baseline file exists', () => {
+      expect(loadBaseline(baselinePath)).toBeNull();
+    });
+
+    it('parses an existing baseline JSON file', () => {
+      const baseline = computeBaselineFromReport(sampleReport);
+      fs.writeFileSync(baselinePath, JSON.stringify(baseline, null, 2));
+      const loaded = loadBaseline(baselinePath);
+      expect(loaded?.fixture).toBe('todo-api');
+      expect(loaded?.tableScores.files?.passed).toBe(true);
+    });
+  });
+
+  describe('updateBaseline', () => {
+    it('writes a new baseline file', () => {
+      const result = updateBaseline(baselinePath, sampleReport);
+      expect(fs.existsSync(baselinePath)).toBe(true);
+      expect(result.improvements).toEqual([]);
+      expect(result.regressions).toEqual([]);
+    });
+
+    it('detects regressions vs prior baseline', () => {
+      // Write a passing baseline first
+      updateBaseline(baselinePath, sampleReport);
+      // Now produce a failing report
+      const failing: DiffReport = {
+        ...sampleReport,
+        passed: false,
+        tables: [
+          { table: 'files', passed: true, expectedCount: 13, producedCount: 13, diffs: [] },
+          {
+            table: 'definitions',
+            passed: false,
+            expectedCount: 42,
+            producedCount: 40,
+            diffs: [{ kind: 'missing', severity: 'critical', naturalKey: 'x', details: '' }],
+          },
+        ],
+        summary: { critical: 1, major: 0, minor: 0, proseChecks: { passed: 0, failed: 0 } },
+      };
+      const result = updateBaseline(baselinePath, failing);
+      expect(result.regressions).toEqual([expect.stringContaining('definitions')]);
+      expect(result.improvements).toEqual([]);
+    });
+
+    it('detects improvements vs prior baseline', () => {
+      const failing: DiffReport = {
+        ...sampleReport,
+        passed: false,
+        tables: [
+          { table: 'files', passed: true, expectedCount: 13, producedCount: 13, diffs: [] },
+          {
+            table: 'definitions',
+            passed: false,
+            expectedCount: 42,
+            producedCount: 40,
+            diffs: [{ kind: 'missing', severity: 'critical', naturalKey: 'x', details: '' }],
+          },
+        ],
+        summary: { critical: 1, major: 0, minor: 0, proseChecks: { passed: 0, failed: 0 } },
+      };
+      updateBaseline(baselinePath, failing);
+      const result = updateBaseline(baselinePath, sampleReport);
+      expect(result.improvements).toEqual([expect.stringContaining('definitions')]);
+      expect(result.regressions).toEqual([]);
+    });
+  });
+});
diff --git a/evals/harness/reporter/baseline.ts b/evals/harness/reporter/baseline.ts
new file mode 100644
index 0000000..b77b303
--- /dev/null
+++ b/evals/harness/reporter/baseline.ts
@@ -0,0 +1,103 @@
+import fs from 'node:fs';
+import { countDiffsBySeverity } from '../comparator/severity.js';
+import type { DiffReport, TableName } from '../types.js';
+
+/**
+ * Per-table scoreboard within a baseline.
+ */
+export interface TableScore {
+  passed: boolean;
+  expected: number;
+  produced: number;
+  critical: number;
+  major: number;
+  minor: number;
+}
+
+/**
+ * Persisted scoreboard per fixture, committed to git so PR review can see
+ * the eval delta at a glance.
+ */
+export interface Baseline {
+  fixture: string;
+  lastRun: string; // ISO timestamp
+  squintCommit?: string;
+  tableScores: Partial<Record<TableName, TableScore>>;
+}
+
+export interface BaselineUpdateResult {
+  improvements: string[];
+  regressions: string[];
+  baseline: Baseline;
+}
+
+/**
+ * Compute a baseline scorecard from a single DiffReport.
+ */
+export function computeBaselineFromReport(report: DiffReport): Baseline {
+  const tableScores: Partial<Record<TableName, TableScore>> = {};
+  for (const t of report.tables) {
+    const counts = countDiffsBySeverity(t.diffs);
+    tableScores[t.table] = {
+      passed: t.passed,
+      expected: t.expectedCount,
+      produced: t.producedCount,
+      ...counts,
+    };
+  }
+
+  return {
+    fixture: report.fixtureName,
+    lastRun: new Date().toISOString(),
+    squintCommit: report.squintCommit,
+    tableScores,
+  };
+}
+
+/**
+ * Load a baseline JSON file from disk. Returns null if it does not exist.
+ */
+export function loadBaseline(filePath: string): Baseline | null {
+  if (!fs.existsSync(filePath)) return null;
+  const raw = fs.readFileSync(filePath, 'utf-8');
+  return JSON.parse(raw) as Baseline;
+}
+
+/**
+ * Update a baseline file with the new report. Computes a delta vs the prior
+ * baseline (if any), writes the new baseline to disk, and returns the delta.
+ */
+export function updateBaseline(filePath: string, report: DiffReport): BaselineUpdateResult {
+  const prior = loadBaseline(filePath);
+  const next = computeBaselineFromReport(report);
+
+  const improvements: string[] = [];
+  const regressions: string[] = [];
+
+  if (prior) {
+    for (const [table, nextScore] of Object.entries(next.tableScores)) {
+      const priorScore = prior.tableScores[table as TableName];
+      if (!priorScore || !nextScore) continue;
+      if (priorScore.passed && !nextScore.passed) {
+        regressions.push(`${table}: pass → fail`);
+      } else if (!priorScore.passed && nextScore.passed) {
+        improvements.push(`${table}: fail → pass`);
+      } else if (!nextScore.passed && !priorScore.passed) {
+        // Both failing — measure severity counts
+        const priorTotal = priorScore.critical + priorScore.major;
+        const nextTotal = nextScore.critical + nextScore.major;
+        if (nextTotal > priorTotal) {
+          regressions.push(`${table}: ${priorTotal} → ${nextTotal} blocking diffs`);
+        } else if (nextTotal < priorTotal) {
+          improvements.push(`${table}: ${priorTotal} → ${nextTotal} blocking diffs`);
+        }
+      }
+    }
+  }
+
+  // Trailing newline keeps biome's default JSON formatter happy on every
+  // commit (it would otherwise re-flag the auto-updated baseline forever).
+  fs.writeFileSync(filePath, `${JSON.stringify(next, null, 2)}\n`);
+
+  return { improvements, regressions, baseline: next };
+}
diff --git a/evals/harness/reporter/index.ts b/evals/harness/reporter/index.ts
new file mode 100644
index 0000000..45fe23c
--- /dev/null
+++ b/evals/harness/reporter/index.ts
@@ -0,0 +1,86 @@
+import type { DiffReport, RowDiff, Severity, TableDiff } from '../types.js';
+
+/**
+ * Render a DiffReport as a human-readable Markdown document for triage.
+ */
+export function renderMarkdownReport(report: DiffReport): string {
+  const badge = report.passed ? '✅ PASS' : '❌ FAIL';
+  const lines: string[] = [];
+
+  lines.push(`# Squint Eval Report — ${report.fixtureName} — ${badge}`);
+  lines.push('');
+  if (report.squintCommit) {
+    lines.push(`**Squint commit**: \`${report.squintCommit}\``);
+  }
+  lines.push(`**Duration**: ${report.durationMs}ms`);
+  lines.push(`**Scope**: ${report.scope.join(', ')}`);
+  lines.push('');
+  lines.push('## Summary');
+  lines.push('');
+  lines.push(`- Critical: ${report.summary.critical}`);
+  lines.push(`- Major: ${report.summary.major}`);
+  lines.push(`- Minor: ${report.summary.minor}`);
+  if (report.summary.proseChecks.passed + report.summary.proseChecks.failed > 0) {
+    lines.push(
+      `- Prose checks: ${report.summary.proseChecks.passed} passed, ${report.summary.proseChecks.failed} failed`
+    );
+  }
+  lines.push('');
+
+  for (const table of report.tables) {
+    lines.push(...renderTableSection(table));
+    lines.push('');
+  }
+
+  return lines.join('\n');
+}
+
+function renderTableSection(table: TableDiff): string[] {
+  const status = table.passed ? '✅' : '❌';
+  const lines: string[] = [];
+  lines.push(`## Table: ${table.table} ${status} (${table.producedCount}/${table.expectedCount})`);
+  lines.push('');
+
+  if (table.diffs.length === 0) {
+    lines.push('All rows matched.');
+    return lines;
+  }
+
+  // Group by severity in display order
+  const order: Severity[] = ['critical', 'major', 'minor'];
+  const labels: Record<Severity, string> = {
+    critical: '### 🔴 CRITICAL',
+    major: '### 🟠 Major',
+    minor: '### 🟡 Minor',
+  };
+
+  for (const sev of order) {
+    const subset = table.diffs.filter((d) => d.severity === sev);
+    if (subset.length === 0) continue;
+    lines.push(labels[sev]);
+    lines.push('');
+    for (const d of subset) {
+      lines.push(...renderRowDiff(d));
+    }
+    lines.push('');
+  }
+
+  return lines;
+}
+
+function renderRowDiff(d: RowDiff): string[] {
+  const lines: string[] = [];
+  lines.push(`- **${d.kind}** \`${d.naturalKey}\``);
+  lines.push(`  - ${d.details}`);
+  if (d.fixHintId) {
+    lines.push(`  - Fix hint: \`${d.fixHintId}\``);
+  }
+  return lines;
+}
+
+/**
+ * Render a DiffReport as pretty-printed JSON for the baseline scoreboard / CI.
+ */
+export function renderJsonReport(report: DiffReport): string {
+  return JSON.stringify(report, null, 2);
+}
diff --git a/evals/harness/reporter/reporter.test.ts b/evals/harness/reporter/reporter.test.ts
new file mode 100644
index 0000000..6669e61
--- /dev/null
+++ b/evals/harness/reporter/reporter.test.ts
@@ -0,0 +1,159 @@
+import { describe, expect, it } from 'vitest';
+import type { DiffReport } from '../types.js';
+import { renderJsonReport, renderMarkdownReport } from './index.js';
+
+/**
+ * Reporter tests use frozen DiffReport inputs and assert on the rendered
+ * output. Snapshot-style: precise enough to catch regressions in formatting
+ * but not so brittle that minor wording changes break everything.
+ */
+describe('reporter', () => {
+  const passingReport: DiffReport = {
+    fixtureName: 'todo-api',
+    passed: true,
+    scope: ['files', 'definitions'],
+    tables: [
+      {
+        table: 'files',
+        passed: true,
+        expectedCount: 13,
+        producedCount: 13,
+        diffs: [],
+      },
+      {
+        table: 'definitions',
+        passed: true,
+        expectedCount: 42,
+        producedCount: 42,
+        diffs: [],
+      },
+    ],
+    summary: { critical: 0, major: 0, minor: 0, proseChecks: { passed: 0, failed: 0 } },
+    durationMs: 1234,
+    squintCommit: 'c938a65',
+  };
+
+  const failingReport: DiffReport = {
+    fixtureName: 'todo-api',
+    passed: false,
+    scope: ['files', 'definitions', 'contracts'],
+    tables: [
+      { table: 'files', passed: true, expectedCount: 13, producedCount: 13, diffs: [] },
+      {
+        table: 'definitions',
+        passed: false,
+        expectedCount: 42,
+        producedCount: 41,
+        diffs: [
+          {
+            kind: 'missing',
+            severity: 'critical',
+            naturalKey: 'src/foo.ts::missingFn',
+            details: 'Definition missing',
+          },
+          {
+            kind: 'mismatch',
+            severity: 'minor',
+            naturalKey: 'src/foo.ts::Foo',
+            details: 'line: expected 5 (±2), produced 12',
+          },
+        ],
+      },
+      {
+        table: 'contracts',
+        passed: false,
+        expectedCount: 4,
+        producedCount: 3,
+        diffs: [
+          {
+            kind: 'missing',
+            severity: 'critical',
+            naturalKey: 'events::task.completed',
+            details: 'Contract missing',
+            fixHintId: 'events-pubsub-detection',
+          },
+        ],
+      },
+    ],
+    summary: { critical: 2, major: 0, minor: 1, proseChecks: { passed: 0, failed: 0 } },
+    durationMs: 5432,
+    squintCommit: 'abc1234',
+  };
+
+  describe('renderMarkdownReport', () => {
+    it('starts with a header containing the fixture name and pass/fail badge', () => {
+      const md = renderMarkdownReport(passingReport);
+      expect(md).toContain('# Squint Eval Report — todo-api');
+      expect(md).toContain('PASS');
+    });
+
+    it('shows fail badge for failing reports', () => {
+      const md = renderMarkdownReport(failingReport);
+      expect(md).toContain('FAIL');
+    });
+
+    it('lists per-table sections with counts', () => {
+      const md = renderMarkdownReport(passingReport);
+      expect(md).toContain('## Table: files');
+      expect(md).toContain('13/13');
+      expect(md).toContain('## Table: definitions');
+      expect(md).toContain('42/42');
+    });
+
+    it('renders critical diffs with prominent severity tags', () => {
+      const md = renderMarkdownReport(failingReport);
+      expect(md).toContain('CRITICAL');
+      expect(md).toContain('src/foo.ts::missingFn');
+      expect(md).toContain('events::task.completed');
+    });
+
+    it('groups diffs by severity within a table section', () => {
+      const md = renderMarkdownReport(failingReport);
+      // Critical section should appear before minor in the definitions block
+      const defsSection = md.split('## Table: definitions')[1].split('## Table:')[0];
+      const criticalIdx = defsSection.indexOf('CRITICAL');
+      const minorIdx = defsSection.indexOf('Minor');
+      expect(criticalIdx).toBeGreaterThan(-1);
+      expect(minorIdx).toBeGreaterThan(criticalIdx);
+    });
+
+    it('shows the summary line with severity counts', () => {
+      const md = renderMarkdownReport(failingReport);
+      expect(md).toMatch(/Critical:\s*2/);
+      expect(md).toMatch(/Major:\s*0/);
+      expect(md).toMatch(/Minor:\s*1/);
+    });
+
+    it('includes the squint commit', () => {
+      const md = renderMarkdownReport(passingReport);
+      expect(md).toContain('c938a65');
+    });
+
+    it('shows fix-hint id when present', () => {
+      const md = renderMarkdownReport(failingReport);
+      expect(md).toContain('events-pubsub-detection');
+    });
+  });
+
+  describe('renderJsonReport', () => {
+    it('produces valid JSON', () => {
+      const json = renderJsonReport(passingReport);
+      expect(() => JSON.parse(json)).not.toThrow();
+    });
+
+    it('preserves all critical fields', () => {
+      const json = renderJsonReport(failingReport);
+      const parsed = JSON.parse(json) as DiffReport;
+      expect(parsed.fixtureName).toBe('todo-api');
+      expect(parsed.passed).toBe(false);
+      expect(parsed.summary.critical).toBe(2);
+      expect(parsed.tables).toHaveLength(3);
+      expect(parsed.tables[1].diffs).toHaveLength(2);
+    });
+
+    it('is pretty-printed (multi-line)', () => {
+      const json = renderJsonReport(passingReport);
+      expect(json.split('\n').length).toBeGreaterThan(5);
+    });
+  });
+});
diff --git a/evals/harness/results-rotation.test.ts b/evals/harness/results-rotation.test.ts
new file mode 100644
index 0000000..fc6a4bd
--- /dev/null
+++ b/evals/harness/results-rotation.test.ts
@@ -0,0 +1,78 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { rotateResults } from './results-rotation.js';
+
+describe('rotateResults', () => {
+  let root: string;
+
+  beforeEach(() => {
+    root = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-eval-rotate-'));
+  });
+
+  afterEach(() => {
+    fs.rmSync(root, { recursive: true, force: true });
+    process.env.EVAL_KEEP_ALL = undefined;
+  });
+
+  function makeRun(name: string, mtimeOffsetMs: number): void {
+    const dir = path.join(root, name);
+    fs.mkdirSync(dir, { recursive: true });
+    // Touch a file inside so the dir mtime is meaningful
+    fs.writeFileSync(path.join(dir, 'diff.md'), 'x');
+    const t = new Date(Date.now() + mtimeOffsetMs);
+    fs.utimesSync(dir, t, t);
+  }
+
+  it('keeps the N most recent run directories', () => {
+    makeRun('run-1', -5000);
+    makeRun('run-2', -4000);
+    makeRun('run-3', -3000);
+    makeRun('run-4', -2000);
+    makeRun('run-5', -1000);
+
+    const result = rotateResults(root, 3);
+
+    expect(result.kept.sort()).toEqual(['run-3', 'run-4', 'run-5']);
+    expect(result.removed.sort()).toEqual(['run-1', 'run-2']);
+    expect(fs.existsSync(path.join(root, 'run-1'))).toBe(false);
+    expect(fs.existsSync(path.join(root, 'run-5'))).toBe(true);
+  });
+
+  it('keeps everything when total runs <= keep', () => {
+    makeRun('a', -1000);
+    makeRun('b', 0);
+    const result = rotateResults(root, 5);
+    expect(result.removed).toEqual([]);
+    expect(fs.existsSync(path.join(root, 'a'))).toBe(true);
+    expect(fs.existsSync(path.join(root, 'b'))).toBe(true);
+  });
+
+  it('ignores non-directory entries (e.g. .gitkeep)', () => {
+    makeRun('run-1', 0);
+    fs.writeFileSync(path.join(root, '.gitkeep'), '');
+    const result = rotateResults(root, 1);
+    expect(result.kept).toEqual(['run-1']);
+    expect(result.removed).toEqual([]);
+    expect(fs.existsSync(path.join(root, '.gitkeep'))).toBe(true);
+  });
+
+  it('is a no-op when EVAL_KEEP_ALL=1', () => {
+    makeRun('a', -3000);
+    makeRun('b', -2000);
+    makeRun('c', -1000);
+    process.env.EVAL_KEEP_ALL = '1';
+    const result = rotateResults(root, 1);
+    expect(result.removed).toEqual([]);
+    expect(fs.existsSync(path.join(root, 'a'))).toBe(true);
+    expect(fs.existsSync(path.join(root, 'b'))).toBe(true);
+    expect(fs.existsSync(path.join(root, 'c'))).toBe(true);
+  });
+
+  it('handles a missing results directory gracefully', () => {
+    const nonExistent = path.join(root, 'never-created');
+    const result = rotateResults(nonExistent, 5);
+    expect(result).toEqual({ kept: [], removed: [] });
+  });
+});
diff --git a/evals/harness/results-rotation.ts b/evals/harness/results-rotation.ts
new file mode 100644
index 0000000..821c24a
--- /dev/null
+++ b/evals/harness/results-rotation.ts
@@ -0,0 +1,41 @@
+import fs from 'node:fs';
+import path from 'node:path';
+
+/**
+ * Rotate eval result directories — keep only the N most recent runs.
+ *
+ * Each "run" is a sub-directory of `resultsRoot` whose name is an ISO timestamp
+ * (e.g., `2026-04-07T20-45-29-454Z`). Non-directory entries and the `.gitkeep`
+ * file are ignored. The newest `keep` directories are retained; the rest are
+ * deleted recursively.
+ *
+ * Override with EVAL_KEEP_ALL=1 to disable rotation entirely.
+ */
+export function rotateResults(resultsRoot: string, keep = 10): { kept: string[]; removed: string[] } {
+  if (process.env.EVAL_KEEP_ALL === '1') {
+    return { kept: [], removed: [] };
+  }
+  if (!fs.existsSync(resultsRoot)) {
+    return { kept: [], removed: [] };
+  }
+
+  const entries = fs
+    .readdirSync(resultsRoot, { withFileTypes: true })
+    .filter((e) => e.isDirectory())
+    .map((e) => ({
+      name: e.name,
+      mtimeMs: fs.statSync(path.join(resultsRoot, e.name)).mtimeMs,
+    }))
+    // Sort newest-first by mtime (timestamp dirs are also lexicographically sortable
+    // but mtime is more robust against clock skew or manual edits).
+    .sort((a, b) => b.mtimeMs - a.mtimeMs);
+
+  const kept = entries.slice(0, keep).map((e) => e.name);
+  const toRemove = entries.slice(keep);
+
+  for (const r of toRemove) {
+    fs.rmSync(path.join(resultsRoot, r.name), { recursive: true, force: true });
+  }
+
+  return { kept, removed: toRemove.map((r) => r.name) };
+}
diff --git a/evals/harness/runner.test.ts b/evals/harness/runner.test.ts
new file mode 100644
index 0000000..2039965
--- /dev/null
+++ b/evals/harness/runner.test.ts
@@ -0,0 +1,241 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { buildIngestArgv, parseCostLine, runIngest } from './runner.js';
+
+/**
+ * The runner spawns `squint ingest` as a subprocess. Tests cover:
+ * - argv shape (no real subprocess needed — pure function)
+ * - cost line parsing (pure function)
+ * - timeout / exit code handling (with a fake spawn)
+ *
+ * No real subprocess is launched in this test file.
+ */
+describe('runner — buildIngestArgv', () => {
+  it('emits the minimal required argv', () => {
+    const argv = buildIngestArgv({
+      fixtureDir: '/abs/fixture',
+      outputDb: '/abs/produced.db',
+    });
+    expect(argv).toEqual(['ingest', '/abs/fixture', '-o', '/abs/produced.db']);
+  });
+
+  it('passes --from-stage and --to-stage when provided', () => {
+    const argv = buildIngestArgv({
+      fixtureDir: '/f',
+      outputDb: '/p.db',
+      fromStage: 'parse',
+      toStage: 'parse',
+    });
+    expect(argv).toContain('--from-stage');
+    expect(argv).toContain('parse');
+    expect(argv).toContain('--to-stage');
+    // both occurrences of 'parse' present
+    expect(argv.filter((x) => x === 'parse')).toHaveLength(2);
+  });
+
+  it('passes -m model when provided', () => {
+    const argv = buildIngestArgv({
+      fixtureDir: '/f',
+      outputDb: '/p.db',
+      model: 'openrouter:google/gemini-2.5-flash',
+    });
+    expect(argv).toContain('-m');
+    expect(argv).toContain('openrouter:google/gemini-2.5-flash');
+  });
+
+  it('passes --force when requested', () => {
+    const argv = buildIngestArgv({ fixtureDir: '/f', outputDb: '/p.db', force: true });
+    expect(argv).toContain('--force');
+  });
+});
+
+describe('runner — parseCostLine', () => {
+  it('parses a "Total cost: $X" line', () => {
+    expect(parseCostLine('  Total cost: $0.0123')).toBe(0.0123);
+    expect(parseCostLine('Total cost: $0.50')).toBe(0.5);
+  });
+
+  it('parses a "cost: $X" line', () => {
+    expect(parseCostLine('cost: $0.05')).toBe(0.05);
+  });
+
+  it('parses squint\'s actual "← LLM" summary line format (the format that matters in production)', () => {
+    // This is what squint actually emits — captured from a real run.
+    // See src/commands/llm/_shared/llm-utils.ts:310-318 (formatCost + parts.join).
+    expect(parseCostLine('  ← LLM  4.6s  in: 2,930  out: 603  cached: 0  $0.0024  [2/200]')).toBe(0.0024);
+    expect(parseCostLine('  ← LLM  2.2s  in: 3,010  out: 397  cached: 0  $0.0019')).toBe(0.0019);
+    expect(parseCostLine('  ← LLM  1.6s  in: 1,720  out: 194  cached: 0  $0.0010  [5/200]')).toBe(0.001);
+    // Larger amounts (≥$0.01) — squint formats them with two decimals
+    expect(parseCostLine('  ← LLM  5s  in: 100  out: 100  cached: 0  $0.50')).toBe(0.5);
+  });
+
+  it('returns null for non-cost lines', () => {
+    expect(parseCostLine('parsing files...')).toBeNull();
+    expect(parseCostLine('')).toBeNull();
+    expect(parseCostLine('  → LLM  openrouter:google/gemini-2.5-flash  ~3,500 tok')).toBeNull();
+  });
+});
+
+describe('runner — runIngest with stubbed spawn', () => {
+  let logDir: string;
+  let stdoutPath: string;
+  let stderrPath: string;
+
+  beforeEach(() => {
+    logDir = fs.mkdtempSync(path.join(os.tmpdir(), 'squint-runner-test-'));
+    stdoutPath = path.join(logDir, 'stdout.log');
+    stderrPath = path.join(logDir, 'stderr.log');
+  });
+
+  afterEach(() => {
+    fs.rmSync(logDir, { recursive: true, force: true });
+  });
+
+  const baseOpts = (): { fixtureDir: string; outputDb: string; stdoutPath: string; stderrPath: string } => ({
+    fixtureDir: '/f',
+    outputDb: '/p.db',
+    stdoutPath,
+    stderrPath,
+  });
+
+  it('returns exitCode 0 on a successful child', async () => {
+    const fakeSpawn = makeFakeSpawn({ exitCode: 0, stdout: 'parse complete\nTotal cost: $0.02\n' });
+    const result = await runIngest({ ...baseOpts(), fromStage: 'parse', toStage: 'parse' }, { spawn: fakeSpawn });
+    expect(result.exitCode).toBe(0);
+    expect(result.costEstimate).toBe(0.02);
+  });
+
+  it('returns the non-zero exit code on failure', async () => {
+    const fakeSpawn = makeFakeSpawn({ exitCode: 1, stdout: '', stderr: 'boom' });
+    const result = await runIngest(baseOpts(), { spawn: fakeSpawn });
+    expect(result.exitCode).toBe(1);
+  });
+
+  it('rejects when child exceeds timeout — production close-handler path', async () => {
+    // Simulates the REAL production path: child does NOT emit 'error' on kill,
+    // it just emits 'close' with a non-zero/null exit code. This catches
+    // regressions where the error-path masks the close-path.
+    const fakeSpawn = makeFakeSpawn({
+      exitCode: 0,
+      stdout: '',
+      delayMs: 100,
+      closeOnKill: true, // emit 'close' (not 'error') when kill() is called
+    });
+    await expect(runIngest({ ...baseOpts(), timeoutMs: 10 }, { spawn: fakeSpawn })).rejects.toThrow(/timeout/i);
+  });
+
+  it('aggregates multiple cost lines into a total', async () => {
+    const fakeSpawn = makeFakeSpawn({
+      exitCode: 0,
+      stdout: 'symbols complete\ncost: $0.03\nrelationships complete\ncost: $0.04\n',
+    });
+    const result = await runIngest(baseOpts(), { spawn: fakeSpawn });
+    expect(result.costEstimate).toBeCloseTo(0.07, 5);
+  });
+
+  it('streams stdout to the configured log file', async () => {
+    const fakeSpawn = makeFakeSpawn({ exitCode: 0, stdout: 'hello world\n' });
+    const result = await runIngest(baseOpts(), { spawn: fakeSpawn });
+    expect(fs.readFileSync(result.stdoutPath, 'utf-8')).toBe('hello world\n');
+  });
+
+  it('escalates to SIGKILL when child ignores SIGTERM', async () => {
+    // Child never emits 'close' even after kill('SIGTERM'). The runner must
+    // escalate to SIGKILL after the grace period and force-resolve via 'close'.
+    const fakeSpawn = makeFakeSpawn({
+      exitCode: 0,
+      stdout: '',
+      delayMs: 10_000, // would never finish in time
+      ignoreSigterm: true,
+    });
+    const start = Date.now();
+    await expect(runIngest({ ...baseOpts(), timeoutMs: 20, sigkillGraceMs: 30 }, { spawn: fakeSpawn })).rejects.toThrow(
+      /timeout/i
+    );
+    // Should reject within timeout + grace + small slack, not 10s
+    expect(Date.now() - start).toBeLessThan(500);
+  });
+});
+
+// ============================================================
+// Test helpers
+// ============================================================
+
+interface FakeSpawnOpts {
+  exitCode: number;
+  stdout?: string;
+  stderr?: string;
+  delayMs?: number;
+  /** When true, kill() emits 'close' with exit code 143 (SIGTERM), like a real child. */
+  closeOnKill?: boolean;
+  /** When true, the child ignores SIGTERM and only responds to SIGKILL. */
+  ignoreSigterm?: boolean;
+}
+
+function makeFakeSpawn(opts: FakeSpawnOpts) {
+  return vi.fn(() => {
+    const stdoutListeners: Array<(chunk: Buffer) => void> = [];
+    const stderrListeners: Array<(chunk: Buffer) => void> = [];
+    const closeListeners: Array<(code: number) => void> = [];
+    const errorListeners: Array<(err: Error) => void> = [];
+
+    let scheduledFire: NodeJS.Timeout | undefined;
+    let alreadyClosed = false;
+
+    const fireClose = (code: number) => {
+      if (alreadyClosed) return;
+      alreadyClosed = true;
+      for (const fn of closeListeners) fn(code);
+    };
+
+    const child = {
+      stdout: {
+        on(event: string, fn: (chunk: Buffer) => void) {
+          if (event === 'data') stdoutListeners.push(fn);
+        },
+      },
+      stderr: {
+        on(event: string, fn: (chunk: Buffer) => void) {
+          if (event === 'data') stderrListeners.push(fn);
+        },
+      },
+      on(event: string, fn: (...args: unknown[]) => void) {
+        if (event === 'close') closeListeners.push(fn as (code: number) => void);
+        if (event === 'error') errorListeners.push(fn as (err: Error) => void);
+      },
+      kill(signal?: string) {
+        if (signal === 'SIGKILL' || !opts.ignoreSigterm) {
+          if (scheduledFire) clearTimeout(scheduledFire);
+          if (opts.closeOnKill || opts.ignoreSigterm) {
+            fireClose(143);
+          } else {
+            for (const fn of errorListeners) fn(new Error('killed'));
+          }
+        }
+        // SIGTERM with ignoreSigterm: do nothing — child stays alive
+      },
+    };
+
+    const fire = () => {
+      if (alreadyClosed) return;
+      if (opts.stdout) {
+        for (const fn of stdoutListeners) fn(Buffer.from(opts.stdout));
+      }
+      if (opts.stderr) {
+        for (const fn of stderrListeners) fn(Buffer.from(opts.stderr));
+      }
+      fireClose(opts.exitCode);
+    };
+
+    if (opts.delayMs) {
+      scheduledFire = setTimeout(fire, opts.delayMs);
+    } else {
+      // Defer to next tick so listeners can attach
+      setImmediate(fire);
+    }
+
+    return child as unknown as ReturnType<typeof import('node:child_process').spawn>;
+  });
+}
diff --git a/evals/harness/runner.ts b/evals/harness/runner.ts
new file mode 100644
index 0000000..f047c0f
--- /dev/null
+++ b/evals/harness/runner.ts
@@ -0,0 +1,289 @@
+import type { ChildProcess, SpawnOptions } from 'node:child_process';
+import { spawn as defaultSpawn } from 'node:child_process';
+import fs from 'node:fs';
+import path from 'node:path';
+
+/**
+ * Pipeline stage IDs accepted by `squint ingest --from-stage / --to-stage`.
+ * Mirrors STAGE_IDS in src/commands/ingest.ts:27-43.
+ */
+export type StageId =
+  | 'parse'
+  | 'symbols'
+  | 'symbols-verify'
+  | 'domains-consolidate'
+  | 'relationships'
+  | 'relationships-verify'
+  | 'modules'
+  | 'modules-verify'
+  | 'contracts'
+  | 'interactions'
+  | 'interactions-validate'
+  | 'interactions-verify'
+  | 'flows'
+  | 'flows-verify'
+  | 'features';
+
+export interface RunOptions {
+  fixtureDir: string;
+  outputDb: string;
+  fromStage?: StageId;
+  toStage?: StageId;
+  model?: string;
+  force?: boolean;
+  /** Hard timeout in milliseconds. Default 600_000 (10 minutes). */
+  timeoutMs?: number;
+  /**
+   * Grace period (ms) between SIGTERM and SIGKILL when forcibly stopping a
+   * child that exceeded the timeout. Default 5_000. Tests use a small value.
+   */
+  sigkillGraceMs?: number;
+  /** Where to write captured stdout. */
+  stdoutPath: string;
+  /** Where to write captured stderr. */
+  stderrPath: string;
+  /** Tee child stdout/stderr to current process? Default false. */
+  showOutput?: boolean;
+  /** Override the squint dev binary path (for tests). */
+  squintBin?: string;
+}
+
+export interface RunResult {
+  exitCode: number;
+  stdoutPath: string;
+  stderrPath: string;
+  durationMs: number;
+  /** Sum of all `cost: $X` lines parsed from stdout. */
+  costEstimate?: number;
+}
+
+/**
+ * Narrow spawn signature — only the overload the runner actually uses.
+ * Easier to substitute in tests than `typeof child_process.spawn`.
+ */
+export type SpawnFn = (command: string, args: readonly string[], options?: SpawnOptions) => ChildProcess;
+
+/**
+ * Spawn dependency injection — tests pass a fake spawn.
+ */
+export interface RunnerDeps {
+  spawn?: SpawnFn;
+}
+
+/**
+ * Build the argv that will be passed to `node bin/dev.js`.
+ * Pure function — no side effects, easy to test.
+ */
+export function buildIngestArgv(opts: {
+  fixtureDir: string;
+  outputDb: string;
+  fromStage?: StageId;
+  toStage?: StageId;
+  model?: string;
+  force?: boolean;
+}): string[] {
+  const argv: string[] = ['ingest', opts.fixtureDir, '-o', opts.outputDb];
+  if (opts.fromStage) argv.push('--from-stage', opts.fromStage);
+  if (opts.toStage) argv.push('--to-stage', opts.toStage);
+  if (opts.model) argv.push('-m', opts.model);
+  if (opts.force) argv.push('--force');
+  return argv;
+}
+
+/**
+ * Parse a single stdout line for a USD cost. Returns null on no match.
+ *
+ * Matches three formats:
+ *   1. "← LLM  4.6s  in: 2,930  out: 603  cached: 0  $0.0024  [2/200]"
+ *      — squint's actual per-call summary line (the format that matters
+ *        in production; see src/commands/llm/_shared/llm-utils.ts:310-318)
+ *   2. "Total cost: $0.0123" — aggregate summary
+ *   3. "cost: $0.05" — generic
+ *
+ * Order of matching: explicit "cost" prefix wins (more specific). Fall back
+ * to the LLM-summary-line shape (a $X.XX trailing a "← LLM" prefix).
+ */
+export function parseCostLine(line: string): number | null {
+  // Format 2 & 3: explicit "cost" prefix
+  const costPrefixed = line.match(/cost[: ]\s*\$([0-9]+\.?[0-9]*)/i);
+  if (costPrefixed) return toFiniteNumber(costPrefixed[1]);
+
+  // Format 1: squint's "← LLM ... $X.XXXX" summary. Anchor on the LLM
+  // summary marker so we don't accidentally match dollar signs in other
+  // contexts (e.g. user prompts that contain "$10" string literals).
+  const llmSummary = line.match(/←\s*LLM\b.*\$([0-9]+\.?[0-9]*)/);
+  if (llmSummary) return toFiniteNumber(llmSummary[1]);
+
+  return null;
+}
+
+function toFiniteNumber(s: string): number | null {
+  const value = Number.parseFloat(s);
+  return Number.isFinite(value) ? value : null;
+}
+
+/**
+ * Build a child-process env that excludes the vitest-specific keys that
+ * confuse oclif's command resolution. Returns a new object — does not mutate
+ * the input.
+ */
+function filterChildEnv(parent: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
+  const filtered: NodeJS.ProcessEnv = {};
+  for (const [key, value] of Object.entries(parent)) {
+    if (key === 'NODE_ENV' || key === 'NODE_PATH') continue;
+    if (key === 'VITEST' || key.startsWith('VITEST_')) continue;
+    filtered[key] = value;
+  }
+  return filtered;
+}
+
+/**
+ * Run squint ingest as a subprocess. Streams stdout/stderr to log files,
+ * enforces a hard timeout, parses cost lines into a running total.
+ */
+export async function runIngest(opts: RunOptions, deps: RunnerDeps = {}): Promise<RunResult> {
+  const spawnFn: SpawnFn = deps.spawn ?? (defaultSpawn as unknown as SpawnFn);
+  const start = Date.now();
+
+  const argv = buildIngestArgv(opts);
+  const squintBin = opts.squintBin ?? path.resolve(process.cwd(), 'bin', 'dev.js');
+
+  // Ensure log directories exist
+  fs.mkdirSync(path.dirname(opts.stdoutPath), { recursive: true });
+  fs.mkdirSync(path.dirname(opts.stderrPath), { recursive: true });
+  const stdoutStream = fs.createWriteStream(opts.stdoutPath);
+  const stderrStream = fs.createWriteStream(opts.stderrPath);
+
+  // Surface stream errors instead of letting them become unhandled rejections.
+  // Disk-full / permission errors should fail loudly, not silently.
+  let streamError: Error | undefined;
+  stdoutStream.on('error', (err) => {
+    streamError = err;
+  });
+  stderrStream.on('error', (err) => {
+    streamError = err;
+  });
+
+  // CRITICAL: scrub vitest-specific env vars before spawning squint.
+  //
+  // When the eval runs inside a vitest worker, vitest sets `NODE_ENV=test`
+  // (and several VITEST_* vars). When the spawned squint subprocess inherits
+  // `NODE_ENV=test`, oclif's command parser switches into a degraded mode
+  // where it interprets `ingest <path>` as a colon-joined topic-command
+  // name `ingest:<path>`, which doesn't exist. Net effect: every eval run
+  // would fail with "command ingest:<path> not found".
+  //
+  // Empirically (verified by spawning with each var set/unset individually),
+  // `NODE_ENV` is THE variable that breaks things. NODE_PATH and the
+  // VITEST_* vars are harmless in isolation. We strip them all anyway as
+  // defence in depth — squint should run as if invoked from a clean shell,
+  // not from inside a test runner.
+  const childEnv = filterChildEnv(process.env);
+  const spawnOpts: SpawnOptions = { stdio: ['ignore', 'pipe', 'pipe'], env: childEnv };
+  const child = spawnFn('node', [squintBin, ...argv], spawnOpts);
+
+  let costEstimate: number | undefined;
+  let stdoutBuffer = '';
+
+  const handleStdoutChunk = (chunk: Buffer): void => {
+    const text = chunk.toString('utf-8');
+    stdoutStream.write(text);
+    if (opts.showOutput) process.stdout.write(text);
+    // Parse cost lines (line-buffered)
+    stdoutBuffer += text;
+    let nl = stdoutBuffer.indexOf('\n');
+    while (nl !== -1) {
+      const line = stdoutBuffer.slice(0, nl);
+      stdoutBuffer = stdoutBuffer.slice(nl + 1);
+      const cost = parseCostLine(line);
+      if (cost !== null) {
+        costEstimate = (costEstimate ?? 0) + cost;
+      }
+      nl = stdoutBuffer.indexOf('\n');
+    }
+  };
+
+  const handleStderrChunk = (chunk: Buffer): void => {
+    const text = chunk.toString('utf-8');
+    stderrStream.write(text);
+    if (opts.showOutput) process.stderr.write(text);
+  };
+
+  child.stdout?.on('data', handleStdoutChunk);
+  child.stderr?.on('data', handleStderrChunk);
+
+  // Wait for a write stream to fully flush before resolving — otherwise readers
+  // race the buffered file content.
+  const closeStream = (stream: fs.WriteStream): Promise<void> =>
+    new Promise((res) => {
+      if (stream.writableEnded) {
+        res();
+        return;
+      }
+      stream.end(() => res());
+    });
+
+  return new Promise<RunResult>((resolve, reject) => {
+    const timeoutMs = opts.timeoutMs ?? 600_000;
+    const sigkillGraceMs = opts.sigkillGraceMs ?? 5_000;
+    let timedOut = false;
+    let sigkillTimer: NodeJS.Timeout | undefined;
+    const timer = setTimeout(() => {
+      timedOut = true;
+      child.kill('SIGTERM');
+      // Escalate to SIGKILL if the child ignores SIGTERM (stuck event loop, etc.)
+      sigkillTimer = setTimeout(() => {
+        try {
+          child.kill('SIGKILL');
+        } catch {
+          // child may have already exited between SIGTERM and the grace timer
+        }
+      }, sigkillGraceMs);
+    }, timeoutMs);
+
+    const cleanup = (): void => {
+      clearTimeout(timer);
+      if (sigkillTimer) clearTimeout(sigkillTimer);
+    };
+
+    const finalize = async (): Promise<{ stdoutPath: string; stderrPath: string }> => {
+      await Promise.all([closeStream(stdoutStream), closeStream(stderrStream)]);
+      return { stdoutPath: opts.stdoutPath, stderrPath: opts.stderrPath };
+    };
+
+    child.on('error', (err) => {
+      cleanup();
+      void finalize().then(() => {
+        if (streamError) reject(streamError);
+        else if (timedOut) reject(new Error(`squint ingest timeout after ${timeoutMs}ms`));
+        else reject(err);
+      });
+    });
+
+    child.on('close', (code) => {
+      cleanup();
+      void finalize().then(() => {
+        if (streamError) {
+          reject(streamError);
+          return;
+        }
+        if (timedOut) {
+          reject(new Error(`squint ingest timeout after ${timeoutMs}ms`));
+          return;
+        }
+        // Final flush of any pending cost line in the buffer
+        if (stdoutBuffer.length > 0) {
+          const cost = parseCostLine(stdoutBuffer);
+          if (cost !== null) costEstimate = (costEstimate ?? 0) + cost;
+        }
+        resolve({
+          exitCode: code ?? 0,
+          stdoutPath: opts.stdoutPath,
+          stderrPath: opts.stderrPath,
+          durationMs: Date.now() - start,
+          costEstimate,
+        });
+      });
+    });
+  });
+}
diff --git a/evals/harness/types.ts b/evals/harness/types.ts
new file mode 100644
index 0000000..1def9b4
--- /dev/null
+++ b/evals/harness/types.ts
@@ -0,0 +1,612 @@
+/**
+ * Types for the squint evaluation harness.
+ *
+ * Design rules:
+ * - Natural keys only (file paths, definition names, module full_paths) — never DB IDs
+ * - Mirror src/db/schema.ts column names but use camelCase
+ * - Decoupled from src/ types so the harness can be tested in isolation
+ */
+
+// ============================================================
+// Ground truth declarative records (input to the builder)
+// ============================================================
+
+export type DefinitionKind =
+  | 'function'
+  | 'class'
+  | 'variable'
+  | 'const'
+  | 'type'
+  | 'interface'
+  | 'enum'
+  | 'method'
+  | 'module';
+export type ImportType = 'import' | 'dynamic-import' | 'require' | 're-export' | 'export-all';
+export type SymbolKind = 'named' | 'default' | 'namespace' | 'side-effect';
+export type RelationshipType = 'uses' | 'extends' | 'implements';
+export type InteractionPattern = 'utility' | 'business' | 'test-internal';
+// Mirrors src/db/schema.ts InteractionSource — must stay in sync with the live schema.
+export type InteractionSource = 'ast' | 'ast-import' | 'llm-inferred' | 'contract-matched';
+export type FlowStakeholder = 'user' | 'admin' | 'system' | 'developer' | 'external';
+
+export interface GroundTruthFile {
+  path: string; // relative path from fixture root, e.g. 'src/index.ts'
+  language: string; // 'typescript' | 'javascript'
+}
+
+export interface GroundTruthDefinition {
+  file: string; // natural key — must match a GroundTruthFile.path
+  name: string;
+  kind: DefinitionKind;
+  isExported: boolean;
+  isDefault?: boolean; // default false
+  /** 1-based line number. Comparator allows ±2 line tolerance unless overridden. */
+  line: number;
+  /** Optional: end line, also 1-based. */
+  endLine?: number;
+  extendsName?: string | null;
+  implementsNames?: string[] | null;
+  extendsInterfaces?: string[] | null;
+}
+
+export interface GroundTruthImport {
+  fromFile: string; // natural key
+  source: string; // raw import source as written, e.g. './service.js' or 'express'
+  type: ImportType;
+  isExternal?: boolean;
+  isTypeOnly?: boolean;
+  /** Imported symbols (named, default, namespace) for this import statement. */
+  symbols?: GroundTruthImportSymbol[];
+}
+
+export interface GroundTruthImportSymbol {
+  /** Original exported name. */
+  name: string;
+  /** Local alias (often same as name). Defaults to name. */
+  localName?: string;
+  kind: SymbolKind;
+}
+
+export interface GroundTruthUsage {
+  file: string; // file in which the usage occurs
+  symbolName: string; // local name of the symbol used
+  line: number; // 1-based
+  context: string; // e.g. 'call_expression', 'member_expression'
+  isMethodCall?: boolean;
+  isConstructorCall?: boolean;
+}
+
+export interface GroundTruthDefinitionMetadata {
+  defKey: DefKey; // natural key for the definition
+  key: string; // 'purpose' | 'domain' | 'role' | 'pure' | etc.
+  /**
+   * EXACTLY ONE of `exactValue`, `proseReference`, `acceptableSet`, or
+   * `themeReference` must be set. The comparator picks its strategy based on
+   * which field is present.
+   */
+  /** Byte-for-byte string match. Use for booleans like 'pure': "true"/"false". Mismatch is **major**. */
+  exactValue?: string;
+  /** LLM-judged similarity vs reference text. Use for free-form prose like 'purpose'. Failure is **minor** prose-drift. */
+  proseReference?: string;
+  /**
+   * Subset check after JSON parse. Use for tag arrays like 'domain': ["auth","http"].
+   *
+   * Semantics: produced value must be a JSON array of strings that is BOTH
+   *  (a) non-empty (LLM did pick some tags), AND
+   *  (b) a subset of `acceptableSet` (every produced tag appears in the GT vocabulary).
+   *
+   * Largely superseded by `themeReference` for noisy LLM-generated tag fields —
+   * `acceptableSet` requires hand-maintaining vocabulary lists, which becomes a
+   * treadmill as the LLM picks new synonyms. Prefer `themeReference` for those.
+   * Keep `acceptableSet` for cases where the vocabulary really is closed and
+   * exhaustive (e.g., a small enum-like field).
+   *
+   * Mismatch is **minor** (vocabulary drift expected).
+   */
+  acceptableSet?: string[];
+  /**
+   * LLM-judged semantic theme for tag arrays. Use for noisy LLM-generated tag
+   * fields like 'domain' where the vocabulary the LLM picks varies legitimately.
+   *
+   * Semantics: the comparator parses the produced value as a JSON string array,
+   * formats it as readable prose ("tags: a, b, c"), and asks the prose judge to
+   * score similarity against `themeReference`. Below threshold = MINOR prose-drift.
+   *
+   * Replaces the `acceptableSet` whack-a-mole — write a one-sentence description
+   * of what tags should reflect, and let the judge handle synonyms.
+   */
+  themeReference?: string;
+  /**
+   * Deterministic floor for `themeReference` and `acceptableSet`: the produced
+   * tag array must contain at least this many tags. Default 1.
+   * Below the floor → MINOR mismatch (the LLM gave up and produced an empty array).
+   */
+  minTagsRequired?: number;
+  /** Min similarity for prose judge (default 0.75 for proseReference, 0.6 for themeReference). */
+  minSimilarity?: number;
+}
+
+export interface GroundTruthRelationship {
+  fromDef: DefKey;
+  toDef: DefKey;
+  relationshipType: RelationshipType;
+  /** Optional reference text for the prose `semantic` field. */
+  semanticReference?: string;
+  minSimilarity?: number;
+}
+
+export interface GroundTruthModule {
+  fullPath: string; // e.g. 'project.controllers.auth'
+  name: string;
+  parentFullPath?: string | null;
+  isTest?: boolean;
+  /** Members assigned to this module by their natural definition keys. */
+  members?: DefKey[];
+  /** Optional reference text for the prose `description` field. */
+  descriptionReference?: string;
+  minSimilarity?: number;
+}
+
+/**
+ * Theme-search rubric for the LLM-driven features stage.
+ *
+ * The features stage groups flows into product-level features. The LLM picks
+ * the feature names + slugs + descriptions AND which flows belong where.
+ * Both the feature metadata and the flow→feature assignment are non-
+ * deterministic, so we use a theme-search match instead of trying to
+ * anchor on specific flows:
+ *
+ *   For each rubric entry, the comparator iterates ALL produced features
+ *   and theme-judges each name+description against the expected role.
+ *   The entry passes if at least one feature scores above the threshold.
+ *
+ * This is intentionally tolerant — squint produces a small number of
+ * features (1-3 for todo-api) and the LLM picks names like "Authentication"
+ * vs "User Auth" vs "Identity Management" all of which describe the same
+ * concept. Theme search handles the synonym variance.
+ */
+export interface FeatureCohesionGroup {
+  /** Stable label for diff reporting and cache stability. */
+  label: string;
+  /** A feature whose name+description matches this MUST exist. */
+  expectedRole: string;
+  /** Min similarity for the role judge (default 0.6). */
+  minRoleSimilarity?: number;
+}
+
+/**
+ * Flow rubric for the LLM-driven flows stage.
+ *
+ * The flows stage produces a small number of relatively HIGH-LEVEL journey
+ * descriptions (e.g. "user processes authentication" covering login+register).
+ * Slugs, entry paths, names, descriptions are all LLM-picked and unstable.
+ * Even the entry_path column is non-deterministic — squint sometimes stores
+ * a module full_path, sometimes a controller name, sometimes an HTTP path.
+ *
+ * The rubric therefore uses a theme-search match: for each entry, the
+ * comparator iterates all produced flows and picks the BEST matching one
+ * (theme judge against expectedRole). If a flow exists whose name+description
+ * matches the expected role with score >= minRoleSimilarity AND whose
+ * stakeholder is in acceptableStakeholders, the entry passes.
+ *
+ * This makes the GT robust to all the LLM-picked metadata variance —
+ * we test "is there a flow about X for stakeholder Y" rather than asserting
+ * exact slug/path matches that flake.
+ */
+export interface FlowRubricEntry {
+  /** Stable label for diff reporting and cache stability. */
+  label: string;
+  /** The thematic concept the matching flow should represent. */
+  expectedRole: string;
+  /** Acceptable stakeholders — the LLM may pick any from this set. */
+  acceptableStakeholders?: FlowStakeholder[];
+  /** Min similarity for the role judge (default 0.6). */
+  minRoleSimilarity?: number;
+}
+
+/**
+ * Interaction rubric for the LLM-driven interactions stage.
+ *
+ * Replaces strict `(fromModulePath, toModulePath)` exact-match GT with a
+ * property-based assertion: "the module containing definition X should
+ * interact with the module containing definition Y, optionally with this
+ * source kind and this prose semantic". The comparator resolves anchor
+ * defs to their containing modules at compare time, so the GT is decoupled
+ * from iter 4's LLM-picked module names.
+ */
+export interface InteractionRubricEntry {
+  /** Stable label for diff reporting and cache stability. */
+  label: string;
+  /**
+   * One or more anchor definitions on the FROM side. The comparator
+   * resolves the FIRST anchor that has a module assignment.
+   */
+  fromAnchor: DefKey;
+  /** One or more anchor definitions on the TO side. */
+  toAnchor: DefKey;
+  /**
+   * Acceptable interaction sources — the LLM may pick any. Defaults to
+   * ['ast', 'ast-import', 'contract-matched'] (the deterministic ones).
+   * llm-inferred is excluded by default because it's the most variance-prone.
+   */
+  acceptableSources?: InteractionSource[];
+  /** Optional prose theme for the semantic field, judged in theme mode. */
+  semanticReference?: string;
+  /** Min similarity for the prose judge (default 0.6). */
+  minSimilarity?: number;
+}
+
+/**
+ * Member-cohesion rubric for the LLM-driven modules stage.
+ *
+ * Replaces the strict `modules`/`module_members` exact-match GT with a
+ * property-based assertion: "these definitions should live in the same
+ * module, and that module should play this role". This is robust to
+ * LLM tree-shape variation (different slugs, different depths, different
+ * groupings) because it tests the *semantic* property, not the spelling.
+ *
+ * The companion comparator is `compareModuleCohesion` (virtual table
+ * `module_cohesion`), which JOINs `modules` + `module_members` and verifies
+ * each group via cohesion + an LLM judge call against `expectedRole`.
+ */
+export interface ModuleCohesionGroup {
+  /** Stable label for diff reporting and cache stability. */
+  label: string;
+  /** Definitions that should share a module. */
+  members: DefKey[];
+  /** Prose describing what role the containing module should play. */
+  expectedRole: string;
+  /**
+   * Cohesion mode:
+   * - 'strict' (default): every member must be in the same module
+   * - 'majority': >50% of members must share a single module (the rest count
+   *   as drift, not failure — useful when one base class might land in the
+   *   parent module while subclasses land in the leaf)
+   */
+  cohesion?: 'strict' | 'majority';
+  /** Minimum similarity for the role judge. Default 0.6. */
+  minRoleSimilarity?: number;
+}
+
+export interface GroundTruthContract {
+  protocol: string; // 'http' | 'event' | etc.
+  normalizedKey: string; // e.g. 'POST /auth/login' or 'task.completed'
+  participants: GroundTruthContractParticipant[];
+  /**
+   * If true, this contract is "expected but not required" — the LLM may
+   * legitimately fail to extract it on some runs. Missing produces a MINOR
+   * warning instead of a CRITICAL gate failure.
+   *
+   * Use for contracts like in-process events where the boundary status is
+   * ambiguous and the LLM's detection is non-deterministic.
+   */
+  optional?: boolean;
+}
+
+export interface GroundTruthContractParticipant {
+  defKey: DefKey;
+  role: string; // 'server' | 'client' | 'producer' | 'consumer' | etc.
+}
+
+export interface GroundTruthInteraction {
+  fromModulePath: string;
+  toModulePath: string;
+  pattern: InteractionPattern | null;
+  source: InteractionSource;
+  /** Definition-level links underlying this interaction. */
+  links?: GroundTruthInteractionLink[];
+  semanticReference?: string;
+  minSimilarity?: number;
+}
+
+export interface GroundTruthInteractionLink {
+  fromDef: DefKey;
+  toDef: DefKey;
+  contractKey?: ContractKey; // optional: link to contract
+}
+
+export interface GroundTruthFlow {
+  slug: string;
+  name: string;
+  entryDef?: DefKey;
+  entryModulePath?: string;
+  entryPath?: string; // e.g. 'POST /api/auth/login'
+  stakeholder: FlowStakeholder;
+  /** Ordered module-level steps (interactions). */
+  steps?: Array<{ from: string; to: string }>; // module path pairs identifying the interaction
+  /** Ordered definition-level steps. */
+  definitionSteps?: Array<{ from: DefKey; to: DefKey }>;
+  descriptionReference?: string;
+  minSimilarity?: number;
+}
+
+export interface GroundTruthFeature {
+  slug: string;
+  name: string;
+  flowSlugs: string[];
+  descriptionReference?: string;
+  minSimilarity?: number;
+}
+
+/**
+ * The complete ground truth for a single fixture, composed in
+ * `evals/ground-truth/<name>/index.ts`.
+ */
+export interface GroundTruth {
+  fixtureName: string;
+  files: GroundTruthFile[];
+  definitions: GroundTruthDefinition[];
+  imports?: GroundTruthImport[];
+  usages?: GroundTruthUsage[];
+  definitionMetadata?: GroundTruthDefinitionMetadata[];
+  relationships?: GroundTruthRelationship[];
+  modules?: GroundTruthModule[];
+  /**
+   * Cohesion-based GT for the LLM-driven modules stage. When set, use the
+   * `module_cohesion` virtual table in scope (NOT `modules`/`module_members`).
+   * See `ModuleCohesionGroup` for the rationale.
+   */
+  moduleCohesion?: ModuleCohesionGroup[];
+  contracts?: GroundTruthContract[];
+  interactions?: GroundTruthInteraction[];
+  /**
+   * Anchor-based GT for the LLM-driven interactions stage. When set, use
+   * the `interaction_rubric` virtual table in scope INSTEAD of `interactions`.
+   * See `InteractionRubricEntry` for the rationale.
+   */
+  interactionRubric?: InteractionRubricEntry[];
+  /**
+   * Entry-point-based GT for the LLM-driven flows stage. When set, use the
+   * `flow_rubric` virtual table in scope INSTEAD of `flows`. See
+   * `FlowRubricEntry` for the rationale.
+   */
+  flowRubric?: FlowRubricEntry[];
+  /**
+   * Cohesion-based GT for the LLM-driven features stage. When set, use the
+   * `feature_cohesion` virtual table in scope INSTEAD of `features`. See
+   * `FeatureCohesionGroup` for the rationale.
+   */
+  featureCohesion?: FeatureCohesionGroup[];
+  flows?: GroundTruthFlow[];
+  features?: GroundTruthFeature[];
+}
+
+// ============================================================
+// Natural keys (branded — see below)
+// ============================================================
+
+/**
+ * Branded string types so a raw `string` cannot be passed where a `DefKey` is
+ * expected. Forces all construction through `defKey()` / `contractKey()`,
+ * which catches a real class of bugs (e.g., passing a file path where a
+ * definition key is expected) at compile time.
+ *
+ * The `__brand` field exists only in the type system — there is no runtime cost.
+ */
+export type DefKey = string & { readonly __brand: 'DefKey' };
+export type ContractKey = string & { readonly __brand: 'ContractKey' };
+
+export function defKey(file: string, name: string): DefKey {
+  return `${file}::${name}` as DefKey;
+}
+
+export function parseDefKey(key: DefKey): { file: string; name: string } {
+  // Use lastIndexOf so definition names containing '::' are handled correctly.
+  // (File paths cannot contain '::' in any platform's path syntax.)
+  const idx = (key as string).lastIndexOf('::');
+  if (idx === -1) throw new Error(`Invalid defKey: ${key}`);
+  return { file: (key as string).slice(0, idx), name: (key as string).slice(idx + 2) };
+}
+
+export function contractKey(protocol: string, normalizedKey: string): ContractKey {
+  return `${protocol}::${normalizedKey}` as ContractKey;
+}
+
+// ============================================================
+// Diff report (output of the comparator)
+// ============================================================
+
+export type Severity = 'critical' | 'major' | 'minor';
+
+export type TableName =
+  | 'files'
+  | 'definitions'
+  | 'imports'
+  | 'symbols'
+  | 'usages'
+  | 'definition_metadata'
+  | 'relationship_annotations'
+  | 'modules'
+  | 'module_members'
+  /**
+   * Virtual table — not a real DB table. The `compareModuleCohesion`
+   * comparator joins `modules` + `module_members` and verifies the
+   * `gt.moduleCohesion` rubric. Use this in scope INSTEAD of `modules` /
+   * `module_members` for LLM-driven module-stage iterations.
+   */
+  | 'module_cohesion'
+  /**
+   * Virtual table — `compareInteractionRubric` resolves anchor defs to
+   * their containing modules and verifies an interaction edge between them.
+   * Use this in scope INSTEAD of `interactions` for LLM-driven iterations.
+   */
+  | 'interaction_rubric'
+  /**
+   * Virtual table — `compareFlowRubric` matches flows by entry point and
+   * verifies stakeholder + required step edges + role prose.
+   */
+  | 'flow_rubric'
+  /**
+   * Virtual table — `compareFeatureCohesion` joins features + feature_flows
+   * and verifies cohesion + role for each rubric flow group.
+   */
+  | 'feature_cohesion'
+  | 'contracts'
+  | 'contract_participants'
+  | 'interactions'
+  | 'interaction_definition_links'
+  | 'flows'
+  | 'flow_steps'
+  | 'flow_definition_steps'
+  | 'features';
+
+/** A single concrete difference inside a table. */
+export interface RowDiff {
+  kind: 'missing' | 'extra' | 'mismatch' | 'prose-drift';
+  severity: Severity;
+  /** Natural key of the row in question, for human reading. */
+  naturalKey: string;
+  /** Free-form details for the reporter. */
+  details: string;
+  /** Optional fix-hint id resolved by reporter. */
+  fixHintId?: string;
+}
+
+export interface TableDiff {
+  table: TableName;
+  passed: boolean;
+  /** Number of expected rows in ground truth (for prose checks: number of references). */
+  expectedCount: number;
+  /** Number of rows produced by squint. */
+  producedCount: number;
+  diffs: RowDiff[];
+  /**
+   * Per-table prose-judge tally. Comparators that judge prose fields populate
+   * this directly. Passed prose checks do NOT generate RowDiffs (only failed
+   * ones do, as `prose-drift` kind), so this counter is the only way to track
+   * passes. Defaults to {0,0} when no prose checks were run for the table.
+   */
+  proseChecks?: { passed: number; failed: number };
+}
+
+export interface DiffSummary {
+  critical: number;
+  major: number;
+  minor: number;
+  proseChecks: { passed: number; failed: number };
+}
+
+export interface DiffReport {
+  fixtureName: string;
+  passed: boolean;
+  scope: TableName[];
+  tables: TableDiff[];
+  summary: DiffSummary;
+  durationMs: number;
+  squintCommit?: string;
+}
+
+// ============================================================
+// Prose judge
+// ============================================================
+
+export interface ProseJudgeRequest {
+  /** Identifying label for logging/caching, e.g. "definition_metadata.purpose for src/foo.ts::bar". */
+  field: string;
+  reference: string;
+  candidate: string;
+  minSimilarity: number;
+  /**
+   * Judging mode. The two modes use different system prompts and different
+   * cache namespaces:
+   *
+   * - 'prose' (default): the reference and candidate are both natural-language
+   *   descriptions. The judge scores STRICT semantic similarity — it surfaces
+   *   missing concepts and vague descriptions. Use for `purpose`, module
+   *   descriptions, relationship semantics, etc.
+   *
+   * - 'theme': the reference describes what concept a tag list should reflect,
+   *   and the candidate is a tag list (formatted as "tags: a, b, c"). The
+   *   judge scores TOLERANT semantic fit — it accepts any reasonable tags for
+   *   the concept, even if they use different vocabulary. Use for noisy
+   *   LLM-generated tag fields like `domain`.
+   */
+  mode?: 'prose' | 'theme';
+}
+
+export interface ProseJudgeResult {
+  similarity: number; // 0..1
+  passed: boolean;
+  reasoning: string;
+}
+
+/**
+ * Marker symbol set on stub/no-op judge functions. The compare() orchestrator
+ * checks for this when prose-bearing scopes are requested and refuses to run
+ * — so a stub judge can never silently pass real prose checks.
+ */
+export const STUB_JUDGE_MARKER = Symbol.for('squint.eval.stubJudge');
+
+/**
+ * Pluggable judge function. Real implementation calls an LLM;
+ * tests inject a stub. Stubs MUST set the STUB_JUDGE_MARKER property
+ * so the orchestrator can refuse to use them on real prose-check scopes.
+ */
+export type ProseJudgeFn = ((req: ProseJudgeRequest) => Promise<ProseJudgeResult>) & {
+  [STUB_JUDGE_MARKER]?: true;
+};
+
+/**
+ * Build a stub judge that always passes. Used by tests and by iterations
+ * that have no prose checks in scope. Tagged with STUB_JUDGE_MARKER so
+ * compare() can detect it and refuse to run on prose-bearing scopes.
+ */
+export function makeStubJudge(): ProseJudgeFn {
+  const fn: ProseJudgeFn = async () => ({
+    similarity: 1,
+    passed: true,
+    reasoning: 'stub judge — always passes',
+  });
+  fn[STUB_JUDGE_MARKER] = true;
+  return fn;
+}
+
+/**
+ * Single source of truth for "which tables have prose-judged fields, and how
+ * to count declared references in a GroundTruth".
+ *
+ * Adding a new prose-bearing table = ONE new entry here. Previously this was
+ * encoded in two places (PROSE_BEARING_TABLES set + a hardcoded if-chain in
+ * countDeclaredProseReferences). The set is now derived from the keys.
+ */
+export const PROSE_REFERENCE_COUNTERS: Partial<Record<TableName, (gt: GroundTruth) => number>> = {
+  definition_metadata: (gt) =>
+    (gt.definitionMetadata ?? []).filter((m) => m.proseReference != null || m.themeReference != null).length,
+  relationship_annotations: (gt) => (gt.relationships ?? []).filter((r) => r.semanticReference != null).length,
+  modules: (gt) => (gt.modules ?? []).filter((m) => m.descriptionReference != null).length,
+  // Cohesion rubric ALWAYS makes a judge call per group (the role check),
+  // so the count is the entire rubric length.
+  module_cohesion: (gt) => (gt.moduleCohesion ?? []).length,
+  interaction_rubric: (gt) => (gt.interactionRubric ?? []).filter((i) => i.semanticReference != null).length,
+  flow_rubric: (gt) => (gt.flowRubric ?? []).length,
+  feature_cohesion: (gt) => (gt.featureCohesion ?? []).length,
+  interactions: (gt) => (gt.interactions ?? []).filter((i) => i.semanticReference != null).length,
+  flows: (gt) => (gt.flows ?? []).filter((f) => f.descriptionReference != null).length,
+  features: (gt) => (gt.features ?? []).filter((f) => f.descriptionReference != null).length,
+};
+
+/**
+ * Tables that involve prose-judged fields, derived from PROSE_REFERENCE_COUNTERS.
+ * If any of these are in scope AND the GT actually declares prose references,
+ * a stub judge is forbidden.
+ */
+export const PROSE_BEARING_TABLES: ReadonlySet<TableName> = new Set(
+  Object.keys(PROSE_REFERENCE_COUNTERS) as TableName[]
+);
+
+// ============================================================
+// Fix hint database
+// ============================================================
+
+export interface FixHint {
+  id: string;
+  /** Conditions under which this hint applies. */
+  when: {
+    table: TableName;
+    kind?: RowDiff['kind'];
+    /** Substring match against naturalKey. */
+    keyContains?: string;
+  };
+  /** Markdown body shown in the report. */
+  body: string;
+}
diff --git a/evals/results/.gitkeep b/evals/results/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/evals/setup.ts b/evals/setup.ts
new file mode 100644
index 0000000..dcaa34a
--- /dev/null
+++ b/evals/setup.ts
@@ -0,0 +1,24 @@
+/**
+ * Vitest setup for the eval harness.
+ *
+ * Loaded via `setupFiles` in `vitest.eval.config.ts` so it runs ONCE in each
+ * vitest worker before any test code is imported.
+ *
+ * Sole responsibility: force-load `.env` with `override: true` so the
+ * `OPENROUTER_API_KEY` (and any other secrets) used by the in-process LLM
+ * judge AND by spawned `squint ingest` subprocesses always come from the
+ * project-local `.env` file. Without `override`, dotenv keeps any shell-level
+ * env var, which can drift (stale credits, wrong account, etc.) and lead to
+ * confusing eval failures.
+ *
+ * The spawned subprocess inherits the worker's env, so loading here is
+ * sufficient — no separate dotenv call inside the squint binary is needed
+ * for the eval-harness flow.
+ */
+import path from 'node:path';
+import { config as loadDotenv } from 'dotenv';
+
+loadDotenv({
+  path: path.resolve(process.cwd(), '.env'),
+  override: true,
+});
diff --git a/evals/todo-api.eval.ts b/evals/todo-api.eval.ts
new file mode 100644
index 0000000..c2084e4
--- /dev/null
+++ b/evals/todo-api.eval.ts
@@ -0,0 +1,316 @@
+import { describe, it } from 'vitest';
+import { todoApiGroundTruth } from './ground-truth/todo-api/index.js';
+import { makeLlmProseJudge } from './harness/comparator/llm-prose-judge.js';
+import { defineFixture } from './harness/fixture-config.js';
+import { runIterationStep } from './harness/iteration.js';
+
+const TODO_API = defineFixture('todo-api');
+
+describe('todo-api eval', () => {
+  it('iteration 1: parse stage produces expected files, definitions, and imports', async () => {
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'parse',
+      toStage: 'parse',
+      scope: ['files', 'definitions', 'imports'],
+      timeoutMs: 60_000,
+    });
+  }, 120_000);
+
+  it('iteration 2: symbols stage produces expected definition_metadata', async () => {
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'symbols',
+      toStage: 'symbols',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata'],
+      // Real LLM judge — uses gemini-2.5-flash by default (override via EVAL_JUDGE_MODEL).
+      // Cache lives at evals/.judge-cache.json (gitignored). Re-runs with the same
+      // (model, reference, candidate) tuples cost $0.
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 180_000,
+    });
+  }, 300_000);
+
+  it('iteration 3: relationships stage produces expected relationship_annotations', async () => {
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'relationships',
+      toStage: 'relationships',
+      // Scope includes definition_metadata as a regression check on iteration 2 —
+      // running --to-stage relationships also runs symbols, so any vocabulary
+      // drift in symbols would surface here too.
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations'],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 240_000,
+    });
+  }, 360_000);
+
+  it('iteration 3.5: relationships-verify stage preserves relationship_annotations', async () => {
+    // Regression detector for the relationships-verify stage. Mirrors iter 4.5
+    // for modules-verify. Phase 1 (deterministic) checks ghost rows, type
+    // mismatches, stale files, and PENDING_LLM_ANNOTATION leaks — all empty
+    // for the well-formed iter-3 state on todo-api. Phase 2 (LLM coherence
+    // verifier) re-annotates only edges flagged "wrong"; for a clean DB
+    // it should mark every edge correct and write nothing.
+    //
+    // Iter 3's GT works unchanged here — we already proved iter 3 → iter 4
+    // is byte-equivalent in `relationship_annotations` for this fixture.
+    // If a future squint change makes relationships-verify start moving
+    // things around, this iteration will go red and force a triage decision.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'relationships-verify',
+      toStage: 'relationships-verify',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations'],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 300_000,
+      costBudgetUsd: 0.2,
+    });
+  }, 420_000);
+
+  it('iteration 4: modules stage produces expected module cohesion', async () => {
+    // Uses the cohesion rubric (`module_cohesion` virtual table) instead of
+    // strict `modules`/`module_members` exact matching. The rubric verifies
+    // that semantically related definitions land in the same module and that
+    // module's name+description matches a hand-authored expected role —
+    // robust to LLM tree-shape variation.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'modules',
+      toStage: 'modules',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations', 'module_cohesion'],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 360_000,
+      costBudgetUsd: 0.2,
+    });
+  }, 480_000);
+
+  it('iteration 4.5: modules-verify stage preserves cohesion', async () => {
+    // Regression detector for the modules-verify stage. Same cohesion rubric
+    // as iter 4 — verifies the verify stage doesn't degrade member grouping
+    // or move definitions out of their semantic clusters.
+    //
+    // Cost budget bumped to 0.30 as defense in depth: if Phase 2 ever fires
+    // a reassignment, the cascade regenerates interactions+flows which is
+    // expensive. The cost guardrail will trip loudly instead of silently.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'modules-verify',
+      toStage: 'modules-verify',
+      scope: ['files', 'definitions', 'imports', 'definition_metadata', 'relationship_annotations', 'module_cohesion'],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 420_000,
+      costBudgetUsd: 0.3,
+    });
+  }, 540_000);
+
+  it('iteration 5: contracts stage extracts expected HTTP routes and events', async () => {
+    // The contracts extract stage scans boundary-role definitions (controllers,
+    // handlers, clients) and produces a normalized list of cross-process
+    // protocols: HTTP routes, event topics, queue names, etc.
+    //
+    // Variance hot spots are mostly post-processed away by squint's normalization
+    // (HTTP method casing, route param placeholders). The natural key
+    // (protocol, normalized_key) is stable enough for strict matching. The
+    // 9 HTTP routes + 2 events for todo-api are hand-authored against the
+    // controller and client source.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'contracts',
+      toStage: 'contracts',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 420_000,
+      costBudgetUsd: 0.3,
+    });
+  }, 540_000);
+
+  it('iteration 6: interactions stage produces expected module-pair edges', async () => {
+    // The interactions stage derives module-to-module edges from the AST call
+    // graph + import graph + contract matching, then runs an LLM Step 1 to
+    // assign semantics + pattern (utility/business) to each edge.
+    //
+    // Uses the anchor-based interactionRubric (instead of strict module-name
+    // exact match) so the rubric stays decoupled from iter 4's LLM-picked
+    // module names. Each entry asserts: "the module containing definition X
+    // should interact with the module containing definition Y, with a source
+    // in the AST-derived set, and a semantic that matches this theme".
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'interactions',
+      toStage: 'interactions',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 480_000,
+      costBudgetUsd: 0.4,
+    });
+  }, 600_000);
+
+  it('iteration 6.5: interactions-validate stage preserves the rubric', async () => {
+    // Regression detector for interactions-validate. This is a deterministic
+    // post-LLM cleanup pass that scans LLM-inferred edges for hallucinations:
+    //   - REVERSED      (inferred A→B but AST shows B→A)
+    //   - DIRECTION_CONFUSED (inferred direction disagrees with static evidence)
+    //   - NO_IMPORTS    (inferred edge has no static evidence)
+    //
+    // For todo-api the validate pass typically deletes a handful of LLM-only
+    // edges. The interactionRubric defaults to acceptableSources excluding
+    // 'llm-inferred' anyway, so the rubric is unaffected.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'interactions-validate',
+      toStage: 'interactions-validate',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 480_000,
+      costBudgetUsd: 0.4,
+    });
+  }, 600_000);
+
+  it('iteration 6.6: interactions-verify stage preserves the rubric', async () => {
+    // Regression detector for interactions-verify. Phase 1 checks referential
+    // integrity and coverage; Phase 2 calls the LLM to auto-remediate any
+    // gaps. For a clean fixture this is a no-op on the rubric assertions.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'interactions-verify',
+      toStage: 'interactions-verify',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 540_000,
+      costBudgetUsd: 0.4,
+    });
+  }, 660_000);
+
+  it('iteration 7: flows stage produces expected user journeys', async () => {
+    // The flows stage runs entry-point classification (LLM), then traces
+    // definition-level paths through interactions, then calls the enhancer
+    // (LLM) to assign stakeholder + name + description, then calls the
+    // gap generator (LLM) to fill uncovered interactions.
+    //
+    // Uses the theme-search flowRubric — entry paths and slugs are LLM-
+    // picked and unstable, so the rubric finds the best-matching flow
+    // by description theme alone. Asserts at least one user-stakeholder
+    // flow per concept area (auth, tasks). Extra flows are fine.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'flows',
+      toStage: 'flows',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+        'flow_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 600_000,
+      costBudgetUsd: 0.5,
+    });
+  }, 720_000);
+
+  it('iteration 7.5: flows-verify stage preserves the flow rubric', async () => {
+    // Regression detector for flows-verify. Phase 1 checks referential
+    // integrity (every flow step references a valid interaction); Phase 2
+    // calls the LLM to evaluate flow quality (coherence, completeness).
+    //
+    // Previously blocked by a squint bug — syncInheritanceInteractions
+    // wrote bare GROUP_CONCAT strings into the symbols column, which
+    // crashed parseSymbols (JSON.parse("BaseController")). Fixed in
+    // commit 4d7ac1b: now uses JSON_GROUP_ARRAY + defensive try/catch.
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'flows-verify',
+      toStage: 'flows-verify',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+        'flow_rubric',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 660_000,
+      costBudgetUsd: 0.5,
+    });
+  }, 780_000);
+
+  it('iteration 8: features stage groups flows into expected product features', async () => {
+    await runIterationStep({
+      fixture: TODO_API,
+      groundTruth: todoApiGroundTruth,
+      label: 'features',
+      toStage: 'features',
+      scope: [
+        'files',
+        'definitions',
+        'imports',
+        'definition_metadata',
+        'relationship_annotations',
+        'module_cohesion',
+        'contracts',
+        'interaction_rubric',
+        'flow_rubric',
+        'feature_cohesion',
+      ],
+      judgeFn: makeLlmProseJudge({ cachePath: TODO_API.judgeCachePath }),
+      timeoutMs: 720_000,
+      costBudgetUsd: 0.5,
+    });
+  }, 840_000);
+});
diff --git a/evals/tsconfig.json b/evals/tsconfig.json
new file mode 100644
index 0000000..da8581d
--- /dev/null
+++ b/evals/tsconfig.json
@@ -0,0 +1,10 @@
+{
+  "extends": "../tsconfig.json",
+  "compilerOptions": {
+    "rootDir": "..",
+    "noEmit": true,
+    "types": ["node"]
+  },
+  "include": ["**/*.ts", "../src/**/*.ts"],
+  "exclude": ["fixtures/*/node_modules", "results", "fixtures/*/dist"]
+}
diff --git a/package.json b/package.json
index 3b96dfe..0f0217e 100644
--- a/package.json
+++ b/package.json
@@ -21,11 +21,14 @@
     "dev:all": "sh ./bin/dev-all.sh",
     "test": "vitest run",
     "test:watch": "vitest",
+    "eval": "vitest run --config vitest.eval.config.ts",
+    "eval:watch": "vitest --config vitest.eval.config.ts",
     "test:coverage": "vitest run --coverage",
     "test:coverage:ui": "cd ui && pnpm run test:coverage",
     "test:coverage:all": "pnpm run test:coverage && pnpm run test:coverage:ui",
     "test:all": "pnpm test && cd ui && pnpm test",
     "typecheck": "tsc --noEmit",
+    "typecheck:eval": "tsc --noEmit -p evals/tsconfig.json",
     "lint": "biome check .",
     "lint:fix": "biome check --write .",
     "format": "biome format --write ."
@@ -68,18 +71,19 @@
   },
   "devDependencies": {
     "@biomejs/biome": "^1.9.0",
-    "@types/better-sqlite3": "^7.6.13",
-    "@types/node": "^22.0.0",
-    "@vitest/coverage-v8": "^2.1.9",
+    "dotenv": "^17.4.1",
     "@commitlint/cli": "^19.6.0",
     "@commitlint/config-conventional": "^19.6.0",
     "@semantic-release/changelog": "^6.0.3",
     "@semantic-release/exec": "^7.0.3",
     "@semantic-release/git": "^10.0.1",
+    "@types/better-sqlite3": "^7.6.13",
+    "@types/node": "^22.0.0",
+    "@vitest/coverage-v8": "^2.1.9",
     "conventional-changelog-conventionalcommits": "^8.0.0",
     "lefthook": "^1.6.0",
-    "typescript": "^5.6.0",
     "semantic-release": "^24.2.0",
+    "typescript": "^5.6.0",
     "vitest": "^2.1.0"
   },
   "engines": {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 41a9029..c3ebb51 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -66,6 +66,9 @@ importers:
       conventional-changelog-conventionalcommits:
         specifier: ^8.0.0
         version: 8.0.0
+      dotenv:
+        specifier: ^17.4.1
+        version: 17.4.1
       lefthook:
         specifier: ^1.6.0
         version: 1.13.6
@@ -173,28 +176,24 @@ packages:
     engines: {node: '>=14.21.3'}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   '@biomejs/cli-linux-arm64@1.9.4':
     resolution: {integrity: sha512-fJIW0+LYujdjUgJJuwesP4EjIBl/N/TcOX3IvIHJQNsAqvV2CHIogsmA94BPG6jZATS4Hi+xv4SkBBQSt1N4/g==}
     engines: {node: '>=14.21.3'}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   '@biomejs/cli-linux-x64-musl@1.9.4':
     resolution: {integrity: sha512-gEhi/jSBhZ2m6wjV530Yy8+fNqG8PAinM3oV7CyO+6c3CEh16Eizm21uHVsyVBEB6RIM8JHIl6AGYCv6Q6Q9Tg==}
     engines: {node: '>=14.21.3'}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   '@biomejs/cli-linux-x64@1.9.4':
     resolution: {integrity: sha512-lRCJv/Vi3Vlwmbd6K+oQ0KhLHMAysN8lXoCI7XeHlxaajk06u7G+UsFSO01NAs5iYuWKmVZjmiOzJ0OJmGsMwg==}
     engines: {node: '>=14.21.3'}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@biomejs/cli-win32-arm64@1.9.4':
     resolution: {integrity: sha512-tlbhLk+WXZmgwoIKwHIHEBZUwxml7bRJgk0X2sPyNR3S93cdRq6XulAZRQJ17FYGGzWne0fgrXBKpl7l4M87Hg==}
@@ -770,79 +769,66 @@ packages:
     resolution: {integrity: sha512-F8sWbhZ7tyuEfsmOxwc2giKDQzN3+kuBLPwwZGyVkLlKGdV1nvnNwYD0fKQ8+XS6hp9nY7B+ZeK01EBUE7aHaw==}
     cpu: [arm]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-arm-musleabihf@4.57.1':
     resolution: {integrity: sha512-rGfNUfn0GIeXtBP1wL5MnzSj98+PZe/AXaGBCRmT0ts80lU5CATYGxXukeTX39XBKsxzFpEeK+Mrp9faXOlmrw==}
     cpu: [arm]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-arm64-gnu@4.57.1':
     resolution: {integrity: sha512-MMtej3YHWeg/0klK2Qodf3yrNzz6CGjo2UntLvk2RSPlhzgLvYEB3frRvbEF2wRKh1Z2fDIg9KRPe1fawv7C+g==}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-arm64-musl@4.57.1':
     resolution: {integrity: sha512-1a/qhaaOXhqXGpMFMET9VqwZakkljWHLmZOX48R0I/YLbhdxr1m4gtG1Hq7++VhVUmf+L3sTAf9op4JlhQ5u1Q==}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-loong64-gnu@4.57.1':
     resolution: {integrity: sha512-QWO6RQTZ/cqYtJMtxhkRkidoNGXc7ERPbZN7dVW5SdURuLeVU7lwKMpo18XdcmpWYd0qsP1bwKPf7DNSUinhvA==}
     cpu: [loong64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-loong64-musl@4.57.1':
     resolution: {integrity: sha512-xpObYIf+8gprgWaPP32xiN5RVTi/s5FCR+XMXSKmhfoJjrpRAjCuuqQXyxUa/eJTdAE6eJ+KDKaoEqjZQxh3Gw==}
     cpu: [loong64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-ppc64-gnu@4.57.1':
     resolution: {integrity: sha512-4BrCgrpZo4hvzMDKRqEaW1zeecScDCR+2nZ86ATLhAoJ5FQ+lbHVD3ttKe74/c7tNT9c6F2viwB3ufwp01Oh2w==}
     cpu: [ppc64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-ppc64-musl@4.57.1':
     resolution: {integrity: sha512-NOlUuzesGauESAyEYFSe3QTUguL+lvrN1HtwEEsU2rOwdUDeTMJdO5dUYl/2hKf9jWydJrO9OL/XSSf65R5+Xw==}
     cpu: [ppc64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-riscv64-gnu@4.57.1':
     resolution: {integrity: sha512-ptA88htVp0AwUUqhVghwDIKlvJMD/fmL/wrQj99PRHFRAG6Z5nbWoWG4o81Nt9FT+IuqUQi+L31ZKAFeJ5Is+A==}
     cpu: [riscv64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-musl@4.57.1':
     resolution: {integrity: sha512-S51t7aMMTNdmAMPpBg7OOsTdn4tySRQvklmL3RpDRyknk87+Sp3xaumlatU+ppQ+5raY7sSTcC2beGgvhENfuw==}
     cpu: [riscv64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-linux-s390x-gnu@4.57.1':
     resolution: {integrity: sha512-Bl00OFnVFkL82FHbEqy3k5CUCKH6OEJL54KCyx2oqsmZnFTR8IoNqBF+mjQVcRCT5sB6yOvK8A37LNm/kPJiZg==}
     cpu: [s390x]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-x64-gnu@4.57.1':
     resolution: {integrity: sha512-ABca4ceT4N+Tv/GtotnWAeXZUZuM/9AQyCyKYyKnpk4yoA7QIAuBt6Hkgpw8kActYlew2mvckXkvx0FfoInnLg==}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@rollup/rollup-linux-x64-musl@4.57.1':
     resolution: {integrity: sha512-HFps0JeGtuOR2convgRRkHCekD7j+gdAuXM+/i6kGzQtFhlCtQkpwtNzkNj6QhCDp7DRJ7+qC/1Vg2jt5iSOFw==}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   '@rollup/rollup-openbsd-x64@4.57.1':
     resolution: {integrity: sha512-H+hXEv9gdVQuDTgnqD+SQffoWoc0Of59AStSzTEj/feWTBAnSfSD3+Dql1ZruJQxmykT/JVY0dE8Ka7z0DH1hw==}
@@ -1533,6 +1519,10 @@ packages:
     resolution: {integrity: sha512-QM8q3zDe58hqUqjraQOmzZ1LIH9SWQJTlEKCH4kJ2oQvLZk7RbQXvtDM2XEq3fwkV9CCvvH4LA0AV+ogFsBM2Q==}
     engines: {node: '>=8'}
 
+  dotenv@17.4.1:
+    resolution: {integrity: sha512-k8DaKGP6r1G30Lx8V4+pCsLzKr8vLmV2paqEj1Y55GdAgJuIqpRp5FfajGF8KtwMxCz9qJc6wUIJnm053d/WCw==}
+    engines: {node: '>=12'}
+
   duplexer2@0.1.4:
     resolution: {integrity: sha512-asLFVfWWtJ90ZyOUHMqk7/S2w2guQKxUI2itj3d92ADHhxUSbCMGi1f1cBcJ7xM1To+pE/Khbwo1yuNbMEPKeA==}
 
@@ -4540,6 +4530,8 @@ snapshots:
     dependencies:
       is-obj: 2.0.0
 
+  dotenv@17.4.1: {}
+
   duplexer2@0.1.4:
     dependencies:
       readable-stream: 2.3.8
diff --git a/src/commands/interactions/generate.ts b/src/commands/interactions/generate.ts
index dc90f50..81d6da1 100644
--- a/src/commands/interactions/generate.ts
+++ b/src/commands/interactions/generate.ts
@@ -71,42 +71,30 @@ export default class InteractionsGenerate extends BaseLlmCommand {
     // Get enriched module call graph
     const enrichedEdges = db.callGraph.getEnrichedModuleCallGraph();
 
-    if (enrichedEdges.length === 0) {
-      if (isJson) {
-        this.log(JSON.stringify({ error: 'No module call graph edges found', hint: 'Run llm modules first' }));
-      } else {
-        this.log(chalk.yellow('No module call graph edges found.'));
-        this.log(chalk.gray('Ensure modules are assigned first with `squint llm modules`'));
-      }
-      return;
-    }
+    // Tag test-internal interactions: if either module is a test module, override pattern
+    const testModuleIds = db.modules.getTestModuleIds();
 
-    // Count utility vs business edges
     const utilityCount = enrichedEdges.filter((e) => e.edgePattern === 'utility').length;
     const businessCount = enrichedEdges.filter((e) => e.edgePattern === 'business').length;
 
-    if (!isJson && verbose) {
-      this.log(chalk.gray(`Found ${enrichedEdges.length} module-to-module edges`));
-      this.log(chalk.gray(`  Business logic: ${businessCount}, Utility: ${utilityCount}`));
-    }
+    let interactions: InteractionSuggestion[] = [];
 
-    // Step 1: Generate semantics for each edge using LLM (in batches)
-    const interactions: InteractionSuggestion[] = await processBatchSemantics(
-      enrichedEdges,
-      batchSize,
-      model,
-      db,
-      this,
-      isJson,
-      verbose
-    );
+    if (enrichedEdges.length > 0) {
+      if (!isJson && verbose) {
+        this.log(chalk.gray(`Found ${enrichedEdges.length} module-to-module edges`));
+        this.log(chalk.gray(`  Business logic: ${businessCount}, Utility: ${utilityCount}`));
+      }
 
-    // Tag test-internal interactions: if either module is a test module, override pattern
-    const testModuleIds = db.modules.getTestModuleIds();
-    tagTestInternalInteractions(interactions, testModuleIds, { command: this, isJson, verbose });
+      // Step 1: Generate semantics for each edge using LLM (in batches)
+      interactions = await processBatchSemantics(enrichedEdges, batchSize, model, db, this, isJson, verbose);
 
-    // Persist interactions
-    persistInteractions(db, interactions, verbose, isJson, dryRun, this);
+      tagTestInternalInteractions(interactions, testModuleIds, { command: this, isJson, verbose });
+
+      // Persist interactions
+      persistInteractions(db, interactions, verbose, isJson, dryRun, this);
+    } else if (!isJson && verbose) {
+      this.log(chalk.gray('No call-graph edges found, skipping Step 1 (LLM semantics)'));
+    }
 
     // Step 2: Import-based interactions (deterministic — no LLM)
     const { importBasedCount } = !dryRun
diff --git a/src/db/repositories/interaction-analysis.ts b/src/db/repositories/interaction-analysis.ts
index 32b3c17..dfbdefd 100644
--- a/src/db/repositories/interaction-analysis.ts
+++ b/src/db/repositories/interaction-analysis.ts
@@ -220,14 +220,22 @@ export class InteractionAnalysis {
           LIMIT 1
         ),
         symbols = (
-          SELECT GROUP_CONCAT(DISTINCT d.name)
-          FROM relationship_annotations ra
-          JOIN module_members mm1 ON ra.from_definition_id = mm1.definition_id
-          JOIN module_members mm2 ON ra.to_definition_id = mm2.definition_id
-          JOIN definitions d ON ra.to_definition_id = d.id
-          WHERE mm1.module_id = interactions.from_module_id
-            AND mm2.module_id = interactions.to_module_id
-            AND ra.relationship_type IN ('extends', 'implements')
+          -- JSON_GROUP_ARRAY produces a real JSON array (e.g. ["BaseController"])
+          -- so the column round-trips through parseSymbols(). The previous
+          -- GROUP_CONCAT(DISTINCT ...) wrote a bare CSV string that crashed
+          -- flows-verify with a SyntaxError on JSON.parse('BaseController').
+          -- SQLite's JSON_GROUP_ARRAY does not accept DISTINCT inline, so we
+          -- push DISTINCT into an inner subquery to preserve dedup behavior.
+          SELECT JSON_GROUP_ARRAY(name) FROM (
+            SELECT DISTINCT d.name AS name
+            FROM relationship_annotations ra
+            JOIN module_members mm1 ON ra.from_definition_id = mm1.definition_id
+            JOIN module_members mm2 ON ra.to_definition_id = mm2.definition_id
+            JOIN definitions d ON ra.to_definition_id = d.id
+            WHERE mm1.module_id = interactions.from_module_id
+              AND mm2.module_id = interactions.to_module_id
+              AND ra.relationship_type IN ('extends', 'implements')
+          )
         )
       WHERE pattern = 'inheritance' AND semantic IS NULL
     `)
diff --git a/src/db/repositories/interaction-repository.ts b/src/db/repositories/interaction-repository.ts
index 2edd198..fcb13ec 100644
--- a/src/db/repositories/interaction-repository.ts
+++ b/src/db/repositories/interaction-repository.ts
@@ -67,7 +67,14 @@ const INTERACTION_WITH_PATHS_SELECT = `
 
 function parseSymbols(row: Interaction): Interaction {
   if (row.symbols) {
-    row.symbols = JSON.parse(row.symbols as unknown as string);
+    try {
+      row.symbols = JSON.parse(row.symbols as unknown as string);
+    } catch {
+      // Malformed symbols column — drop the bad value rather than crash
+      // the entire flows-verify pipeline. The interaction row itself remains
+      // valid; only its symbols list is unavailable.
+      row.symbols = null;
+    }
   }
   return row;
 }
diff --git a/src/parser/adapters/ruby/reference-extractor.ts b/src/parser/adapters/ruby/reference-extractor.ts
index f42b42e..544bfac 100644
--- a/src/parser/adapters/ruby/reference-extractor.ts
+++ b/src/parser/adapters/ruby/reference-extractor.ts
@@ -81,16 +81,19 @@ function findProjectRoot(filePath: string, knownFiles: Set<string>): string {
   const fsRoot = path.parse(dir).root;
 
   while (dir !== fsRoot) {
-    // Check for common Rails/Ruby project root indicators
+    // Check for common Rails/Ruby project root indicators.
+    // knownFiles only contains source files (.rb), so Gemfile/Rakefile won't
+    // be in the set. Also check for the Rails app/ directory convention by
+    // looking for any known file under dir/app/.
     if (
       knownFiles.has(path.join(dir, 'Gemfile')) ||
       knownFiles.has(path.join(dir, 'Rakefile')) ||
-      knownFiles.has(path.join(dir, 'config/application.rb'))
+      knownFiles.has(path.join(dir, 'config/application.rb')) ||
+      hasKnownFileUnder(path.join(dir, 'app'), knownFiles)
     ) {
       return dir;
     }
     const parent = path.dirname(dir);
-    // Guard against infinite loop (shouldn't happen with absolute paths but just in case)
     if (parent === dir) break;
     dir = parent;
   }
@@ -98,6 +101,20 @@ function findProjectRoot(filePath: string, knownFiles: Set<string>): string {
   return path.dirname(absoluteFilePath);
 }
 
+/** Check if any file in knownFiles starts with the given directory prefix. */
+/**
+ * Check if any file in knownFiles lives under the given directory.
+ * O(N) linear scan — acceptable for typical projects (hundreds of files).
+ * For large monorepos, a sorted array with binary search would be better.
+ */
+function hasKnownFileUnder(dirPath: string, knownFiles: Set<string>): boolean {
+  const prefix = dirPath + path.sep;
+  for (const f of knownFiles) {
+    if (f.startsWith(prefix)) return true;
+  }
+  return false;
+}
+
 /**
  * Extract the string content from a Ruby string node.
  * Handles both single-quoted and double-quoted strings.
@@ -167,6 +184,20 @@ function getConstantText(node: SyntaxNode): string {
   return node.text;
 }
 
+/**
+ * Count the number of arguments in a Ruby argument_list node.
+ */
+function countCallArgs(argsNode: SyntaxNode): number {
+  let count = 0;
+  for (let i = 0; i < argsNode.childCount; i++) {
+    const child = argsNode.child(i);
+    if (child && child.type !== ',' && child.type !== '(' && child.type !== ')') {
+      count++;
+    }
+  }
+  return count;
+}
+
 /**
  * Create a side-effect import symbol (for require/require_relative without destructuring).
  */
@@ -209,6 +240,7 @@ export function extractRubyReferences(
   knownFiles: Set<string>
 ): FileReference[] {
   const references: FileReference[] = [];
+  const constantUsages = new Map<string, { resolvedPath: string; usages: SymbolUsage[] }>();
   const projectRoot = findProjectRoot(filePath, knownFiles);
 
   function walk(node: SyntaxNode): void {
@@ -296,6 +328,11 @@ export function extractRubyReferences(
             const resolvedPath = resolveConstantViaAutoloading(constantName, projectRoot, knownFiles);
             const isExternal = !resolvedPath;
 
+            // Mark this constant as handled so the post-walk constant-receiver
+            // loop doesn't create a duplicate reference for the same name.
+            // Use `null` resolvedPath sentinel to indicate "already emitted".
+            constantUsages.set(constantName, { resolvedPath: '', usages: [] });
+
             references.push({
               type: 'import',
               source: constantName,
@@ -311,6 +348,43 @@ export function extractRubyReferences(
           }
         }
       }
+
+      // Constant-receiver calls: BookSerializer.new(book), User.authenticate(...)
+      // In Zeitwerk apps these are implicit cross-file dependencies. Resolve the
+      // constant via Rails autoloading and collect call-site usages so the
+      // call-graph service can build proper source:'ast' interaction edges.
+      const receiverNode = node.childForFieldName('receiver');
+      if (receiverNode && (receiverNode.type === 'constant' || receiverNode.type === 'scope_resolution')) {
+        const constantName = getConstantText(receiverNode);
+
+        if (!constantUsages.has(constantName)) {
+          const resolvedPath = resolveConstantViaAutoloading(constantName, projectRoot, knownFiles);
+          if (resolvedPath) {
+            constantUsages.set(constantName, { resolvedPath, usages: [] });
+          }
+        }
+
+        const entry = constantUsages.get(constantName);
+        if (entry) {
+          const callMethodNode = node.childForFieldName('method');
+          const argsNode = node.childForFieldName('arguments');
+          const callMethodName = callMethodNode?.text ?? '';
+
+          entry.usages.push({
+            position: {
+              row: receiverNode.startPosition.row,
+              column: receiverNode.startPosition.column,
+            },
+            context: 'call',
+            callsite: {
+              argumentCount: argsNode ? countCallArgs(argsNode) : 0,
+              isMethodCall: true,
+              isConstructorCall: callMethodName === 'new',
+              receiverName: constantName,
+            },
+          });
+        }
+      }
     }
 
     // Recurse into children
@@ -321,6 +395,30 @@ export function extractRubyReferences(
   }
 
   walk(rootNode);
+
+  // Create references from collected constant-receiver data (one per constant,
+  // with all call-site usages attached for call-graph integration).
+  // Skip constants already emitted by include/extend/prepend (resolvedPath = '' sentinel).
+  for (const [constantName, { resolvedPath, usages }] of constantUsages) {
+    if (!resolvedPath) continue;
+    references.push({
+      type: 'import',
+      source: constantName,
+      resolvedPath,
+      isExternal: false,
+      isTypeOnly: false,
+      imports: [
+        {
+          name: constantName,
+          localName: constantName,
+          kind: 'named',
+          usages,
+        },
+      ],
+      position: usages[0] ? { row: usages[0].position.row, column: usages[0].position.column } : { row: 0, column: 0 },
+    });
+  }
+
   return references;
 }
 
diff --git a/test/db/repositories/interaction-repository.test.ts b/test/db/repositories/interaction-repository.test.ts
index 884325d..ef8e99a 100644
--- a/test/db/repositories/interaction-repository.test.ts
+++ b/test/db/repositories/interaction-repository.test.ts
@@ -164,6 +164,27 @@ describe('InteractionRepository', () => {
 
       expect(interaction!.symbols).toEqual(['a', 'b', 'c']);
     });
+
+    it('does not crash when symbols column contains a malformed (non-JSON) value', () => {
+      // Regression: a buggy backfill in syncInheritanceInteractions used to write
+      // raw GROUP_CONCAT output (a bare comma-separated string like "BaseController")
+      // into the symbols column instead of a JSON array. parseSymbols then crashed
+      // the entire flows-verify pipeline with `SyntaxError: Unexpected token 'B'`.
+      // The backfill is fixed (it now uses JSON_GROUP_ARRAY) but parseSymbols also
+      // wraps JSON.parse in try/catch as defense-in-depth: any other writer that
+      // ever produces malformed data should degrade gracefully, not crash.
+      const id = repo.insert(moduleId1, moduleId2);
+      // Manually inject a bare-string symbols value, bypassing the repository's
+      // JSON.stringify guard.
+      db.prepare('UPDATE interactions SET symbols = ? WHERE id = ?').run('BaseController', id);
+
+      // The call must NOT throw.
+      const interaction = repo.getById(id);
+
+      expect(interaction).not.toBeNull();
+      // Malformed symbols are dropped (set to null, not preserved as the bare string).
+      expect(interaction!.symbols).toBeNull();
+    });
   });
 
   describe('getByModules', () => {
@@ -621,6 +642,69 @@ describe('InteractionRepository', () => {
       // Second run should not create any new interactions
       expect(result2.created).toBe(0);
     });
+
+    it('backfills symbols column as a valid JSON array (regression: was bare CSV)', () => {
+      // Regression: the backfill UPDATE used to write raw GROUP_CONCAT(DISTINCT d.name)
+      // into interactions.symbols, producing a bare string like "ApiHandler" instead of
+      // a JSON array. Downstream parseSymbols then crashed flows-verify with
+      // `SyntaxError: Unexpected token 'A', "ApiHandler" is not valid JSON`.
+      // The fix uses JSON_GROUP_ARRAY so the column always round-trips through JSON.parse.
+      relationshipRepo.set(defId1, defId2, 'Auth extends Api', 'extends');
+
+      interactionAnalysis.syncInheritanceInteractions();
+
+      // Read the raw symbols column directly to verify the on-disk format.
+      const row = db
+        .prepare(
+          `SELECT symbols FROM interactions
+           WHERE from_module_id = ? AND to_module_id = ? AND pattern = 'inheritance'`
+        )
+        .get(moduleId1, moduleId2) as { symbols: string | null };
+
+      expect(row).toBeDefined();
+      expect(row.symbols).not.toBeNull();
+      // Must parse as a JSON array (not throw).
+      const parsed = JSON.parse(row.symbols!);
+      expect(Array.isArray(parsed)).toBe(true);
+      expect(parsed).toContain('ApiHandler');
+
+      // And the repository's high-level getter must return symbols as a string array.
+      const interaction = repo.getByModules(moduleId1, moduleId2);
+      expect(interaction).not.toBeNull();
+      expect(interaction!.symbols).toEqual(['ApiHandler']);
+    });
+
+    it('backfilled symbols deduplicates target def names', () => {
+      // Two extends edges from different defs in module1 → same def in module2.
+      // GROUP_CONCAT(DISTINCT) used to dedup; JSON_GROUP_ARRAY does not, so the
+      // fix wraps the inner SELECT in DISTINCT to preserve dedup behavior.
+      const fileId = fileRepo.insert({
+        path: '/test/file2.ts',
+        language: 'typescript',
+        contentHash: 'def456',
+        sizeBytes: 100,
+        modifiedAt: '2024-01-01T00:00:00.000Z',
+      });
+      const defId4 = fileRepo.insertDefinition(fileId, {
+        name: 'AuthService2',
+        kind: 'class',
+        isExported: true,
+        isDefault: false,
+        position: { row: 0, column: 0 },
+        endPosition: { row: 5, column: 1 },
+      });
+      moduleRepo.assignSymbol(defId4, moduleId1);
+      // Both defId1 and defId4 (in module1) extend defId2 (in module2)
+      relationshipRepo.set(defId1, defId2, 'Auth extends Api', 'extends');
+      relationshipRepo.set(defId4, defId2, 'Auth2 extends Api', 'extends');
+
+      interactionAnalysis.syncInheritanceInteractions();
+
+      const interaction = repo.getByModules(moduleId1, moduleId2);
+      expect(interaction).not.toBeNull();
+      // Both edges target ApiHandler, so the deduplicated array contains it exactly once.
+      expect(interaction!.symbols).toEqual(['ApiHandler']);
+    });
   });
 
   describe('getModuleCallGraph', () => {
diff --git a/test/parser/adapters/ruby/reference-extractor.test.ts b/test/parser/adapters/ruby/reference-extractor.test.ts
index 04d35d9..e6310e9 100644
--- a/test/parser/adapters/ruby/reference-extractor.test.ts
+++ b/test/parser/adapters/ruby/reference-extractor.test.ts
@@ -397,3 +397,162 @@ describe('resolveRubyImportPath', () => {
     expect(result).toBeNull();
   });
 });
+
+describe('constant-receiver references (Zeitwerk implicit imports)', () => {
+  it('detects BookSerializer.new(book) as a reference to the serializer file', () => {
+    const code = `
+class BooksController < BaseController
+  def index
+    books = Book.all
+    render json: books.map { |b| BookSerializer.new(b).as_json }
+  end
+end`;
+    const projectRoot = '/project';
+    const knownFiles = new Set([
+      path.join(projectRoot, 'Gemfile'),
+      path.join(projectRoot, 'app/controllers/books_controller.rb'),
+      path.join(projectRoot, 'app/serializers/book_serializer.rb'),
+      path.join(projectRoot, 'app/models/book.rb'),
+    ]);
+    const refs = extractRubyReferences(
+      parse(code),
+      path.join(projectRoot, 'app/controllers/books_controller.rb'),
+      knownFiles
+    );
+
+    const bookSerializerRef = refs.find((r) => r.source === 'BookSerializer');
+    expect(bookSerializerRef).toBeDefined();
+    expect(bookSerializerRef!.resolvedPath).toBe(path.join(projectRoot, 'app/serializers/book_serializer.rb'));
+    expect(bookSerializerRef!.isExternal).toBe(false);
+    expect(bookSerializerRef!.type).toBe('import');
+
+    // Usages must be populated for call-graph integration
+    const bsUsages = bookSerializerRef!.imports[0].usages;
+    expect(bsUsages.length).toBeGreaterThanOrEqual(1);
+    expect(bsUsages[0].context).toBe('call');
+    expect(bsUsages[0].callsite?.isConstructorCall).toBe(true);
+    expect(bsUsages[0].callsite?.receiverName).toBe('BookSerializer');
+
+    const bookRef = refs.find((r) => r.source === 'Book');
+    expect(bookRef).toBeDefined();
+    expect(bookRef!.resolvedPath).toBe(path.join(projectRoot, 'app/models/book.rb'));
+    expect(bookRef!.imports[0].usages.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('handles class method calls: User.authenticate(...)', () => {
+    const code = `
+class SessionsController
+  def create
+    user = User.authenticate(params[:email], params[:password])
+  end
+end`;
+    const projectRoot = '/project';
+    const knownFiles = new Set([
+      path.join(projectRoot, 'Gemfile'),
+      path.join(projectRoot, 'app/controllers/sessions_controller.rb'),
+      path.join(projectRoot, 'app/models/user.rb'),
+    ]);
+    const refs = extractRubyReferences(
+      parse(code),
+      path.join(projectRoot, 'app/controllers/sessions_controller.rb'),
+      knownFiles
+    );
+
+    const userRef = refs.find((r) => r.source === 'User');
+    expect(userRef).toBeDefined();
+    expect(userRef!.resolvedPath).toBe(path.join(projectRoot, 'app/models/user.rb'));
+  });
+
+  it('deduplicates constant references within the same file', () => {
+    const code = `
+class OrdersController
+  def index
+    render json: orders.map { |o| OrderSerializer.new(o).as_json }
+  end
+  def show
+    render json: OrderSerializer.new(@order).as_json
+  end
+end`;
+    const projectRoot = '/project';
+    const knownFiles = new Set([
+      path.join(projectRoot, 'Gemfile'),
+      path.join(projectRoot, 'app/controllers/orders_controller.rb'),
+      path.join(projectRoot, 'app/serializers/order_serializer.rb'),
+    ]);
+    const refs = extractRubyReferences(
+      parse(code),
+      path.join(projectRoot, 'app/controllers/orders_controller.rb'),
+      knownFiles
+    );
+
+    const orderSerializerRefs = refs.filter((r) => r.source === 'OrderSerializer');
+    expect(orderSerializerRefs).toHaveLength(1);
+
+    // Both call sites should be captured as usages on the single reference
+    const usages = orderSerializerRefs[0].imports[0].usages;
+    expect(usages).toHaveLength(2);
+    expect(usages.every((u) => u.context === 'call')).toBe(true);
+  });
+
+  it('ignores unresolvable constants (framework classes, external gems)', () => {
+    const code = `
+class User < ApplicationRecord
+  has_secure_password
+  validates :email, presence: true
+end`;
+    const projectRoot = '/project';
+    const knownFiles = new Set([path.join(projectRoot, 'Gemfile'), path.join(projectRoot, 'app/models/user.rb')]);
+    const refs = extractRubyReferences(parse(code), path.join(projectRoot, 'app/models/user.rb'), knownFiles);
+
+    // No resolved constant-receiver imports (ApplicationRecord is in the extends clause, not a call receiver)
+    const resolvedImports = refs.filter((r) => !r.isExternal && r.type === 'import');
+    expect(resolvedImports).toHaveLength(0);
+  });
+
+  it('does not duplicate references when include and constant-receiver call both appear', () => {
+    const code = `
+class Book < ApplicationRecord
+  include Searchable
+  def search
+    Searchable.reindex(self)
+  end
+end`;
+    const projectRoot = '/project';
+    const knownFiles = new Set([
+      path.join(projectRoot, 'Gemfile'),
+      path.join(projectRoot, 'app/models/book.rb'),
+      path.join(projectRoot, 'app/models/searchable.rb'),
+    ]);
+    const refs = extractRubyReferences(parse(code), path.join(projectRoot, 'app/models/book.rb'), knownFiles);
+
+    // Should produce exactly one reference for Searchable (from include), not two
+    const searchableRefs = refs.filter((r) => r.source === 'Searchable' && !r.isExternal);
+    expect(searchableRefs).toHaveLength(1);
+  });
+
+  it('handles scope_resolution receivers (namespaced constants)', () => {
+    const code = `
+class OrdersController
+  def create
+    result = Admin::AuditService.log(current_user, 'order_created')
+  end
+end`;
+    const projectRoot = '/project';
+    const knownFiles = new Set([
+      path.join(projectRoot, 'Gemfile'),
+      path.join(projectRoot, 'app/controllers/orders_controller.rb'),
+      path.join(projectRoot, 'app/services/admin/audit_service.rb'),
+    ]);
+    const refs = extractRubyReferences(
+      parse(code),
+      path.join(projectRoot, 'app/controllers/orders_controller.rb'),
+      knownFiles
+    );
+
+    const auditRef = refs.find((r) => r.source === 'Admin::AuditService');
+    expect(auditRef).toBeDefined();
+    expect(auditRef!.resolvedPath).toBe(path.join(projectRoot, 'app/services/admin/audit_service.rb'));
+    expect(auditRef!.imports[0].usages).toHaveLength(1);
+    expect(auditRef!.imports[0].usages[0].callsite?.receiverName).toBe('Admin::AuditService');
+  });
+});
diff --git a/vitest.config.ts b/vitest.config.ts
index b352b47..d204f68 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -2,7 +2,14 @@ import { defineConfig } from 'vitest/config';
 
 export default defineConfig({
   test: {
-    include: ['test/**/*.test.ts', 'src/**/*.test.ts'],
+    include: [
+      'test/**/*.test.ts',
+      'src/**/*.test.ts',
+      // Harness unit tests are free (no LLM, no subprocess) and must run in CI.
+      // The actual eval scenarios live in evals/**/*.eval.ts and run via the
+      // separate `npm run eval` command (vitest.eval.config.ts).
+      'evals/harness/**/*.test.ts',
+    ],
     coverage: {
       enabled: false, // Enable via CLI: --coverage
       provider: 'v8',
diff --git a/vitest.eval.config.ts b/vitest.eval.config.ts
new file mode 100644
index 0000000..bd09e03
--- /dev/null
+++ b/vitest.eval.config.ts
@@ -0,0 +1,31 @@
+import { defineConfig } from 'vitest/config';
+
+/**
+ * Vitest config for LLM-driven evaluation SCENARIOS only.
+ *
+ * Run via: `npm run eval`.
+ *
+ * Scope:
+ *   evals/**\/*.eval.ts — real squint ingestion as a subprocess, real LLM calls,
+ *   real money. Manually invoked.
+ *
+ * NOT here:
+ *   evals/harness/**\/*.test.ts — these are free unit tests with zero subprocess
+ *   and zero LLM calls. They live in the MAIN vitest.config.ts so every CI run
+ *   exercises them.
+ */
+export default defineConfig({
+  test: {
+    include: ['evals/**/*.eval.ts'],
+    // Eval scenarios can take minutes (subprocess + LLM). Default per-test timeout high.
+    testTimeout: 600_000,
+    hookTimeout: 60_000,
+    // Run sequentially — multiple subprocesses fighting for the same fixture dir is bad.
+    fileParallelism: false,
+    // Force-load .env with override BEFORE any test code is imported so the
+    // OPENROUTER_API_KEY (and similar) used by the in-process judge AND by
+    // spawned squint subprocesses always comes from the project-local .env
+    // file, never a stale shell-level env var.
+    setupFiles: ['./evals/setup.ts'],
+  },
+});