From f55ac7e5be18e0f3edd2295642578a9f0ba0c955 Mon Sep 17 00:00:00 2001
From: Michael Sitarzewski <msitarzewski@users.noreply.github.com>
Date: Mon, 9 Mar 2026 15:22:22 -0500
Subject: [PATCH] Add follow-up questions, revision citations, CLI persistence,
 provider updates

Follow-up questions: generate_followups() uses cheapest model with JSON mode
to suggest 3 post-consensus follow-up questions. Stored in followups_json on
Thread model, displayed as clickable items in ConsensusNav/ThreadNav sidebar.
_run_consensus returns 8-tuple (added followups), all callers updated.

Revision citations: handle_revise() now accepts tool_registry + web_search,
extracts citations. revision_citations on ConsensusContext + RoundResult,
persisted to DB, included in Sources nav.

CLI persistence: persist_consensus() saves full round history to DB from CLI.
Top-level --rounds/--challengers cascade to subcommands. _parse_challengers()
accepts int count or comma-separated model refs.

Calibration date filters: category + since/until inputs on CalibrationDashboard.

OpenAI: reasoning_effort "high" for GPT-5.x when no function tools are sent;
gpt-5.2 added to NO_TEMPERATURE_MODELS in the catalog.
Perplexity: retry for APIConnectionError (2 attempts, 1s delay).
Alembic: DUH_DATABASE_URL env var overrides alembic.ini.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .gitignore                                    |   3 +
 alembic/env.py                                |  14 +-
 memory-bank/activeContext.md                  | 119 ++++----
 memory-bank/progress.md                       |  47 +++-
 memory-bank/tasks/2026-03/README.md           |  32 +++
 memory-bank/toc.md                            |   2 +-
 src/duh/api/routes/ask.py                     |   2 +
 src/duh/api/routes/threads.py                 |  12 +
 src/duh/api/routes/ws.py                      |  41 ++-
 src/duh/cli/app.py                            | 253 ++++++++++++++++--
 src/duh/consensus/handlers.py                 |  96 ++++++-
 src/duh/consensus/machine.py                  |   5 +
 src/duh/mcp/server.py                         |   1 +
 src/duh/memory/migrations.py                  |   5 +
 src/duh/memory/models.py                      |   3 +
 src/duh/providers/catalog.py                  |   1 +
 src/duh/providers/openai.py                   |  17 ++
 src/duh/providers/perplexity.py               |  36 ++-
 tests/unit/test_cli.py                        |   8 +-
 tests/unit/test_cli_batch.py                  |  15 +-
 tests/unit/test_cli_display.py                | 142 ++++++++++
 tests/unit/test_cli_tools.py                  |  10 +-
 tests/unit/test_cli_voting.py                 |   2 +-
 tests/unit/test_mcp_server.py                 |   2 +-
 tests/unit/test_providers_openai.py           |   4 +-
 web/src/__tests__/stores.test.ts              |  45 ++++
 web/src/api/types.ts                          |   2 +
 .../calibration/CalibrationDashboard.tsx      |  79 +++++-
 web/src/components/consensus/ConsensusNav.tsx |  31 ++-
 .../components/consensus/ConsensusPanel.tsx   |   1 +
 web/src/components/threads/ThreadNav.tsx      |  35 +++
 web/src/stores/calibration.ts                 |  20 +-
 web/src/stores/consensus.ts                   |   6 +
 33 files changed, 962 insertions(+), 129 deletions(-)

diff --git a/.gitignore b/.gitignore
index ca4bd90..1952a59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,3 +58,6 @@ memory-bank/setup.md
 web/node_modules/
 web/dist/
 
+# npm wrapper
+npm/like-duh/node_modules/
+
diff --git a/alembic/env.py b/alembic/env.py
index b24a771..7c71480 100644
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -35,9 +35,17 @@ def _expand_url(section: dict[str, str]) -> dict[str, str]:
     return section
 
 
+def _resolve_url() -> str:
+    """Return DUH_DATABASE_URL if set and non-empty, else the alembic.ini URL."""
+    override = os.environ.get("DUH_DATABASE_URL")
+    if not override:
+        return config.get_main_option("sqlalchemy.url") or ""
+    return override
+
+
 def run_migrations_offline() -> None:
     """Run migrations in 'offline' mode."""
-    url = config.get_main_option("sqlalchemy.url")
+    url = _resolve_url()
     context.configure(
         url=url,
         target_metadata=target_metadata,
@@ -59,6 +67,8 @@ def do_run_migrations(connection) -> None:  # type: ignore[no-untyped-def]
 async def run_async_migrations() -> None:
     """Run migrations in 'online' mode with async engine."""
     section = _expand_url(config.get_section(config.config_ini_section, {}))
+    section["sqlalchemy.url"] = _resolve_url()
+    section = _expand_url(section)
     connectable = async_engine_from_config(
         section,
         prefix="sqlalchemy.",
@@ -74,6 +84,8 @@ async def run_async_migrations() -> None:
 def run_migrations_online() -> None:
     """Run migrations in 'online' mode (sync or async)."""
     section = _expand_url(config.get_section(config.config_ini_section, {}))
+    section["sqlalchemy.url"] = _resolve_url()
+    section = _expand_url(section)
     url = section.get("sqlalchemy.url", "")
 
     if _is_async_url(url):
diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md
index 15e18e7..2dd36b4 100644
--- a/memory-bank/activeContext.md
+++ b/memory-bank/activeContext.md
@@ -1,79 +1,84 @@
 # Active Context
 
-**Last Updated**: 2026-03-08
-**Current Phase**: `question-refinement` branch — pre-consensus question refinement, native web search, citations, tools-by-default
-**Next Action**: Branch in progress, uncommitted changes staged
+**Last Updated**: 2026-03-09
+**Current Phase**: Post PR #14 merge — follow-up questions, revision citations, CLI persistence, calibration filters, provider updates
+**Next Action**: Commit and push uncommitted work to new branch
 
-## Latest Work (2026-03-08)
+## Latest Work (2026-03-09)
 
-### Question Refinement
-- Pre-consensus clarification step: analyze question → ask clarifying questions → enrich with answers → proceed to consensus
-- `src/duh/consensus/refine.py` — `analyze_question()` + `enrich_question()`, uses MOST EXPENSIVE model (not cheapest)
-- API: `POST /api/refine` → `RefineResponse{needs_refinement, questions[]}`, `POST /api/enrich` → `EnrichResponse{enriched_question}`
-- CLI: `duh ask --refine "question"` — interactive `click.prompt()` loop, default `--no-refine`
-- Frontend: consensus store `'refining'` status, `submitQuestion` → refine → clarify → enrich → `startConsensus`
-- `RefinementPanel.tsx` — tabbed UI inside GlassPanel, checkmarks on answered tabs, Skip + Start Consensus buttons
-- Graceful fallback: any failure → proceed to consensus with original question
+### Follow-up Questions (new end-to-end feature)
+- `generate_followups()` in `src/duh/consensus/handlers.py:930` — uses cheapest model with JSON mode to suggest 3 follow-up questions after consensus completes
+- Prompt asks for different angles: deeper technical detail, practical implications, risks/edge cases, related decisions
+- `followups` field added to `ConsensusContext` in `machine.py`
+- `_run_consensus` returns 8-tuple now (was 7): `(decision, confidence, rigor, dissent, cost, overview, citations, followups)`
+- All callers updated: CLI ask, CLI auto, CLI decompose, CLI batch, REST API, WebSocket, MCP server
+- **Persistence**: `followups_json` TEXT column on Thread model + SQLite auto-migration in `ensure_schema()`
+- **Thread detail API**: returns `followups` parsed from `followups_json`
+- **WebSocket**: sends `followups` in `complete` event, persists via `_persist_consensus`
+- **Frontend**: `ConsensusNav` + `ThreadNav` show clickable follow-ups in Disclosure section
+  - Clicking a follow-up calls `submitQuestion()` to start a new consensus
+  - `consensus.ts` store: `followups` state, included in reset
+  - `types.ts`: `followups` on `ThreadDetail` and `WSComplete`
 
-### Native Provider Web Search
-- Providers use server-side search instead of DDG proxy when `config.tools.web_search.native` is true
-- `web_search: bool` param added to `ModelProvider.send()` protocol
-- Anthropic: `web_search_20250305` server tool in tools[]
-- Google: `GoogleSearch()` grounding (replaces function tools — can't coexist)
-- Mistral: `{"type": "web_search"}` appended to tools
-- OpenAI: `web_search_options={}` only for `_SEARCH_MODELS` set; others fall back to DDG
-- Perplexity: no-op (always searches natively)
-- `tool_augmented_send`: filters DDG `web_search` tool when native=True, passes flag to provider
+### Revision Citations (enhancement to existing citation system)
+- `revision_citations` field added to both `ConsensusContext` and `RoundResult` in `machine.py`
+- `handle_revise()` now accepts `tool_registry` + `web_search` params — enables tool-augmented revision with web search
+- `handle_revise()` extracts citations from response into `ctx.revision_citations`
+- `handle_propose()` now extracts `proposal_citations` directly in handler (moved from ws.py)
+- WebSocket sends revision citations in REVISE `phase_complete` event
+- `_persist_consensus` saves revision citations to DB as `citations_json` on reviser contribution
+- `ConsensusPanel.tsx` passes `revisionCitations` to REVISE phase card
+- `ConsensusNav.tsx` includes revision citations in Sources section (role: 'revise')
+- `_run_consensus` citation collection now includes revision citations from both round history and current round
 
-### Citations — Persisted + Domain-Grouped
-- `Citation` dataclass (url, title, snippet) on `ModelResponse.citations`
-- Extraction per provider: Anthropic (`web_search_tool_result`), Google (grounding metadata), Perplexity (`response.citations`)
-- **Persistence**: `citations_json` TEXT column on `Contribution` model, SQLite auto-migration via `ensure_schema()`
-- `proposal_citations` tracked on `ConsensusContext` → archived to `RoundResult` → persisted via `_persist_consensus`
-- Thread detail API returns `citations` on `ContributionResponse`
-- **Domain-grouped Sources nav**: ConsensusNav (live) + ThreadNav (stored) group citations by hostname
-  - Nested Disclosure: outer "Sources (17)" → inner "wikipedia.org (3)" → P/C/R role badges per citation
-  - P (green) = propose, C (amber) = challenge, R (blue) = revise
-- `CitationList` shared component for inline display below content
+### CLI Enhancements
+- Top-level `--rounds` and `--challengers` options on `cli()` group cascade to subcommands (subcommand wins if both set)
+- `_parse_challengers()` accepts either int count or comma-separated model refs (e.g. `3` or `openai:gpt-5,google:gemini-2.5-pro`)
+- `challenger_count` param flows through `_run_consensus` → `select_challengers(count=N)`
+- **CLI DB persistence**: new `persist_consensus()` function in `app.py` — CLI `ask` command now persists full consensus round history to DB (proposals, challenges, revisions, citations, decisions, overview, followups)
+- `_ask_async` creates DB factory via `_create_db()`, disposes engine in `finally` block
+- Top-level `--rounds` also cascades into `batch` subcommand
 
-### Anthropic Streaming + max_tokens
-- `AnthropicProvider.send()` now uses streaming internally via `_collect_stream()` — avoids 10-minute timeout
-- `max_tokens` bumped from 16384 → 32768 across all 6 handler defaults (propose, challenge, revise, commit, voting, decomposition)
-- Citations are part of the value — truncating them undermines trust
+### Calibration Date Filters (frontend)
+- `CalibrationDashboard.tsx`: category dropdown + since/until date inputs + Apply button
+- `INTENT_CATEGORIES` constant: `['factual', 'technical', 'creative', 'judgment', 'strategic']`
+- `calibration.ts` store: `since`/`until` state + `setSince`/`setUntil` setters, passed to API call
+- Store tests: 4 new tests for date filter state and API param passing
 
-### Parallel Challenge Streaming
-- `_stream_challenges()` in `ws.py` uses `asyncio.as_completed()` to send each challenge result to the frontend as it finishes
-- Previously: all challengers ran in parallel but results were batched after all completed
-- Now: first challenger to respond appears immediately in the UI
+### Provider Updates
+- **OpenAI**: `_REASONING_EFFORT_MODELS` set (gpt-5, gpt-5-mini, gpt-5-nano, gpt-5.2, gpt-5.4) — sends `reasoning_effort: "high"` when no function tools present (incompatible with tools on /v1/chat/completions)
+- **OpenAI**: also sends `reasoning_effort: "high"` in structured output path (`_send_structured`)
+- **OpenAI**: `gpt-5.2` added to `NO_TEMPERATURE_MODELS` in `catalog.py`
+- **Perplexity**: retry logic for `APIConnectionError` — 2 attempts, 1s delay between retries
+- **Perplexity**: `APIConnectionError` mapped to `ProviderTimeoutError`
 
-### Tools Enabled by Default
-- `web_search` tool wired through CLI, REST, and WebSocket paths by default
-- Provider tool format fix: `tool_augmented_send` builds generic `{name, description, parameters}` — each provider transforms to native format in `send()`
+### Infrastructure
+- `alembic/env.py`: `DUH_DATABASE_URL` env var overrides `alembic.ini` — `_resolve_url()` used in offline, online sync, and online async migration paths
+- `.gitignore`: `npm/like-duh/node_modules/` added
 
-### Sidebar UX
-- New-question button (Heroicons pencil-square) + collapsible sidebar toggle
-- Shell manages `desktopSidebarOpen` (default true) + `mobileSidebarOpen` separately
-- TopBar shows sidebar toggle when desktop sidebar collapsed or always on mobile
-
-### Test Results
-- 1641 Python tests + 194 Vitest tests (1835 total)
-- Build clean, all tests pass
+### Test Updates
+- All test files updated for 8-tuple `_run_consensus` return value
+- `test_cli_display.py`: new `TestShowCitations` class (8 tests — empty, single, dedup, grouping, sort, title fallback, no-url skip, numbered)
+- `test_cli_display.py`: new `TestShowFinalDecisionOverview` class (2 tests — shows/hides overview panel)
+- `test_cli_tools.py`: mock return values corrected from 4-tuple to 8-tuple
+- `test_providers_openai.py`: test switched from `gpt-5.2` to `gpt-4o` (since 5.2 now has special reasoning_effort behavior)
+- `stores.test.ts`: 4 new calibration date filter tests
+- `test_cli_batch.py`, `test_cli_voting.py`, `test_mcp_server.py`: 8-tuple updates
 
 ---
 
 ## Current State
 
-- **Branch `question-refinement`** — in progress, not yet merged
-- **1641 Python tests + 194 Vitest tests** (1835 total)
-- All previous features intact (v0.1–v0.6)
-- Prior work merged: z-index fix, GPT-5.4, .env docs, password reset
+- **Branch `main`** — uncommitted changes across 29 files (+828/-63)
+- All previous features intact (v0.1-v0.6, question-refinement PR #13, messaging-refinement PR #14)
+- Prior merged: question refinement, native web search, citations, tools-by-default, sidebar UX, README rewrite, CLI citation display
 
 ## Open Questions (Still Unresolved)
 
 - Licensing (MIT vs Apache 2.0)
 - Output licensing for multi-provider synthesized content
-- Vector search solution for SQLite (sqlite-vss vs ChromaDB vs FAISS) — v1.0 decision
+- Vector search solution for SQLite (sqlite-vss vs ChromaDB vs FAISS) — v1.0 decision
 - Client library packaging: monorepo `client/` dir vs separate repo?
 - MCP server transport: stdio vs SSE vs streamable HTTP?
-- Hosted demo economics (try.duh.dev) — deferred to post-1.0
-- A2A protocol — deferred to post-1.0
+- Hosted demo economics (try.duh.dev) — deferred to post-1.0
+- A2A protocol — deferred to post-1.0
diff --git a/memory-bank/progress.md b/memory-bank/progress.md
index dd0e044..db0c127 100644
--- a/memory-bank/progress.md
+++ b/memory-bank/progress.md
@@ -4,9 +4,31 @@
 
 ---
 
-## Current State: Post v0.6.0 — `question-refinement` Branch In Progress
+## Current State: Post PR #14 — Follow-ups, Revision Citations, CLI Persistence
 
-### Question Refinement + Native Web Search + Citations (2026-03-08)
+### Follow-up Questions + Revision Citations + CLI Persistence + Provider Updates (2026-03-09)
+
+- **Follow-up questions**: `generate_followups()` uses cheapest model w/ JSON mode to suggest 3 follow-up questions after consensus
+  - `followups` on ConsensusContext, `followups_json` TEXT on Thread model + migration
+  - `_run_consensus` now returns 8-tuple (was 7, added `followups`)
+  - All callers updated: CLI, REST, WS, MCP, batch, decompose
+  - Frontend: clickable follow-ups in ConsensusNav + ThreadNav (Disclosure), triggers new consensus
+  - WS `complete` event includes `followups`, thread detail API returns them
+- **Revision citations**: `handle_revise()` now accepts `tool_registry` + `web_search`, extracts citations
+  - `revision_citations` on ConsensusContext + RoundResult, persisted to DB
+  - `handle_propose()` now extracts proposal_citations directly in handler
+  - WS sends revision citations in REVISE phase, ConsensusNav includes them in Sources
+- **CLI persistence**: new `persist_consensus()` in `app.py` — CLI `ask` saves full round history to DB
+  - `_ask_async` creates DB factory, disposes engine in finally block
+- **CLI enhancements**: top-level `--rounds` and `--challengers` cascade to subcommands
+  - `_parse_challengers()` accepts int count or comma-separated model refs
+- **Calibration date filters**: frontend category + since/until date inputs on CalibrationDashboard
+- **OpenAI**: `reasoning_effort: "high"` for GPT-5.x models (when no tools), gpt-5.2 in NO_TEMPERATURE_MODELS
+- **Perplexity**: retry logic for APIConnectionError (2 attempts, 1s delay)
+- **Alembic**: `DUH_DATABASE_URL` env var overrides alembic.ini
+- Tests: new TestShowCitations (8), TestShowFinalDecisionOverview (2), calibration date filter tests (4), all 8-tuple updates
+
+### Question Refinement + Native Web Search + Citations (2026-03-08, merged PR #13 + #14)
 
 - **Question refinement**: pre-consensus clarification step (analyze → clarify → enrich → consensus)
   - `src/duh/consensus/refine.py`, API routes (`/api/refine`, `/api/enrich`), CLI `--refine` flag
@@ -224,9 +246,18 @@ Phase 0 benchmark framework — fully functional, pilot-tested on 5 questions.
 | 2026-03-07 | GPT-5.4 added to model catalog (1M ctx, $2.50/$15.00, no-temperature) | Done |
 | 2026-03-07 | .env.example updated with provider API key placeholders | Done |
 | 2026-03-07 | README updated with all provider env vars | Done |
-| 2026-03-08 | Question refinement (analyze → clarify → enrich → consensus) | In Progress |
-| 2026-03-08 | Native provider web search (Anthropic/Google/Mistral/OpenAI/Perplexity) | In Progress |
-| 2026-03-08 | Citations extraction + frontend CitationList + ConsensusNav Sources | In Progress |
-| 2026-03-08 | Tools enabled by default (web_search wired through CLI/REST/WS) | In Progress |
-| 2026-03-08 | Provider tool format fix (generic → native transform per provider) | In Progress |
-| 2026-03-08 | Sidebar UX (new-question button, collapsible toggle) | In Progress |
+| 2026-03-08 | Question refinement (analyze → clarify → enrich → consensus) | Done (PR #13) |
+| 2026-03-08 | Native provider web search (Anthropic/Google/Mistral/OpenAI/Perplexity) | Done (PR #13) |
+| 2026-03-08 | Citations extraction + frontend CitationList + ConsensusNav Sources | Done (PR #13) |
+| 2026-03-08 | Tools enabled by default (web_search wired through CLI/REST/WS) | Done (PR #13) |
+| 2026-03-08 | Provider tool format fix (generic → native transform per provider) | Done (PR #13) |
+| 2026-03-08 | Sidebar UX (new-question button, collapsible toggle) | Done (PR #13) |
+| 2026-03-08 | README rewrite + CLI citation display (7-tuple _run_consensus) | Done (PR #14) |
+| 2026-03-09 | Follow-up questions (generate, persist, display, clickable) | In Progress |
+| 2026-03-09 | Revision citations (handle_revise with tools/search, persist, display) | In Progress |
+| 2026-03-09 | CLI DB persistence (persist_consensus, _ask_async DB factory) | In Progress |
+| 2026-03-09 | CLI top-level --rounds/--challengers cascade + _parse_challengers | In Progress |
+| 2026-03-09 | Calibration date filters (frontend category/since/until) | In Progress |
+| 2026-03-09 | OpenAI reasoning_effort for GPT-5.x, gpt-5.2 catalog | In Progress |
+| 2026-03-09 | Perplexity retry logic for APIConnectionError | In Progress |
+| 2026-03-09 | Alembic DUH_DATABASE_URL env var support | In Progress |
diff --git a/memory-bank/tasks/2026-03/README.md b/memory-bank/tasks/2026-03/README.md
index 028b13b..4850729 100644
--- a/memory-bank/tasks/2026-03/README.md
+++ b/memory-bank/tasks/2026-03/README.md
@@ -9,6 +9,38 @@
 - Files: `mail.py`, `auth.py`, `schema.py`, `loader.py`, `LoginPage.tsx`, `ResetPasswordPage.tsx`, `TopBar.tsx`
 - See: [070307_password-reset.md](./070307_password-reset.md)
 
+## 2026-03-08: Question Refinement + Native Web Search + Citations (PR #13 + #14)
+- Pre-consensus question refinement: analyze → clarify → enrich → consensus
+- Native provider web search (Anthropic/Google/Mistral/OpenAI/Perplexity)
+- Citations: extraction per provider, persistence, domain-grouped Sources nav with P/C/R badges
+- Tools enabled by default (web_search wired through CLI, REST, WS)
+- Sidebar UX: new-question button + collapsible toggle
+- Anthropic streaming + parallel challenge streaming + max_tokens 32768
+- README rewrite: repositioned as AI infrastructure, CLI citation display
+- `_run_consensus` 7-tuple return (added citations)
+- 1641 Python + 194 Vitest tests (1835 total)
+- Files: refine.py, handlers.py, machine.py, ws.py, ask.py, threads.py, app.py, all providers, ConsensusNav.tsx, ThreadNav.tsx, CitationList.tsx, RefinementPanel.tsx, consensus.ts, types.ts
+
+## 2026-03-09: Follow-ups + Revision Citations + CLI Persistence + Provider Updates
+- **Follow-up questions**: `generate_followups()` — cheapest model, JSON mode, 3 questions post-consensus
+  - `followups` on ConsensusContext, `followups_json` on Thread model + migration
+  - `_run_consensus` 8-tuple return (added followups), all callers updated
+  - Frontend: clickable follow-ups in ConsensusNav + ThreadNav Disclosure, triggers new consensus
+- **Revision citations**: `handle_revise()` accepts tool_registry + web_search, extracts citations
+  - `revision_citations` on ConsensusContext + RoundResult, persisted to DB
+  - `handle_propose()` extracts proposal_citations directly in handler
+  - WS sends revision citations in REVISE phase, ConsensusPanel passes to phase card
+- **CLI persistence**: `persist_consensus()` saves full round history to DB from CLI
+  - `_ask_async` creates DB factory, disposes engine in finally
+- **CLI options**: top-level `--rounds`/`--challengers` cascade to subcommands
+  - `_parse_challengers()`: int count or comma-separated model refs
+- **Calibration filters**: category + since/until date inputs on CalibrationDashboard
+- **OpenAI**: `reasoning_effort: "high"` for GPT-5.x (no tools), gpt-5.2 in NO_TEMPERATURE_MODELS
+- **Perplexity**: retry for APIConnectionError (2 attempts, 1s delay)
+- **Alembic**: `DUH_DATABASE_URL` env var overrides alembic.ini
+- Tests: TestShowCitations (8), TestShowFinalDecisionOverview (2), calibration date tests (4), all 8-tuple updates
+- Files: handlers.py, machine.py, app.py, ws.py, ask.py, threads.py, models.py, migrations.py, mcp/server.py, openai.py, perplexity.py, catalog.py, alembic/env.py, CalibrationDashboard.tsx, ConsensusNav.tsx, ConsensusPanel.tsx, ThreadNav.tsx, calibration.ts, consensus.ts, types.ts, + 7 test files
+
 ## 2026-03-07: Z-index Fix + GPT-5.4 + .env Docs
 - Fixed z-index stacking contexts trapping dropdowns (Shell z-10, TopBar z-20 removed)
 - Added CSS z-index tokens (`--z-background`, `--z-dropdown`, `--z-overlay`, `--z-modal`)
diff --git a/memory-bank/toc.md b/memory-bank/toc.md
index 66f86ef..8356e53 100644
--- a/memory-bank/toc.md
+++ b/memory-bank/toc.md
@@ -4,7 +4,7 @@
 - [projectbrief.md](./projectbrief.md) — Vision, tenets, architecture, build sequence
 - [techContext.md](./techContext.md) — Tech stack decisions with rationale (Python, Docker, SQLAlchemy, frontend, tools, etc.)
 - [decisions.md](./decisions.md) — Architectural decisions with context, alternatives, and consequences (26 ADRs)
-- [activeContext.md](./activeContext.md) — Current state, question-refinement branch in progress
+- [activeContext.md](./activeContext.md) — Current state, post PR #14 — follow-ups, revision citations, CLI persistence
 - [progress.md](./progress.md) — Milestone tracking, what's built, what's next
 - [competitive-landscape.md](./competitive-landscape.md) — Research on existing tools, frameworks, and academic work
 - [quick-start.md](./quick-start.md) — Session entry point, v0.5 complete, key file references
diff --git a/src/duh/api/routes/ask.py b/src/duh/api/routes/ask.py
index 3dd634b..6b11c8a 100644
--- a/src/duh/api/routes/ask.py
+++ b/src/duh/api/routes/ask.py
@@ -112,6 +112,7 @@ async def _handle_consensus(  # type: ignore[no-untyped-def]
         cost,
         _overview,
         _citations,
+        _followups,
     ) = await _run_consensus(
         body.question,
         config,
@@ -192,6 +193,7 @@ async def _handle_decompose(body: AskRequest, config, pm) -> AskResponse:  # typ
             cost,
             _overview,
             _citations,
+            _followups,
         ) = await _run_consensus(body.question, config, pm)
         return AskResponse(
             decision=decision,
diff --git a/src/duh/api/routes/threads.py b/src/duh/api/routes/threads.py
index 1fcb537..e6c3a59 100644
--- a/src/duh/api/routes/threads.py
+++ b/src/duh/api/routes/threads.py
@@ -56,6 +56,7 @@ class ThreadDetailResponse(BaseModel):
     status: str
     created_at: str
     turns: list[TurnResponse] = Field(default_factory=list)
+    followups: list[str] = Field(default_factory=list)
 
 
 class ThreadListResponse(BaseModel):
@@ -189,12 +190,23 @@ def _build_thread_detail(thread: object) -> ThreadDetailResponse:
             )
         )
 
+    # Parse followups from JSON
+    followups_raw = getattr(thread, "followups_json", None)
+    followups: list[str] = []
+    if followups_raw:
+        import contextlib
+        import json as _json
+
+        with contextlib.suppress(ValueError, TypeError):
+            followups = _json.loads(followups_raw)
+
     return ThreadDetailResponse(
         thread_id=thread.id,  # type: ignore[attr-defined]
         question=thread.question,  # type: ignore[attr-defined]
         status=thread.status,  # type: ignore[attr-defined]
         created_at=thread.created_at.isoformat(),  # type: ignore[attr-defined]
         turns=turns,
+        followups=followups,
     )
 
 
diff --git a/src/duh/api/routes/ws.py b/src/duh/api/routes/ws.py
index eb27d38..0a52a28 100644
--- a/src/duh/api/routes/ws.py
+++ b/src/duh/api/routes/ws.py
@@ -145,17 +145,13 @@ async def _stream_consensus(
             tool_registry=tool_registry,
             web_search=use_native_search,
         )
-        propose_citations = [
-            {"url": c.url, "title": c.title} for c in (propose_resp.citations or [])
-        ]
-        ctx.proposal_citations = propose_citations
         await ws.send_json(
             {
                 "type": "phase_complete",
                 "phase": "PROPOSE",
                 "content": ctx.proposal or "",
                 "truncated": propose_resp.finish_reason != "stop",
-                "citations": propose_citations if propose_citations else None,
+                "citations": ctx.proposal_citations or None,
             }
         )
 
@@ -193,13 +189,16 @@ async def _stream_consensus(
                 "round": ctx.current_round,
             }
         )
-        revise_resp = await handle_revise(ctx, pm)
+        revise_resp = await handle_revise(
+            ctx, pm, tool_registry=tool_registry, web_search=use_native_search
+        )
         await ws.send_json(
             {
                 "type": "phase_complete",
                 "phase": "REVISE",
                 "content": ctx.revision or "",
                 "truncated": revise_resp.finish_reason != "stop",
+                "citations": ctx.revision_citations or None,
             }
         )
 
@@ -221,8 +220,11 @@ async def _stream_consensus(
 
     sm.transition(ConsensusState.COMPLETE)
 
-    # Generate executive overview (best-effort)
+    # Generate executive overview and follow-up questions (best-effort)
     await generate_overview(ctx, pm)
+    from duh.consensus.handlers import generate_followups
+
+    await generate_followups(ctx, pm)
 
     # Persist to DB if available
     thread_id: str | None = None
@@ -230,7 +232,11 @@ async def _stream_consensus(
     if db_factory is not None:
         try:
             thread_id = await _persist_consensus(
-                db_factory, question, ctx.round_history, ctx.overview
+                db_factory,
+                question,
+                ctx.round_history,
+                ctx.overview,
+                followups=ctx.followups or None,
             )
         except Exception:
             logger.exception("Failed to persist consensus thread")
@@ -245,6 +251,7 @@ async def _stream_consensus(
             "cost": pm.total_cost,
             "thread_id": thread_id,
             "overview": ctx.overview,
+            "followups": ctx.followups if ctx.followups else None,
         }
     )
     await ws.close()
@@ -347,6 +354,7 @@ async def _persist_consensus(
     question: str,
     round_history: list[RoundResult],
     overview: str | None = None,
+    followups: list[str] | None = None,
 ) -> str:
     """Persist consensus round history to the database.
 
@@ -392,8 +400,20 @@ async def _persist_consensus(
                     ch.content,
                     citations_json=ch_cit,
                 )
+            rev_cit = None
+            if rr.revision_citations:
+                rev_cit = json.dumps(
+                    [
+                        {"url": c["url"], "title": c.get("title")}
+                        for c in rr.revision_citations
+                    ]
+                )
             await repo.add_contribution(
-                turn.id, rr.proposal_model, "reviser", rr.revision
+                turn.id,
+                rr.proposal_model,
+                "reviser",
+                rr.revision,
+                citations_json=rev_cit,
             )
             await repo.save_decision(
                 turn.id,
@@ -407,5 +427,8 @@ async def _persist_consensus(
         if overview:
             await repo.save_thread_summary(thread.id, overview, "overview")
 
+        if followups:
+            thread.followups_json = json.dumps(followups)
+
         await session.commit()
         return str(thread.id)
diff --git a/src/duh/cli/app.py b/src/duh/cli/app.py
index c0bf7e7..0b3b22c 100644
--- a/src/duh/cli/app.py
+++ b/src/duh/cli/app.py
@@ -25,6 +25,7 @@
 
     from duh.cli.display import ConsensusDisplay
     from duh.config.schema import DuhConfig
+    from duh.consensus.machine import RoundResult
     from duh.memory.models import Thread, Vote
     from duh.providers.base import ModelInfo
     from duh.providers.manager import ProviderManager
@@ -40,6 +41,21 @@ def _error(msg: str) -> None:
     sys.exit(1)
 
 
+def _parse_challengers(value: str | None) -> tuple[list[str] | None, int | None]:
+    """Parse --challengers as int (count) or comma-separated model refs.
+
+    Refs are whitespace-stripped and empty entries dropped ("a, b," -> a, b).
+    Returns (model_list, count) — exactly one will be set, or both None.
+    """
+    if not value:
+        return None, None
+    try:
+        return None, int(value)
+    except ValueError:
+        refs = [ref.strip() for ref in value.split(",") if ref.strip()]
+        return refs or None, None
+
+
 def _load_config(config_path: str | None) -> DuhConfig:
     """Load config with user-friendly error handling."""
     try:
@@ -199,6 +215,104 @@ def _setup_tools(config: DuhConfig) -> ToolRegistry | None:
     return registry
 
 
+async def persist_consensus(
+    db_factory: async_sessionmaker[AsyncSession],
+    question: str,
+    round_history: list[RoundResult],
+    overview: str | None = None,
+    followups: list[str] | None = None,
+) -> str:
+    """Persist full consensus round history to the database.
+
+    Creates one thread for ``question`` and, for every round, one turn with
+    the proposer, challenger and reviser contributions plus the round's
+    decision. Citations are stored per contribution as a JSON list of
+    ``{"url", "title"}`` objects (snippets are not persisted). A single
+    ``session.commit()`` is issued after all rounds have been added.
+
+    Args:
+        db_factory: Session factory used to open the database session.
+        question: Question text the new thread is created from.
+        round_history: Rounds to persist; each becomes one turn.
+        overview: Optional overview, saved as an "overview" thread summary.
+        followups: Optional follow-up questions, stored as a JSON list.
+
+    Returns:
+        The new thread ID as a string.
+    """
+    import json as _json
+
+    from duh.memory.repository import MemoryRepository
+
+    def _citations_json(
+        citations: list[dict[str, str | None]] | tuple[dict[str, str | None], ...],
+    ) -> str | None:
+        """Serialize citations to JSON (url and title only), or None if empty."""
+        if not citations:
+            return None
+        return _json.dumps(
+            [{"url": c["url"], "title": c.get("title")} for c in citations]
+        )
+
+    async with db_factory() as session:
+        repo = MemoryRepository(session)
+        thread = await repo.create_thread(question)
+        # The thread is stored as already complete; no in-progress state is saved.
+        thread.status = "complete"
+
+        for rr in round_history:
+            turn = await repo.create_turn(thread.id, rr.round_number, "COMMIT")
+
+            # Proposal with citations
+            await repo.add_contribution(
+                turn.id,
+                rr.proposal_model,
+                "proposer",
+                rr.proposal,
+                citations_json=_citations_json(rr.proposal_citations),
+            )
+
+            # Challenger responses with citations
+            for ch in rr.challenges:
+                await repo.add_contribution(
+                    turn.id,
+                    ch.model_ref,
+                    "challenger",
+                    ch.content,
+                    citations_json=_citations_json(ch.citations),
+                )
+
+            # Revision with citations (recorded under the proposal model)
+            await repo.add_contribution(
+                turn.id,
+                rr.proposal_model,
+                "reviser",
+                rr.revision,
+                citations_json=_citations_json(rr.revision_citations),
+            )
+
+            # Decision
+            await repo.save_decision(
+                turn.id,
+                thread.id,
+                rr.decision,
+                rr.confidence,
+                rigor=rr.rigor,
+                dissent=rr.dissent,
+            )
+
+        # Thread-level extras are written only when a value was provided.
+        if overview:
+            await repo.save_thread_summary(thread.id, overview, "overview")
+
+        if followups:
+            # Assigned on the thread object; saved by the commit below.
+            thread.followups_json = _json.dumps(followups)
+
+        await session.commit()
+        return str(thread.id)
+
+
 async def _run_consensus(
     question: str,
     config: DuhConfig,
@@ -209,13 +323,23 @@ async def _run_consensus(
     panel: list[str] | None = None,
     proposer_override: str | None = None,
     challengers_override: list[str] | None = None,
+    challenger_count: int | None = None,
     web_search: bool = False,
+    db_factory: async_sessionmaker[AsyncSession] | None = None,
 ) -> tuple[
-    str, float, float, str | None, float, str | None, list[dict[str, str | None]]
+    str,
+    float,
+    float,
+    str | None,
+    float,
+    str | None,
+    list[dict[str, str | None]],
+    list[str],
 ]:
     """Run the full consensus loop.
 
-    Returns (decision, confidence, rigor, dissent, total_cost, overview, citations).
+    Returns (decision, confidence, rigor, dissent, total_cost, overview,
+    citations, followups).
     """
     from duh.consensus.convergence import check_convergence
     from duh.consensus.handlers import (
@@ -272,7 +396,10 @@ async def _run_consensus(
         # CHALLENGE
         sm.transition(ConsensusState.CHALLENGE)
         challengers = challengers_override or select_challengers(
-            pm, proposer, panel=effective_panel
+            pm,
+            proposer,
+            panel=effective_panel,
+            **({"count": challenger_count} if challenger_count else {}),
         )
         if display:
             detail = f"{len(challengers)} models"
@@ -299,10 +426,17 @@ async def _run_consensus(
         if display:
             reviser = ctx.proposal_model or proposer
             with display.phase_status("REVISE", reviser):
-                await handle_revise(ctx, pm)
+                await handle_revise(
+                    ctx,
+                    pm,
+                    tool_registry=tool_registry,
+                    web_search=web_search,
+                )
             display.show_revise(ctx.revision_model or reviser, ctx.revision or "")
         else:
-            await handle_revise(ctx, pm)
+            await handle_revise(
+                ctx, pm, tool_registry=tool_registry, web_search=web_search
+            )
 
         # COMMIT
         sm.transition(ConsensusState.COMMIT)
@@ -327,8 +461,11 @@ async def _run_consensus(
 
     sm.transition(ConsensusState.COMPLETE)
 
-    # Generate executive overview (best-effort)
+    # Generate executive overview and follow-up questions (best-effort)
     await generate_overview(ctx, pm)
+    from duh.consensus.handlers import generate_followups
+
+    await generate_followups(ctx, pm)
 
     # Show tool usage if any
     if display and ctx.tool_calls_log:
@@ -340,10 +477,27 @@ async def _run_consensus(
         all_citations.extend(rr.proposal_citations)
         for ch in rr.challenges:
             all_citations.extend(ch.citations)
+        all_citations.extend(rr.revision_citations)
     # Include current round (may not be archived yet)
     all_citations.extend(ctx.proposal_citations)
     for ch in ctx.challenges:
         all_citations.extend(ch.citations)
+    all_citations.extend(ctx.revision_citations)
+
+    # Persist full round history if DB available
+    if db_factory is not None:
+        try:
+            await persist_consensus(
+                db_factory,
+                question,
+                ctx.round_history,
+                overview=ctx.overview,
+                followups=ctx.followups or None,
+            )
+        except Exception:
+            import logging as _logging
+
+            _logging.getLogger(__name__).exception("Failed to persist consensus thread")
 
     return (
         ctx.decision or "",
@@ -353,6 +507,7 @@ async def _run_consensus(
         pm.total_cost,
         ctx.overview,
         all_citations,
+        ctx.followups,
     )
 
 
@@ -368,14 +523,32 @@ async def _run_consensus(
     default=None,
     help="Path to config file.",
 )
+@click.option(
+    "--rounds",
+    type=int,
+    default=None,
+    help="Max consensus rounds (overrides config).",
+)
+@click.option(
+    "--challengers",
+    default=None,
+    help="Count or model refs (e.g. 3 or openai:gpt-5,google:gemini-2.5-pro).",
+)
 @click.pass_context
-def cli(ctx: click.Context, config_path: str | None) -> None:
+def cli(
+    ctx: click.Context,
+    config_path: str | None,
+    rounds: int | None,
+    challengers: str | None,
+) -> None:
     """duh - Multi-model consensus engine.
 
     Ask multiple LLMs, get one answer they agree on.
     """
     ctx.ensure_object(dict)
     ctx.obj["config_path"] = config_path
+    ctx.obj["rounds"] = rounds
+    ctx.obj["challengers"] = challengers
     if ctx.invoked_subcommand is None:
         click.echo(ctx.get_help())
 
@@ -416,7 +589,7 @@ def cli(ctx: click.Context, config_path: str | None) -> None:
 @click.option(
     "--challengers",
     default=None,
-    help="Override challengers (comma-separated model refs).",
+    help="Count or model refs (e.g. 3 or openai:gpt-5,google:gemini-2.5-pro).",
 )
 @click.option(
     "--panel",
@@ -447,6 +620,11 @@ def ask(
     and produces a revised consensus decision.
     """
     config = _load_config(ctx.obj["config_path"])
+
+    # Top-level options cascade into subcommand (subcommand wins)
+    rounds = rounds or ctx.obj.get("rounds")
+    challengers = challengers or ctx.obj.get("challengers")
+
     if rounds is not None:
         config.general.max_rounds = rounds
 
@@ -456,7 +634,7 @@ def ask(
 
     # Parse model selection overrides
     panel_list = panel.split(",") if panel else None
-    challengers_list = challengers.split(",") if challengers else None
+    challengers_list, challenger_count = _parse_challengers(challengers)
 
     # Question refinement (pre-consensus clarification)
     if refine:
@@ -498,13 +676,14 @@ def ask(
                 panel=panel_list,
                 proposer_override=proposer,
                 challengers_override=challengers_list,
+                challenger_count=challenger_count,
             )
         )
     except DuhError as e:
         _error(str(e))
         return  # unreachable
 
-    decision, confidence, rigor, dissent, cost, overview, citations = result
+    decision, confidence, rigor, dissent, cost, overview, citations, _followups = result
 
     from duh.cli.display import ConsensusDisplay
 
@@ -547,8 +726,16 @@ async def _ask_async(
     panel: list[str] | None = None,
     proposer_override: str | None = None,
     challengers_override: list[str] | None = None,
+    challenger_count: int | None = None,
 ) -> tuple[
-    str, float, float, str | None, float, str | None, list[dict[str, str | None]]
+    str,
+    float,
+    float,
+    str | None,
+    float,
+    str | None,
+    list[dict[str, str | None]],
+    list[str],
 ]:
     """Async implementation for the ask command."""
     from duh.cli.display import ConsensusDisplay
@@ -563,19 +750,25 @@ async def _ask_async(
 
     tool_registry = _setup_tools(config)
     use_native_search = config.tools.enabled and config.tools.web_search.native
+    factory, engine = await _create_db(config)
     display = ConsensusDisplay()
     display.start()
-    return await _run_consensus(
-        question,
-        config,
-        pm,
-        display=display,
-        tool_registry=tool_registry,
-        panel=panel,
-        proposer_override=proposer_override,
-        challengers_override=challengers_override,
-        web_search=use_native_search,
-    )
+    try:
+        return await _run_consensus(
+            question,
+            config,
+            pm,
+            display=display,
+            tool_registry=tool_registry,
+            panel=panel,
+            proposer_override=proposer_override,
+            challengers_override=challengers_override,
+            challenger_count=challenger_count,
+            web_search=use_native_search,
+            db_factory=factory,
+        )
+    finally:
+        await engine.dispose()
 
 
 async def _ask_voting_async(
@@ -666,6 +859,7 @@ async def _ask_auto_async(
             cost,
             overview,
             citations,
+            _followups,
         ) = await _run_consensus(question, config, pm, display=display)
         display.show_final_decision(
             decision, confidence, rigor, cost, dissent, overview=overview
@@ -743,7 +937,16 @@ async def _ask_decompose_async(
     # Single-subtask optimization: skip synthesis
     if len(subtask_specs) == 1:
         result = await _run_consensus(question, config, pm, display=display)
-        decision, confidence, rigor, dissent, cost, overview, citations = result
+        (
+            decision,
+            confidence,
+            rigor,
+            dissent,
+            cost,
+            overview,
+            citations,
+            _followups,
+        ) = result
         display.show_final_decision(
             decision, confidence, rigor, cost, dissent, overview=overview
         )
@@ -2260,6 +2463,9 @@ def batch(
     (each line is {"question": "..."}).
     """
     config = _load_config(ctx.obj["config_path"])
+
+    # Top-level --rounds cascades into batch
+    rounds = rounds or ctx.obj.get("rounds")
     if rounds is not None:
         config.general.max_rounds = rounds
 
@@ -2397,6 +2603,7 @@ async def _batch_async(
                     _cost,
                     _overview,
                     _citations,
+                    _fups,
                 ) = await _run_consensus(question, config, pm)
 
             q_cost = pm.total_cost - cost_before
diff --git a/src/duh/consensus/handlers.py b/src/duh/consensus/handlers.py
index 848845a..fac7376 100644
--- a/src/duh/consensus/handlers.py
+++ b/src/duh/consensus/handlers.py
@@ -328,6 +328,10 @@ async def handle_propose(
     # Update context
     ctx.proposal = response.content
     ctx.proposal_model = model_ref
+    ctx.proposal_citations = [
+        {"url": c.url, "title": c.title, "snippet": c.snippet}
+        for c in (response.citations or [])
+    ]
 
     return response
 
@@ -631,6 +635,8 @@ async def handle_revise(
     *,
     temperature: float = 0.7,
     max_tokens: int = 32768,
+    tool_registry: ToolRegistry | None = None,
+    web_search: bool = False,
 ) -> ModelResponse:
     """Execute the REVISE phase of consensus.
 
@@ -680,9 +686,27 @@ async def handle_revise(
     messages = build_revise_prompt(ctx, max_tokens=max_tokens)
     provider, model_id = provider_manager.get_provider(reviser_ref)
 
-    response = await provider.send(
-        messages, model_id, max_tokens=max_tokens, temperature=temperature
-    )
+    if tool_registry is not None:
+        from duh.tools.augmented_send import tool_augmented_send
+
+        response = await tool_augmented_send(
+            provider,
+            model_id,
+            messages,
+            tool_registry,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            web_search=web_search,
+        )
+        _log_tool_calls(ctx, response, "revise")
+    else:
+        response = await provider.send(
+            messages,
+            model_id,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            web_search=web_search,
+        )
 
     # Record cost
     model_info = provider_manager.get_model_info(reviser_ref)
@@ -691,6 +715,10 @@ async def handle_revise(
     # Update context
     ctx.revision = response.content
     ctx.revision_model = reviser_ref
+    ctx.revision_citations = [
+        {"url": c.url, "title": c.title, "snippet": c.snippet}
+        for c in (response.citations or [])
+    ]
 
     return response
 
@@ -902,3 +930,65 @@ async def generate_overview(
         return None
     except Exception:
         return None
+
+
+async def generate_followups(
+    ctx: ConsensusContext,
+    provider_manager: ProviderManager,
+    *,
+    count: int = 3,
+) -> list[str]:
+    """Generate suggested follow-up questions based on the consensus.
+
+    Asks the model with the lowest input cost (JSON mode) for ``count``
+    questions built from the decision, challenges and dissent in ``ctx``,
+    and stores the parsed list on ``ctx.followups``.
+
+    Best-effort: provider lookup, request or parse failures return [].
+    """
+    import json as _json
+
+    models = provider_manager.list_all_models()
+    if not models:
+        return []
+
+    cheapest = min(models, key=lambda m: m.input_cost_per_mtok)
+    challenges_summary = "".join(
+        f"\n- [{ch.model_ref}] ({ch.framing}): {ch.content[:200]}"
+        for ch in ctx.challenges
+    )
+
+    prompt = (
+        f"Given this consensus decision, suggest {count} follow-up questions "
+        "the user should ask next. Each question should explore a different "
+        "angle: deeper technical detail, practical implications, risks or "
+        "edge cases, or related decisions that follow from this one.\n\n"
+        "Make questions specific and actionable, not generic. They should "
+        "build on the debate that happened, not repeat it.\n\n"
+        f"Original question: {ctx.question}\n"
+        f"Decision: {ctx.decision}\n"
+        f"Confidence: {ctx.confidence:.0%}\n"
+        f"Key challenges:{challenges_summary or ' (none)'}\n"
+        f"Dissent: {ctx.dissent or 'None'}\n\n"
+        f'Return JSON: {{"questions": ["question1", "question2", ...]}}'
+    )
+
+    try:
+        # Looked up inside the try so a lookup failure also degrades to [].
+        provider, model_id = provider_manager.get_provider(cheapest.model_ref)
+        response = await provider.send(
+            [PromptMessage(role="user", content=prompt)],
+            model_id,
+            max_tokens=1000,
+            temperature=0.7,
+            response_format="json",
+        )
+        provider_manager.record_usage(cheapest, response.usage)
+        data = _json.loads(response.content)
+        questions = data.get("questions", []) if isinstance(data, dict) else []
+        if not isinstance(questions, list):
+            return []
+        ctx.followups = [q for q in questions if isinstance(q, str)][:count]
+        return ctx.followups
+    except Exception:
+        return []
diff --git a/src/duh/consensus/machine.py b/src/duh/consensus/machine.py
index 941dbf5..f0c6ae3 100644
--- a/src/duh/consensus/machine.py
+++ b/src/duh/consensus/machine.py
@@ -58,6 +58,7 @@ class RoundResult:
     rigor: float = 0.0
     dissent: str | None = None
     proposal_citations: tuple[dict[str, str | None], ...] = ()
+    revision_citations: tuple[dict[str, str | None], ...] = ()
 
 
 @dataclass(frozen=True, slots=True)
@@ -92,11 +93,13 @@ class ConsensusContext:
     challenges: list[ChallengeResult] = field(default_factory=list)
     revision: str | None = None
     revision_model: str | None = None
+    revision_citations: list[dict[str, str | None]] = field(default_factory=list)
     decision: str | None = None
     confidence: float = 0.0
     rigor: float = 0.0
     dissent: str | None = None
     overview: str | None = None
+    followups: list[str] = field(default_factory=list)
     converged: bool = False
 
     # History
@@ -122,6 +125,7 @@ def _clear_round_data(self) -> None:
         self.challenges = []
         self.revision = None
         self.revision_model = None
+        self.revision_citations = []
         self.decision = None
         self.confidence = 0.0
         self.rigor = 0.0
@@ -142,6 +146,7 @@ def _archive_round(self) -> None:
                 rigor=self.rigor,
                 dissent=self.dissent,
                 proposal_citations=tuple(self.proposal_citations),
+                revision_citations=tuple(self.revision_citations),
             )
         )
 
diff --git a/src/duh/mcp/server.py b/src/duh/mcp/server.py
index 53b3e9b..db7e826 100644
--- a/src/duh/mcp/server.py
+++ b/src/duh/mcp/server.py
@@ -143,6 +143,7 @@ async def _handle_ask(args: dict) -> list[TextContent]:  # type: ignore[type-arg
             cost,
             _overview,
             _citations,
+            _followups,
         ) = await _run_consensus(question, config, pm)
         return [
             TextContent(
diff --git a/src/duh/memory/migrations.py b/src/duh/memory/migrations.py
index be2f8f6..c455590 100644
--- a/src/duh/memory/migrations.py
+++ b/src/duh/memory/migrations.py
@@ -69,3 +69,8 @@ async def ensure_schema(engine: AsyncEngine) -> None:
             await conn.exec_driver_sql(
                 "ALTER TABLE threads ADD COLUMN slug VARCHAR(200) DEFAULT NULL"
             )
+        if "followups_json" not in thread_cols:
+            logger.info("Adding 'followups_json' column to threads table")
+            await conn.exec_driver_sql(
+                "ALTER TABLE threads ADD COLUMN followups_json TEXT DEFAULT NULL"
+            )
diff --git a/src/duh/memory/models.py b/src/duh/memory/models.py
index 56286d0..bcdbc10 100644
--- a/src/duh/memory/models.py
+++ b/src/duh/memory/models.py
@@ -75,6 +75,9 @@ class Thread(Base):
     user_id: Mapped[str | None] = mapped_column(
         ForeignKey("users.id"), nullable=True, index=True, default=None
     )
+    followups_json: Mapped[str | None] = mapped_column(
+        Text, nullable=True, default=None
+    )
     created_at: Mapped[datetime] = mapped_column(DateTime, default=_utcnow)
     updated_at: Mapped[datetime] = mapped_column(
         DateTime, default=_utcnow, onupdate=_utcnow
diff --git a/src/duh/providers/catalog.py b/src/duh/providers/catalog.py
index a6cbd32..93927b9 100644
--- a/src/duh/providers/catalog.py
+++ b/src/duh/providers/catalog.py
@@ -241,6 +241,7 @@
     "gpt-5",
     "gpt-5-mini",
     "gpt-5-nano",
+    "gpt-5.2",
     "gpt-5.4",
 }
 
diff --git a/src/duh/providers/openai.py b/src/duh/providers/openai.py
index 82b70a7..9df11e4 100644
--- a/src/duh/providers/openai.py
+++ b/src/duh/providers/openai.py
@@ -43,6 +43,16 @@
     "gpt-4o-mini-search-preview",
 }
 
+# GPT-5.x models support reasoning_effort but don't always reason by default.
+# Dedicated reasoning models (o3, o3-mini, o4-mini) always reason at full capacity.
+_REASONING_EFFORT_MODELS: set[str] = {
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-5-nano",
+    "gpt-5.2",
+    "gpt-5.4",
+}
+
 
 def _map_error(e: openai.APIError) -> Exception:
     """Map OpenAI SDK errors to duh error hierarchy."""
@@ -160,6 +170,11 @@ async def send(
                 }
                 for t in tools
             ]
+        else:
+            # reasoning_effort is incompatible with function tools on
+            # /v1/chat/completions for gpt-5.x models.
+            if model_id in _REASONING_EFFORT_MODELS:
+                kwargs["reasoning_effort"] = "high"
         if web_search and model_id in _SEARCH_MODELS:
             kwargs["web_search_options"] = {}
 
@@ -229,6 +244,8 @@ async def stream(
         }
         if model_id not in _NO_TEMPERATURE_MODELS:
             kwargs["temperature"] = temperature
+        if model_id in _REASONING_EFFORT_MODELS:
+            kwargs["reasoning_effort"] = "high"
         if stop_sequences:
             kwargs["stop"] = stop_sequences
 
diff --git a/src/duh/providers/perplexity.py b/src/duh/providers/perplexity.py
index 9aea342..8000996 100644
--- a/src/duh/providers/perplexity.py
+++ b/src/duh/providers/perplexity.py
@@ -2,7 +2,9 @@
 
 from __future__ import annotations
 
+import asyncio
 import contextlib
+import logging
 import time
 from typing import TYPE_CHECKING, Any
 
@@ -30,6 +32,11 @@
 
     from duh.providers.base import PromptMessage
 
+logger = logging.getLogger(__name__)
+
+_MAX_RETRIES = 2
+_RETRY_DELAY = 1.0  # seconds
+
 PROVIDER_ID = "perplexity"
 _KNOWN_MODELS = MODEL_CATALOG[PROVIDER_ID]
 _DEFAULT_CAPS = PROVIDER_CAPS[PROVIDER_ID]
@@ -47,6 +54,8 @@ def _map_error(e: openai.APIError) -> Exception:
                 with contextlib.suppress(ValueError):
                     retry_after = float(raw)
         return ProviderRateLimitError(PROVIDER_ID, retry_after=retry_after)
+    if isinstance(e, openai.APIConnectionError):
+        return ProviderTimeoutError(PROVIDER_ID, f"Connection error: {e}")
     if isinstance(e, openai.APITimeoutError):
         return ProviderTimeoutError(PROVIDER_ID, str(e))
     if isinstance(e, openai.InternalServerError):
@@ -153,10 +162,29 @@ async def send(
             ]
 
         start = time.monotonic()
-        try:
-            response = await self._client.chat.completions.create(**kwargs)
-        except openai.APIError as e:
-            raise _map_error(e) from e
+        last_exc: openai.APIError | None = None
+        response = None
+        for attempt in range(_MAX_RETRIES):
+            try:
+                response = await self._client.chat.completions.create(**kwargs)
+                break
+            except openai.APIConnectionError as e:
+                last_exc = e
+                if attempt < _MAX_RETRIES - 1:
+                    logger.warning(
+                        "Perplexity connection error (attempt %d/%d), "
+                        "retrying in %.0fs: %s",
+                        attempt + 1,
+                        _MAX_RETRIES,
+                        _RETRY_DELAY,
+                        e,
+                    )
+                    await asyncio.sleep(_RETRY_DELAY)
+            except openai.APIError as e:
+                raise _map_error(e) from e
+        if response is None:
+            assert last_exc is not None
+            raise _map_error(last_exc) from last_exc
 
         latency_ms = (time.monotonic() - start) * 1000
 
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 4158647..5c1d800 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -77,6 +77,7 @@ def test_displays_decision(
             0.0042,
             None,
             [],
+            [],
         )
 
         result = runner.invoke(cli, ["ask", "What database?"])
@@ -105,6 +106,7 @@ def test_displays_dissent(
             0.01,
             None,
             [],
+            [],
         )
 
         result = runner.invoke(cli, ["ask", "What database?"])
@@ -125,7 +127,7 @@ def test_no_dissent_when_none(
         from duh.config.schema import DuhConfig
 
         mock_config.return_value = DuhConfig()
-        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0, None, [])
+        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0, None, [], [])
 
         result = runner.invoke(cli, ["ask", "Question?"])
 
@@ -144,7 +146,7 @@ def test_rounds_option(
 
         config = DuhConfig()
         mock_config.return_value = config
-        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0, None, [])
+        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0, None, [], [])
 
         result = runner.invoke(cli, ["ask", "--rounds", "5", "Question?"])
 
@@ -645,7 +647,7 @@ def test_ask_full_loop(self, runner: CliRunner) -> None:
 
         async def fake_ask(
             question: str, cfg: Any, **kwargs: Any
-        ) -> tuple[str, float, float, str | None, float, str | None]:
+        ) -> tuple[str, float, float, str | None, float, str | None, list[dict[str, str | None]], list[str]]:
             pm = ProviderManager()
             await pm.register(provider)
             from duh.cli.app import _run_consensus
diff --git a/tests/unit/test_cli_batch.py b/tests/unit/test_cli_batch.py
index df261e3..22f4a14 100644
--- a/tests/unit/test_cli_batch.py
+++ b/tests/unit/test_cli_batch.py
@@ -460,10 +460,11 @@ async def fake_consensus(
             float,
             str | None,
             list[dict[str, str | None]],
+            list[str],
         ]:
             nonlocal consensus_called
             consensus_called = True
-            return ("Use SQLite.", 0.85, 1.0, None, 0.01, None, [])
+            return ("Use SQLite.", 0.85, 1.0, None, 0.01, None, [], [])
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
@@ -554,8 +555,8 @@ async def fake_consensus(
             pm: Any,
             display: Any = None,
             tool_registry: Any = None,
-        ) -> tuple[str, float, float, str | None, float, str | None]:
-            return ("Answer.", 0.9, 1.0, None, 0.01, None, [])
+        ) -> tuple[str, float, float, str | None, float, str | None, list[dict[str, str | None]], list[str]]:
+            return ("Answer.", 0.9, 1.0, None, 0.01, None, [], [])
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
@@ -609,12 +610,12 @@ async def fake_consensus(
             pm: Any,
             display: Any = None,
             tool_registry: Any = None,
-        ) -> tuple[str, float, float, str | None, float, str | None]:
+        ) -> tuple[str, float, float, str | None, float, str | None, list[dict[str, str | None]], list[str]]:
             nonlocal call_count
             call_count += 1
             if question == "Q2":
                 raise RuntimeError("Provider timeout")
-            return ("Answer.", 0.9, 1.0, None, 0.01, None, [])
+            return ("Answer.", 0.9, 1.0, None, 0.01, None, [], [])
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
@@ -658,10 +659,10 @@ async def fake_consensus(
             pm: Any,
             display: Any = None,
             tool_registry: Any = None,
-        ) -> tuple[str, float, float, str | None, float, str | None]:
+        ) -> tuple[str, float, float, str | None, float, str | None, list[dict[str, str | None]], list[str]]:
             if question == "Q2":
                 raise RuntimeError("Model unavailable")
-            return ("Answer.", 0.9, 1.0, None, 0.01, None, [])
+            return ("Answer.", 0.9, 1.0, None, 0.01, None, [], [])
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
diff --git a/tests/unit/test_cli_display.py b/tests/unit/test_cli_display.py
index 5c72456..d8a3534 100644
--- a/tests/unit/test_cli_display.py
+++ b/tests/unit/test_cli_display.py
@@ -386,3 +386,145 @@ def test_complete_round_display(self) -> None:
         # Stats
         assert "3 models" in out
         assert "$0.0500" in out
+
+
+# ── show_citations ───────────────────────────────────────────
+
+
+class TestShowCitations:
+    def test_empty_citations_no_output(self) -> None:
+        display, buf = _make_display()
+        display.show_citations([])
+        assert _output(buf) == ""
+
+    def test_single_citation(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "https://example.com/page", "title": "Example Page"},
+            ]
+        )
+        out = _output(buf)
+        assert "Sources" in out
+        assert "(1)" in out
+        assert "Example Page" in out
+        assert "https://example.com/page" in out
+
+    def test_deduplicates_by_url(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "https://example.com/a", "title": "Page A"},
+                {"url": "https://example.com/a", "title": "Page A duplicate"},
+                {"url": "https://example.com/b", "title": "Page B"},
+            ]
+        )
+        out = _output(buf)
+        assert "(2)" in out  # 2 unique URLs
+        assert "Page A" in out
+        assert "Page A duplicate" not in out
+        assert "Page B" in out
+
+    def test_groups_by_hostname(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "https://docs.python.org/a", "title": "Doc A"},
+                {"url": "https://docs.python.org/b", "title": "Doc B"},
+                {"url": "https://example.com/x", "title": "Example"},
+            ]
+        )
+        out = _output(buf)
+        assert "(3)" in out
+        # Both python.org docs should appear (grouped by host, sorted by count)
+        assert "Doc A" in out
+        assert "Doc B" in out
+        assert "Example" in out
+
+    def test_sorts_groups_by_count_descending(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "https://a.com/1", "title": "A1"},
+                {"url": "https://b.com/1", "title": "B1"},
+                {"url": "https://b.com/2", "title": "B2"},
+                {"url": "https://b.com/3", "title": "B3"},
+            ]
+        )
+        out = _output(buf)
+        # b.com has 3 citations, should appear first (lower index numbers)
+        b1_pos = out.index("B1")
+        a1_pos = out.index("A1")
+        assert b1_pos < a1_pos
+
+    def test_title_falls_back_to_hostname(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "https://example.com/page", "title": None},
+            ]
+        )
+        out = _output(buf)
+        assert "example.com" in out
+
+    def test_no_url_entries_skipped(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "", "title": "No URL"},
+                {"url": None, "title": "Also no URL"},
+            ]
+        )
+        # All entries have empty/None URLs, so nothing to show
+        assert _output(buf) == ""
+
+    def test_mixed_valid_and_empty_urls(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "", "title": "Skip me"},
+                {"url": "https://example.com/real", "title": "Real"},
+            ]
+        )
+        out = _output(buf)
+        assert "(1)" in out
+        assert "Real" in out
+        assert "Skip me" not in out
+
+    def test_numbered_entries(self) -> None:
+        display, buf = _make_display()
+        display.show_citations(
+            [
+                {"url": "https://a.com/1", "title": "First"},
+                {"url": "https://a.com/2", "title": "Second"},
+            ]
+        )
+        out = _output(buf)
+        assert "[1]" in out
+        assert "[2]" in out
+
+
+# ── show_final_decision with overview ────────────────────────
+
+
+class TestShowFinalDecisionOverview:
+    def test_shows_overview_when_provided(self) -> None:
+        display, buf = _make_display()
+        display.show_final_decision(
+            "Decision text.",
+            0.9,
+            1.0,
+            0.05,
+            None,
+            overview="Executive summary here.",
+        )
+        out = _output(buf)
+        assert "Executive Overview" in out
+        assert "Executive summary here." in out
+        assert "Decision text." in out
+
+    def test_no_overview_panel_when_none(self) -> None:
+        display, buf = _make_display()
+        display.show_final_decision("Answer.", 1.0, 1.0, 0.0, None, overview=None)
+        out = _output(buf)
+        assert "Executive Overview" not in out
diff --git a/tests/unit/test_cli_tools.py b/tests/unit/test_cli_tools.py
index adce8ff..c1bd586 100644
--- a/tests/unit/test_cli_tools.py
+++ b/tests/unit/test_cli_tools.py
@@ -123,7 +123,7 @@ def test_tools_flag_enables_tools(
     ) -> None:
         config = DuhConfig(tools=ToolsConfig(enabled=False))
         mock_config.return_value = config
-        mock_run.return_value = ("Answer", 0.9, None, 0.01)
+        mock_run.return_value = ("Answer", 0.9, 1.0, None, 0.01, None, [], [])
 
         runner.invoke(cli, ["ask", "--tools", "test question"])
         # After CLI processes --tools flag, config should be overridden
@@ -139,7 +139,7 @@ def test_no_tools_flag_disables_tools(
     ) -> None:
         config = DuhConfig(tools=ToolsConfig(enabled=True))
         mock_config.return_value = config
-        mock_run.return_value = ("Answer", 0.9, None, 0.01)
+        mock_run.return_value = ("Answer", 0.9, 1.0, None, 0.01, None, [], [])
 
         runner.invoke(cli, ["ask", "--no-tools", "test question"])
         assert config.tools.enabled is False
@@ -154,7 +154,7 @@ def test_no_flag_preserves_config(
     ) -> None:
         config = DuhConfig(tools=ToolsConfig(enabled=True))
         mock_config.return_value = config
-        mock_run.return_value = ("Answer", 0.9, None, 0.01)
+        mock_run.return_value = ("Answer", 0.9, 1.0, None, 0.01, None, [], [])
 
         runner.invoke(cli, ["ask", "test question"])
         # Should remain True since no flag was passed
@@ -242,7 +242,7 @@ def test_tools_enabled_passes_registry(
         config = DuhConfig(tools=ToolsConfig(enabled=True))
         mock_config.return_value = config
         mock_providers.return_value.list_all_models.return_value = ["model1"]
-        mock_consensus.return_value = ("Answer", 0.9, 1.0, None, 0.01, None)
+        mock_consensus.return_value = ("Answer", 0.9, 1.0, None, 0.01, None, [], [])
 
         runner.invoke(cli, ["ask", "test question"])
 
@@ -263,7 +263,7 @@ def test_tools_disabled_passes_none(
         config = DuhConfig(tools=ToolsConfig(enabled=False))
         mock_config.return_value = config
         mock_providers.return_value.list_all_models.return_value = ["model1"]
-        mock_consensus.return_value = ("Answer", 0.9, 1.0, None, 0.01, None)
+        mock_consensus.return_value = ("Answer", 0.9, 1.0, None, 0.01, None, [], [])
 
         runner.invoke(cli, ["ask", "test question"])
 
diff --git a/tests/unit/test_cli_voting.py b/tests/unit/test_cli_voting.py
index 6ce5f85..df1e0ef 100644
--- a/tests/unit/test_cli_voting.py
+++ b/tests/unit/test_cli_voting.py
@@ -147,7 +147,7 @@ def test_default_protocol_is_consensus(
         from duh.config.schema import DuhConfig
 
         mock_config.return_value = DuhConfig()
-        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0, None, [])
+        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0, None, [], [])
 
         result = runner.invoke(cli, ["ask", "Question?"])
         assert result.exit_code == 0
diff --git a/tests/unit/test_mcp_server.py b/tests/unit/test_mcp_server.py
index 7cc2745..e405b1a 100644
--- a/tests/unit/test_mcp_server.py
+++ b/tests/unit/test_mcp_server.py
@@ -177,7 +177,7 @@ async def test_consensus_protocol(self) -> None:
             patch(
                 "duh.cli.app._run_consensus",
                 new_callable=AsyncMock,
-                return_value=("Use SQLite.", 0.9, 1.0, "minor dissent", 0.05, None, []),
+                return_value=("Use SQLite.", 0.9, 1.0, "minor dissent", 0.05, None, [], []),
             ),
         ):
             result = await _handle_ask({"question": "What DB?", "rounds": 2})
diff --git a/tests/unit/test_providers_openai.py b/tests/unit/test_providers_openai.py
index 2b7c401..9317e9d 100644
--- a/tests/unit/test_providers_openai.py
+++ b/tests/unit/test_providers_openai.py
@@ -239,13 +239,13 @@ async def test_passes_params_to_sdk(self):
         ]
         await provider.send(
             msgs,
-            "gpt-5.2",
+            "gpt-4o",
             max_tokens=1000,
             temperature=0.3,
             stop_sequences=["STOP"],
         )
         call_kwargs = client.chat.completions.create.call_args.kwargs
-        assert call_kwargs["model"] == "gpt-5.2"
+        assert call_kwargs["model"] == "gpt-4o"
         assert call_kwargs["max_completion_tokens"] == 1000
         assert call_kwargs["temperature"] == 0.3
         assert call_kwargs["stop"] == ["STOP"]
diff --git a/web/src/__tests__/stores.test.ts b/web/src/__tests__/stores.test.ts
index ead0eb0..36a152e 100644
--- a/web/src/__tests__/stores.test.ts
+++ b/web/src/__tests__/stores.test.ts
@@ -409,6 +409,8 @@ describe('useCalibrationStore', () => {
       loading: false,
       error: null,
       category: null,
+      since: null,
+      until: null,
     })
   })
 
@@ -472,4 +474,47 @@ describe('useCalibrationStore', () => {
     useCalibrationStore.getState().setCategory(null)
     expect(useCalibrationStore.getState().category).toBeNull()
   })
+
+  it('setSince updates since filter', () => {
+    const { setSince } = useCalibrationStore.getState()
+    setSince('2025-01-01')
+    expect(useCalibrationStore.getState().since).toBe('2025-01-01')
+    setSince(null) // null clears the filter again
+    expect(useCalibrationStore.getState().since).toBeNull()
+  })
+
+  it('setUntil updates until filter', () => {
+    const { setUntil } = useCalibrationStore.getState()
+    setUntil('2025-12-31')
+    expect(useCalibrationStore.getState().until).toBe('2025-12-31')
+    setUntil(null) // null clears the filter again
+    expect(useCalibrationStore.getState().until).toBeNull()
+  })
+
+  it('fetchCalibration passes filters to API', async () => {
+    // The resolved payload is empty; only the request arguments are checked.
+    mockedApi.calibration.mockResolvedValue({
+      buckets: [],
+      total_decisions: 0,
+      total_with_outcomes: 0,
+      overall_accuracy: 0,
+      ece: 0,
+    })
+
+    const filters = { category: 'factual', since: '2025-06-01', until: '2025-06-30' }
+    const store = useCalibrationStore.getState()
+    store.setCategory(filters.category)
+    store.setSince(filters.since)
+    store.setUntil(filters.until)
+    await store.fetchCalibration()
+
+    // All three filters set above must reach the API call.
+    expect(mockedApi.calibration).toHaveBeenCalledWith(filters)
+  })
+
+  it('has correct initial state for date filters', () => {
+    const { since, until } = useCalibrationStore.getState()
+    expect(since).toBeNull() // "since" filter starts unset
+    expect(until).toBeNull() // "until" filter starts unset
+  })
 })
diff --git a/web/src/api/types.ts b/web/src/api/types.ts
index ffcd29c..e96e475 100644
--- a/web/src/api/types.ts
+++ b/web/src/api/types.ts
@@ -135,6 +135,7 @@ export interface ThreadDetail {
   status: string
   created_at: string
   turns: Turn[]
+  followups?: string[]
 }
 
 export interface RecallResult {
@@ -302,6 +303,7 @@ export interface WSComplete {
   cost: number
   thread_id: string | null
   overview: string | null
+  followups: string[] | null
 }
 
 export interface WSChallengeError {
diff --git a/web/src/components/calibration/CalibrationDashboard.tsx b/web/src/components/calibration/CalibrationDashboard.tsx
index 502a25e..ab4de36 100644
--- a/web/src/components/calibration/CalibrationDashboard.tsx
+++ b/web/src/components/calibration/CalibrationDashboard.tsx
@@ -1,6 +1,8 @@
-import { useEffect } from 'react'
+import { useCallback, useEffect } from 'react'
 import { useCalibrationStore } from '@/stores'
 
+const INTENT_CATEGORIES = ['factual', 'technical', 'creative', 'judgment', 'strategic']
+
 function eceRating(ece: number): { label: string; color: string } {
   if (ece < 0.05) return { label: 'Excellent', color: 'var(--color-success, #22c55e)' }
   if (ece < 0.1) return { label: 'Good', color: 'var(--color-primary)' }
@@ -17,13 +19,44 @@ export function CalibrationDashboard() {
     ece,
     loading,
     error,
+    category,
+    since,
+    until,
     fetchCalibration,
+    setCategory,
+    setSince,
+    setUntil,
   } = useCalibrationStore()
 
   useEffect(() => {
     fetchCalibration()
   }, [fetchCalibration])
 
+  const handleCategoryChange = useCallback(
+    (e: React.ChangeEvent<HTMLSelectElement>) => {
+      setCategory(e.target.value || null)
+    },
+    [setCategory],
+  )
+
+  const handleSinceChange = useCallback(
+    (e: React.ChangeEvent<HTMLInputElement>) => {
+      setSince(e.target.value || null)
+    },
+    [setSince],
+  )
+
+  const handleUntilChange = useCallback(
+    (e: React.ChangeEvent<HTMLInputElement>) => {
+      setUntil(e.target.value || null)
+    },
+    [setUntil],
+  )
+
+  const handleApplyFilters = useCallback(() => {
+    fetchCalibration()
+  }, [fetchCalibration])
+
   if (loading) {
     return (
       <div className="flex items-center justify-center py-20 text-[var(--color-text-dim)]">
@@ -52,6 +85,50 @@ export function CalibrationDashboard() {
         outcomes.
       </p>
 
+      {/* Filters */}
+      <div className="flex flex-wrap items-end gap-3">
+        <label className="flex flex-col gap-1 text-xs text-[var(--color-text-dim)]">
+          Category
+          <select
+            value={category ?? ''}
+            onChange={handleCategoryChange}
+            className="bg-[var(--color-surface)] border border-[var(--color-border)] rounded-[var(--radius-sm)] px-2 py-1.5 text-sm text-[var(--color-text)] font-mono"
+          >
+            <option value="">All</option>
+            {INTENT_CATEGORIES.map((c) => (
+              <option key={c} value={c}>
+                {c}
+              </option>
+            ))}
+          </select>
+        </label>
+        <label className="flex flex-col gap-1 text-xs text-[var(--color-text-dim)]">
+          Since
+          <input
+            type="date"
+            value={since ?? ''}
+            onChange={handleSinceChange}
+            className="bg-[var(--color-surface)] border border-[var(--color-border)] rounded-[var(--radius-sm)] px-2 py-1.5 text-sm text-[var(--color-text)] font-mono"
+          />
+        </label>
+        <label className="flex flex-col gap-1 text-xs text-[var(--color-text-dim)]">
+          Until
+          <input
+            type="date"
+            value={until ?? ''}
+            onChange={handleUntilChange}
+            className="bg-[var(--color-surface)] border border-[var(--color-border)] rounded-[var(--radius-sm)] px-2 py-1.5 text-sm text-[var(--color-text)] font-mono"
+          />
+        </label>
+        <button
+          onClick={handleApplyFilters}
+          disabled={loading}
+          className="px-3 py-1.5 text-sm font-medium bg-[var(--color-primary)] text-white rounded-[var(--radius-sm)] hover:opacity-90 disabled:opacity-50"
+        >
+          Apply
+        </button>
+      </div>
+
       {/* Metric cards */}
       <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
         <MetricCard label="Decisions" value={String(totalDecisions)} />
diff --git a/web/src/components/consensus/ConsensusNav.tsx b/web/src/components/consensus/ConsensusNav.tsx
index c1247f8..15d431a 100644
--- a/web/src/components/consensus/ConsensusNav.tsx
+++ b/web/src/components/consensus/ConsensusNav.tsx
@@ -55,7 +55,7 @@ function shortModel(model: string): string {
 }
 
 export function ConsensusNav() {
-  const { status, rounds, currentRound, currentPhase } = useConsensusStore()
+  const { status, rounds, currentRound, currentPhase, followups, submitQuestion } = useConsensusStore()
 
   if (rounds.length === 0) return null
 
@@ -76,6 +76,9 @@ export function ConsensusNav() {
           if (!seen.has(c.url)) { seen.add(c.url); tagged.push({ ...c, role: 'challenge' }) }
         }
       }
+      for (const c of round.revisionCitations ?? []) {
+        if (!seen.has(c.url)) { seen.add(c.url); tagged.push({ ...c, role: 'revise' }) }
+      }
     }
     // Group by hostname
     const groups = new Map<string, TaggedCitation[]>()
@@ -216,6 +219,32 @@ export function ConsensusNav() {
           </div>
         )}
 
+        {followups?.length > 0 && (
+          <div className="pt-2 mt-2 border-t border-[var(--color-border)]">
+            <Disclosure
+              header={
+                <span className="font-mono text-[10px] text-[var(--color-text-dim)] uppercase tracking-wide">
+                  Follow up ({followups.length})
+                </span>
+              }
+              defaultOpen
+            >
+              <ul className="space-y-2 mt-1.5">
+                {followups.map((q, i) => (
+                  <li key={i}>
+                    <button
+                      className="text-left text-[11px] leading-snug text-[var(--color-text-secondary)] hover:text-[var(--color-primary)] transition-colors"
+                      onClick={() => submitQuestion(q)}
+                    >
+                      {q}
+                    </button>
+                  </li>
+                ))}
+              </ul>
+            </Disclosure>
+          </div>
+        )}
+
       </nav>
     </GlassPanel>
   )
diff --git a/web/src/components/consensus/ConsensusPanel.tsx b/web/src/components/consensus/ConsensusPanel.tsx
index e2e2897..9fbeb46 100644
--- a/web/src/components/consensus/ConsensusPanel.tsx
+++ b/web/src/components/consensus/ConsensusPanel.tsx
@@ -120,6 +120,7 @@ export function ConsensusPanel() {
                     collapsible={isCompletedRound}
                     defaultOpen={false}
                     truncated={round.truncated.includes('REVISE')}
+                    citations={round.revisionCitations}
                   />
                 )}
 
diff --git a/web/src/components/threads/ThreadNav.tsx b/web/src/components/threads/ThreadNav.tsx
index 9c88bc9..0b5e594 100644
--- a/web/src/components/threads/ThreadNav.tsx
+++ b/web/src/components/threads/ThreadNav.tsx
@@ -1,4 +1,5 @@
 import { GlassPanel, Disclosure } from '@/components/shared'
+import { useConsensusStore } from '@/stores/consensus'
 import { useThreadsStore } from '@/stores/threads'
 import type { Citation } from '@/api/types'
 
@@ -145,7 +146,41 @@ export function ThreadNav() {
             </Disclosure>
           </div>
         )}
+
+        <FollowupSection followups={thread.followups} />
       </nav>
     </GlassPanel>
   )
 }
+
+/** Lists a thread's follow-up questions; a click hands one to submitQuestion. */
+function FollowupSection({ followups }: { followups?: string[] }) {
+  const submitQuestion = useConsensusStore((s) => s.submitQuestion)
+  // Render nothing when the list is missing or empty.
+  if (!followups?.length) return null
+
+  const heading = (
+    <span className="font-mono text-[10px] text-[var(--color-text-dim)] uppercase tracking-wide">
+      Follow up ({followups.length})
+    </span>
+  )
+
+  return (
+    <div className="pt-2 mt-2 border-t border-[var(--color-border)]">
+      <Disclosure header={heading} defaultOpen>
+        <ul className="space-y-2 mt-1.5">
+          {followups.map((question, index) => (
+            <li key={index}>
+              <button
+                className="text-left text-[11px] leading-snug text-[var(--color-text-secondary)] hover:text-[var(--color-primary)] transition-colors"
+                onClick={() => submitQuestion(question)}
+              >
+                {question}
+              </button>
+            </li>
+          ))}
+        </ul>
+      </Disclosure>
+    </div>
+  )
+}
diff --git a/web/src/stores/calibration.ts b/web/src/stores/calibration.ts
index efce8f1..083f410 100644
--- a/web/src/stores/calibration.ts
+++ b/web/src/stores/calibration.ts
@@ -13,10 +13,14 @@ interface CalibrationState {
 
   // Filters
   category: string | null
+  since: string | null
+  until: string | null
 
   // Actions
   fetchCalibration: () => Promise<void>
   setCategory: (category: string | null) => void
+  setSince: (since: string | null) => void
+  setUntil: (until: string | null) => void
 }
 
 export const useCalibrationStore = create<CalibrationState>((set, get) => ({
@@ -29,13 +33,17 @@ export const useCalibrationStore = create<CalibrationState>((set, get) => ({
   error: null,
 
   category: null,
+  since: null,
+  until: null,
 
   fetchCalibration: async () => {
     set({ loading: true, error: null })
     try {
-      const { category } = get()
-      const params: { category?: string } = {}
+      const { category, since, until } = get()
+      const params: { category?: string; since?: string; until?: string } = {}
       if (category) params.category = category
+      if (since) params.since = since
+      if (until) params.until = until
 
       const data = await api.calibration(params)
       set({
@@ -54,4 +62,12 @@ export const useCalibrationStore = create<CalibrationState>((set, get) => ({
   setCategory: (category) => {
     set({ category })
   },
+
+  setSince: (since) => {
+    set({ since })
+  },
+
+  setUntil: (until) => {
+    set({ until })
+  },
 }))
diff --git a/web/src/stores/consensus.ts b/web/src/stores/consensus.ts
index 290e7b4..baa2111 100644
--- a/web/src/stores/consensus.ts
+++ b/web/src/stores/consensus.ts
@@ -29,6 +29,7 @@ export interface RoundData {
   challenges: ChallengeEntry[]
   reviser: string | null
   revision: string | null
+  revisionCitations?: Citation[] | null
   confidence: number | null
   rigor: number | null
   dissent: string | null
@@ -56,6 +57,7 @@ interface ConsensusState {
   cost: number | null
   threadId: string | null
   overview: string | null
+  followups: string[]
 
   // Refinement
   clarifyingQuestions: ClarifyingQuestion[]
@@ -106,6 +108,7 @@ const initialState = {
   cost: null as number | null,
   threadId: null as string | null,
   overview: null as string | null,
+  followups: [] as string[],
   clarifyingQuestions: [] as ClarifyingQuestion[],
   clarificationAnswers: {} as Record<number, string>,
   pendingRounds: 3,
@@ -203,6 +206,7 @@ export const useConsensusStore = create<ConsensusState>((set, get) => ({
       cost: null,
       threadId: null,
       overview: null,
+      followups: [],
       clarifyingQuestions: [],
       clarificationAnswers: {},
     })
@@ -292,6 +296,7 @@ function handleEvent(
         if (event.truncated) update.truncated = [...round.truncated, 'PROPOSE']
       } else if (event.phase === 'REVISE') {
         update.revision = event.content ?? null
+        update.revisionCitations = event.citations ?? null
         if (event.truncated) update.truncated = [...round.truncated, 'REVISE']
       }
 
@@ -353,6 +358,7 @@ function handleEvent(
         cost: event.cost,
         threadId: event.thread_id ?? null,
         overview: event.overview ?? null,
+        followups: event.followups ?? [],
       })
       break
     }