diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..d255463
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,65 @@
+# Changelog
+
+All notable changes to RoundTable are recorded here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+
+- **Adversarial Red Team engine.** Third pluggable engine alongside CVP and Blind Jury. A rotating attacker stress-tests the other participants' positions across N-2 stress rounds, defenders respond in parallel, and a final post-stress round forces every participant to acknowledge which attacks landed. The attacker's persona is suspended for their turn (replaced with a neutral red-team framing) and their confidence score is excluded from the consensus formula because it measures attack success, not belief in a position. `pickAttackerIndex` rotates round-robin so attacker assignments are deterministic and reproducible.
+- **Custom persona builder (axis sliders).** A new entry in the persona menu opens a session-scoped builder with six axes — Risk tolerance, Optimism, Evidence bar, Formality, Verbosity, Contrarian streak — each with three levels. The server composes the system prompt from a small library of vetted phrase fragments keyed by `(axis, level)`. The user-typed display name is sanitised against a Unicode-letter / digit / space / `._-'` allowlist and capped to 32 chars; user-typed prompt text never reaches the LLM. The spec is cached in `localStorage` for cross-session iteration.
+- **Claim-Level Disagreement Extraction.** A post-final-round LLM pass that emits structured `{contradictions: [{claim, sides: [{stance, participantIds, quote}]}]}`. The parser drops fabricated quotes by verifying each quote's first 80 normalised characters against the actual response content of the named participants. Same-participant-on-multiple-sides is rejected. Cap of 8 contradictions per run; cap of 240 chars per claim, 600 chars per quote. The extractor reuses the judge model when judge synthesis is enabled; otherwise it falls back to the first participant's model. Default ON. Renders in a new `ClaimsPanel` with click-to-scroll-to-response per side.
+- **Engine Sweep Mode.** A new "Sweep" button next to "Run Consensus" runs the same prompt through CVP, Blind Jury, and Adversarial Red Team sequentially. The live ResultPanel shows the current engine; a `SweepResultsPanel` below renders one card per engine with the final score, judge majority excerpt, top contradictions, disagreement count, and per-engine token / USD subtotal. Sweep cancellation tears down the active run while preserving any engines that already completed.
+- **Cost cap.** A new `costCapUSD` option (also exposed in the Protocol panel UI) hard-aborts a run when the running estimated cost crosses the threshold. Server-clamped to ≤ $50. The engine throws `CostCapExceededError` which the SSE pipeline surfaces as an `error` event with the exact dollar figure.
+- **Markdown export now includes the claim digest.** Each contradiction renders as a sub-section with stance, participants, and verbatim quote per side.
+- **`SessionSnapshot.claims`** field on the snapshot type (optional for backwards compat with older permalinks). Loading a permalink rehydrates the claim digest into the Claims panel.
+
+### Changed
+
+- README hero blurb, Features table, Protocol section, Architecture file map, and Roadmap status all updated to reflect three available engines and the new affordances.
+- `extractConfidence` now matches the LAST `CONFIDENCE: NN` occurrence in a response. Models that preview their score mid-response no longer short-circuit the canonical trailing line.
+- `loadSnapshot` reconstructs `usageByParticipant` from `snapshot.rounds[*].responses[*].usage` instead of resetting to `{}`. Shared-view permalinks now show correct per-participant token totals in the floating cost meter.
+- `cancelConsensus` now also clears `judgeStream`, `judgeRunning`, and `claimsRunning` so a mid-judge or mid-extraction cancel can't leave stale streaming text in the UI.
+- `extractUsage` no longer chains `as unknown as` casts. All field reads go through `typeof` guards, with malformed values falling cleanly through to the heuristic estimator.
+
+### Fixed
+
+- The module-level `setInterval` rate-limit cleanup in `app/api/consensus/route.ts` is keyed on a global symbol so Next.js HMR can no longer accumulate intervals across reloads. Vercel cold starts are unaffected.
+- A test using `mockImplementation` instead of `mockImplementationOnce` was leaking a broken streamText stub into every later test in the engine suite, which would have masked confidence-extraction bugs in adversarial / claim-extraction code. Switched to scoped `mockImplementationOnce` chains.
+
+### Tests
+
+- Test count: 207 → 255 (+48 across the four features and the QA bundle).
+- New coverage:
+  - Adversarial engine prompts and rotation, attacker-excluded scoring, parallel defenders.
+  - Custom persona sanitiser and composer, including injection-shape names.
+  - Claim-extractor parser (well-formed / noise / fabricated-quote / same-participant rejection) and `pickClaimExtractorModelId`.
+  - Engine integration end-to-end with claims, plus soft-fail behaviour.
+  - Sweep state actions and cancellation.
+  - Cost-cap enforcement and disabled defaults.
+  - `extractConfidence` last-occurrence anchoring and `loadSnapshot` usage reconstruction.
+  - API route accepting / rejecting custom persona specs, and accepting the adversarial engine.
+
+### Documentation
+
+- Added [`newfeatures.md`](newfeatures.md) tracking the rationale for each feature, the Grok-consensus QA notes, and the code-quality bundle.
+- Added [`CHANGELOG.md`](CHANGELOG.md) (this file).
+- Added [`SECURITY.md`](SECURITY.md) covering the threat model and security principles.
+
+---
+
+## [1.0.0] — 2026-04-15
+
+Initial public release plus the demo-uplift features.
+
+### Added
+
+- **Blind Jury engine** alongside CVP — single-pass parallel responses + judge synthesis.
+- **Judge synthesizer** — optional non-voting model produces structured Majority / Minority / Unresolved / Confidence sections over the final round.
+- **Confidence trajectory chart** — live SVG sparkline with one line per participant.
+- **Disagreement ledger** — confidence-spread heuristic flags pairs whose self-reported confidence diverges by ≥ 20 points.
+- **Cost meter** with bundled pricing table for major frontier models.
+- **Floating run panel** stacking cost meter + trajectory + ledger + UML message-flow diagram on xl+ screens.
+- **Provider error handling** — errored participant calls render as red error cards and are excluded from the consensus score.
+- **Prompt library** — 8 curated preset prompts as chips under the textarea.
+- **Session export & share** — Markdown / JSON download plus URL-hash permalink (compressed via `CompressionStream` when available).
+- **Shared view mode** — `#rt=…` permalinks rehydrate into a read-only viewer.
+- **Real-time SSE streaming, cancel anytime, rate limiting, server-side input validation, persona/model re-verification.**
+- **CVP Consensus Validation Protocol** — multi-round structured debate with blind Round 1, randomised order, and early stopping.
+- **7 built-in personas** — Risk Analyst, First-Principles Engineer, VC Specialist, Scientific Skeptic, Optimistic Futurist, Devil's Advocate, Domain Expert.
+- **Multi-provider OpenAI-compatible client** — Grok, Claude, OpenAI, Mistral, Groq, Together, etc.
+
+[Unreleased]: https://github.com/entropyvortex/roundtable/compare/v1.0.0...HEAD
+[1.0.0]: https://github.com/entropyvortex/roundtable/releases/tag/v1.0.0
diff --git a/README.md b/README.md
index 26e721b..a72de03 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
**Put multiple AI models in a room. Give them personas. Watch them debate.**
-RoundTable runs the **Consensus Validation Protocol (CVP)** and a **Blind Jury** engine across any combination of AI providers — Grok, Claude, GPT, Gemini, Mistral, and more — with configurable personas, a non-voting Judge synthesizer, a live confidence trajectory chart, a disagreement ledger, a cost meter, shareable permalinks, and a premium dark interface designed for long sessions.
+RoundTable runs three pluggable engines — the **Consensus Validation Protocol (CVP)**, a **Blind Jury**, and an **Adversarial Red Team** — across any combination of AI providers (Grok, Claude, GPT, Gemini, Mistral, and more). It ships with configurable personas, an axis-tunable **custom persona builder**, a non-voting Judge synthesizer, **claim-level disagreement extraction** with verbatim quotes per side, a live confidence trajectory chart, a confidence-spread disagreement ledger, a cost meter with **hard-abort cost cap**, an **engine sweep** that runs one prompt through all three engines side-by-side, shareable permalinks, and a premium dark interface designed for long sessions.
[](LICENSE)
[](https://vercel.com/new/clone?repository-url=https://github.com/entropyvortex/roundtable)
@@ -145,6 +145,49 @@ Blind Jury is the right engine when you want _independent_ signals rather than a
Switch engines from the sidebar ("Protocol" section). The Blind Jury engine ignores the round count and the CVP-specific toggles.
+### Adversarial Red Team Engine (alternative)
+
+The third engine pressure-tests positions before producing a final synthesis. Where CVP rewards consensus and Blind Jury rewards independent signal, Red Team rewards _robustness_ — every claim has to survive a hostile probe before it gets credit.
+
+1. **Round 1 — Initial Positions.** Every participant emits their position in parallel with no cross-visibility, warned in advance that their position will be attacked. They are asked to state load-bearing claims explicitly so they can be challenged.
+
+2. **Rounds 2 to N-1 — Stress Tests.** One participant per round is the **attacker** (round-robin via `pickAttackerIndex(round, participantCount)`, sketched below). The attacker turn is special: their persona is _suspended_ for the round and replaced with a "neutral red-team attacker" framing that demands they begin with `Attacking claim: ` and surface the weakest load-bearing claim. The remaining participants are **defenders** and respond to the attack **in parallel** (same anti-anchoring philosophy as Blind Round 1) — they cannot see other defenders' replies.
+
+3. **Round N — Post-Stress Final Synthesis.** Every participant in parallel writes their final position, explicitly acknowledging which attacks landed, which missed, and what conditional caveats they now attach.
+
+The attacker's confidence score reports how confident they are that the attack lands — not their belief in any underlying view. This is **out-of-band** for the consensus formula, so stress-round scores and disagreement detection are computed from defender responses only, keeping the `avg − 0.5·stddev` interpretation consistent with CVP and Blind Jury.
+
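+The rotation and the defender-only scoring are small enough to sketch. `pickAttackerIndex` is the real helper; the exact round offset and the `scoreStressRound` name below are assumptions for illustration, and the canonical code lives in `lib/consensus-engine.ts`:
+
+```ts
+// Sketch only: pickAttackerIndex is real; scoreStressRound and the
+// round-2-maps-to-participant-0 offset are illustrative assumptions.
+function pickAttackerIndex(round: number, participantCount: number): number {
+  // Stress rounds are 2..N-1; round-robin keeps attacker assignment
+  // deterministic and reproducible across reruns.
+  return (round - 2) % participantCount;
+}
+
+function scoreStressRound(confidences: number[], attackerIndex: number): number {
+  // The attacker's confidence measures attack success, not belief, so it
+  // is dropped before applying the avg - 0.5 * stddev consensus formula.
+  const defenders = confidences.filter((_, i) => i !== attackerIndex);
+  const avg = defenders.reduce((sum, c) => sum + c, 0) / defenders.length;
+  const variance =
+    defenders.reduce((sum, c) => sum + (c - avg) ** 2, 0) / defenders.length;
+  return avg - 0.5 * Math.sqrt(variance);
+}
+```
+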
+Switch engines from the Protocol panel. Red Team uses the round count slider; minimum sensible run is 3 rounds (init + 1 stress + final).
+
+### Engine Sweep Mode
+
+Click **Sweep** instead of **Run Consensus** and the same prompt is run through CVP, Blind Jury, and Adversarial Red Team in sequence. The live results panel shows the currently-running engine; below it the **Sweep Results** panel renders one card per engine with the final consensus score, the judge's majority excerpt, the top contradictions, the disagreement count, and the per-engine token / USD subtotal. This makes the _protocol space_ legible — you see how the same question converges (or doesn't) under three different consensus shapes.
+
+Sweep is sequential to respect rate limits; Esc or the Cancel Sweep button tears down the active run while preserving any engines that already completed. Because a sweep is roughly 3× the cost of a single run, the **cost cap** in the Protocol panel is the recommended companion control.
+
+### Custom Persona Builder
+
+The persona menu now includes a **Build a custom persona…** entry. Instead of free-text, the builder exposes six axes — Risk tolerance, Optimism, Evidence bar, Formality, Verbosity, Contrarian streak — each with three levels (low / mid / high). The server composes the system prompt from a small library of vetted phrase fragments, one per `(axis, level)`. The user-typed name is sanitised to a Unicode-letter / digit / space / `._-'` allowlist and capped to 32 chars; user-typed prompt text never reaches the LLM.
+
+This preserves the existing security model: every consensus request rebuilds personas server-side from their IDs, and a custom persona's spec is re-sanitised and re-composed on every run. The spec is cached in `localStorage` so the user can iterate across sessions. Raw user input is **not** embedded in URL-hash permalinks: the sanitised spec and the composed prompt are, but neither carries arbitrary text.
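+
+A minimal sketch of the composition step, assuming invented fragment wording and a hypothetical `composePrompt` name (`composeCustomPersona` in `lib/personas.ts` is canonical):
+
+```ts
+// Illustrative sketch: the fragment wording is invented; the shipped
+// library has one vetted phrase per (axis, level) pair.
+type AxisLevel = "low" | "mid" | "high";
+
+const FRAGMENTS: Record<string, Record<AxisLevel, string>> = {
+  risk: {
+    low: "You are strongly risk-averse and surface downside scenarios first.",
+    mid: "You weigh risk and reward evenly.",
+    high: "You tolerate high risk when the upside justifies it.",
+  },
+  // ...optimism, evidence, formality, verbosity, contrarian take the same shape
+};
+
+function composePrompt(name: string, axes: Record<string, AxisLevel>): string {
+  // Fixed structural template: the sanitised display name is the only
+  // user-derived string; every other word comes from vetted fragments.
+  const stance = Object.entries(axes)
+    .map(([axis, level]) => FRAGMENTS[axis][level])
+    .join(" ");
+  return `You are ${name}, a custom RoundTable participant. ${stance}`;
+}
+```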
+
+### Claim-Level Disagreement Extraction
+
+The confidence-spread `Disagreement` ledger only catches pairs whose self-reported confidence diverges by ≥20 points. After every run with **Claim extraction** enabled (default ON), an additional LLM pass reads the final-round responses and emits a strict JSON object of `{contradictions: [{claim, sides: [{stance, participantIds, quote}]}]}`. The parser:
+
+- Drops contradictions with empty claims, fewer than 2 sides, or sides without a quote.
+- Verifies each quote against the actual response content of the named participants. If the (normalised) first 80 characters don't appear in any cited participant's text, the side is dropped — fabricated quotes don't render. A sketch of this check follows the list.
+- Rejects entries where any participant id appears on more than one side.
+- Caps to 8 contradictions per run.
+
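+The quote check is the load-bearing defence. A minimal sketch, assuming a `normalise` helper that lowercases and collapses whitespace and a hypothetical `quoteIsReal` name (the canonical parser is `parseClaimsJSON` in `lib/consensus-engine.ts`):
+
+```ts
+// Sketch of the fabricated-quote filter; parseClaimsJSON is canonical.
+function normalise(s: string): string {
+  return s.toLowerCase().replace(/\s+/g, " ").trim();
+}
+
+function quoteIsReal(
+  quote: string,
+  participantIds: string[],
+  responseByParticipant: Map<string, string>,
+): boolean {
+  // A side survives only if the first 80 normalised characters of its
+  // quote appear in at least one cited participant's actual response.
+  const needle = normalise(quote).slice(0, 80);
+  if (!needle) return false;
+  return participantIds.some((id) => {
+    const text = responseByParticipant.get(id);
+    return text !== undefined && normalise(text).includes(needle);
+  });
+}
+```
+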
+The result renders in the **Claim-Level Contradictions** panel with one card per contradiction, a colored stripe per side, the stance label, the participants involved, and the verbatim quote. Click a side to scroll to that participant's final-round response. If the extractor itself fails (provider error, model unavailable), a distinct red error card explains what happened — the run is unaffected.
+
+The extractor reuses the judge model when judge synthesis is enabled (single user choice, no extra picker); otherwise it falls back to the first participant's model.
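+
+The selection rule is a one-liner. A sketch (`pickClaimExtractorModelId` is the real helper; the parameter shapes are simplified assumptions):
+
+```ts
+// Sketch: option and participant shapes are simplified for illustration.
+function pickClaimExtractorModelId(
+  options: { judgeEnabled: boolean; judgeModelId?: string },
+  participants: Array<{ modelInfo: { id: string } }>,
+): string {
+  // Reuse the judge model when one is configured; otherwise fall back
+  // to whatever model the first participant is running.
+  if (options.judgeEnabled && options.judgeModelId) return options.judgeModelId;
+  return participants[0].modelInfo.id;
+}
+```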
+
+### Cost Cap
+
+A numeric "Cost cap" input in the Protocol panel hard-aborts the run if the running estimated cost crosses the threshold. The engine accumulates `runningCostUSD` after every round, judge call, and claim-extraction call; on cross, it throws `CostCapExceededError` which the SSE pipeline surfaces as an `error` event. The cap is server-clamped to ≤ $50.
+
### Why This Is Better Than Majority Vote
Majority vote asks N models the same question and picks the most common answer. CVP does something structurally different:
@@ -171,7 +214,7 @@ Majority vote asks N models the same question and picks the most common answer.
**Confidence scores are self-reported.** Models assign their own confidence. There is no calibration, no ground truth, and no penalty for overconfidence. The consensus score is only as meaningful as the models' ability to self-assess — which is known to be unreliable. The judge synthesizer is deliberately _not_ a calibrator: it summarises what was said, it does not grade it.
-**Disagreement heuristic is confidence-based.** The disagreement ledger flags pairs whose confidence diverges by ≥ 20 points. This catches substantive splits reliably but misses cases where two participants hold opposite positions with identical confidence. Treat the ledger as a lower bound on actual disagreement.
+**Disagreement heuristic is confidence-based.** The default disagreement ledger flags pairs whose confidence diverges by ≥ 20 points. This catches loud splits but misses cases where two participants hold opposite positions with identical confidence. The **claim-level extractor** addresses this gap by running an additional LLM pass that emits structured contradictions with verbatim quotes per side; quotes are verified against actual response content so fabricated claims are dropped. The confidence-spread ledger remains as a fast, deterministic, no-extra-LLM-call lower bound.
### Example Transcript
@@ -215,13 +258,15 @@ The following are deliberate non-goals for v1 but would further tighten the prot
1. **Confidence calibration or external validation.** Self-reported confidence is unreliable. A calibration step — comparing stated confidence to accuracy on known-answer questions — or a separate judge model that _grades_ argument quality (as opposed to the current faithfulness-only synthesizer) would add grounding.
-2. **Claim-level disagreement extraction.** The current disagreement ledger detects confidence splits, not semantic contradictions. A follow-up pass that extracts the actual claims participants make and flags direct contradictions would be more precise, at the cost of extra LLM calls.
+2. **Additional pluggable engines.** Adversarial Red Team is available; Delphi, Ranked Choice, and Dialectical variants are still on the Roadmap. The engine interface is clean enough that adding a new one is one new function plus a dispatcher branch.
-3. **Pluggable engines beyond CVP and Blind Jury.** The engine interface is clean enough to support Delphi, Adversarial Red Team, Dialectical, and Ranked Choice variants. See the Roadmap table below.
+3. **Cross-engine judge synthesis.** Engine sweep currently runs an independent judge per engine. A meta-judge that synthesises across all three engines' final rounds would surface "what every protocol agrees on" but is deferred — per-engine judges produce intentionally engine-specific outputs (e.g. CVP's "Majority Position" is semantically different from Adversarial's post-stress majority).
## Security
-This is experimental, it has no authentication protection, if you publish this with your keys, someone could burn your tokens/exploit to process their prompts out of curiosity or malice.
+This is an experimental research demo with **no authentication**. Anyone who can reach the URL can spend your provider keys. Read [SECURITY.md](SECURITY.md) before deploying.
+
+The codebase has been built with defense-in-depth in mind — server-side persona rebuilds (the client cannot inject a `systemPrompt`), an axis-only custom-persona builder (no user free-text reaches the LLM), per-IP rate limiting, server-side input validation, an optional cost cap that hard-aborts a run when the running estimate crosses a USD threshold, and a strict claim-extractor parser that rejects fabricated quotes. Details and threat model in [SECURITY.md](SECURITY.md).
---
@@ -231,30 +276,33 @@ This is experimental, it has no authentication protection, if you publish this w
## Features
-| Feature | Description |
-| ------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **Multi-Provider** | Connect any OpenAI-compatible API — Grok, Claude, OpenAI, Mistral, Groq, Together, and more |
-| **7 Built-in Personas** | Risk Analyst, First-Principles Engineer, VC Specialist, Scientific Skeptic, Optimistic Futurist, Devil's Advocate, Domain Expert |
-| **Two Engines** | **CVP** (multi-round debate) and **Blind Jury** (parallel independent responses + judge synthesis) — switch from the sidebar |
-| **Blind Round 1** | CVP's first round runs in parallel with zero cross-visibility so the first wave of analysis is not contaminated by speaking order |
-| **Randomized Order** | CVP shuffles participant order in rounds 2+ to kill first-mover anchoring bias |
-| **Early Stopping** | CVP detects convergence between rounds and terminates early, saving latency and tokens |
-| **Judge Synthesizer** | Optional non-voting model that produces a structured **Majority / Minority / Unresolved / Confidence** summary over the final-round answers |
-| **Confidence Trajectory Chart** | Live sparkline with one line per participant, so you can _see_ drift, convergence, and sycophancy as the run unfolds |
-| **Disagreement Ledger** | Deterministic confidence-spread detector grouping flagged pairs by round — click a row to jump to that round in the transcript |
-| **Cost Meter** | Live total tokens and estimated USD per run, with a bundled pricing table for major frontier models |
-| **Floating Run Panel** | On xl+ screens a pinned right-side container stacks the cost meter, confidence trajectory, disagreement ledger, and a collapsible UML-style message flow diagram, scrolling as a unit so all four stay in view throughout a long transcript. Below xl the same panels fall back into the left sidebar |
-| **Provider Error Handling** | Errored participant calls render as red error cards with the upstream message + HTTP status, fire a per-participant toast, and are excluded from the consensus score and disagreement ledger so one broken provider can't tank a run |
-| **Prompt Library** | 8 curated preset prompts surfaced under the textarea for first-time visitors to hit Run immediately |
-| **Session Export & Share** | One-click download as Markdown or JSON, plus a permalink that encodes the full run into the URL hash (compressed when available) |
-| **Shared View Mode** | Loading a `#rt=…` permalink rehydrates the run into a read-only viewer for review, embedding, or screenshots |
-| **Real-time SSE Streaming** | Watch responses arrive token-by-token with live progress tracking |
-| **Cascaded Model Selector** | Provider-first dropdown with persona assignment per participant |
-| **Copy to Clipboard** | One-click raw markdown export per response |
-| **Cancel Anytime** | Stop button + Escape key — abort signal propagates to the server and stops provider calls |
-| **Premium Dark UI** | High-contrast, readable interface designed for extended analysis sessions |
-| **Rate-Limited API** | In-memory per-IP rate limiting, server-side input validation, persona/model re-verification |
-| **No External Services** | No database, no auth service, no persistence — Vercel-deployable in one click |
+| Feature | Description |
+| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| **Multi-Provider** | Connect any OpenAI-compatible API — Grok, Claude, OpenAI, Mistral, Groq, Together, and more |
+| **Three Engines** | **CVP** (multi-round debate), **Blind Jury** (parallel independent responses + judge synthesis), and **Adversarial Red Team** (rotating attacker stress-tests positions before a post-stress synthesis) — switch from the Protocol panel |
+| **Engine Sweep Mode** | One click runs the same prompt through all three engines sequentially and renders side-by-side cards so you can _see_ how the protocol shape changes the conclusion |
+| **7 Built-in Personas** | Risk Analyst, First-Principles Engineer, VC Specialist, Scientific Skeptic, Optimistic Futurist, Devil's Advocate, Domain Expert |
+| **Custom Persona Builder** | Build session-scoped personas by tuning six axes (risk tolerance, optimism, evidence bar, formality, verbosity, contrarian streak) — server composes the prompt from vetted phrase fragments, no user free-text reaches the LLM, no jailbreak surface |
+| **Blind Round 1** | CVP's first round runs in parallel with zero cross-visibility so the first wave of analysis is not contaminated by speaking order |
+| **Randomized Order** | CVP shuffles participant order in rounds 2+ to kill first-mover anchoring bias |
+| **Early Stopping** | CVP detects convergence between rounds and terminates early, saving latency and tokens |
+| **Judge Synthesizer** | Optional non-voting model that produces a structured **Majority / Minority / Unresolved / Confidence** summary over the final-round answers |
+| **Claim-Level Disagreement Extractor** | LLM pass after the final round emits structured `{claim, sides[{stance, participants, verbatim quote}]}`. Quotes are verified against actual response content (fabricated quotes are dropped); same-participant-on-multiple-sides is rejected. Click a side to jump to that participant's response |
+| **Confidence Trajectory Chart** | Live sparkline with one line per participant, so you can _see_ drift, convergence, and sycophancy as the run unfolds |
+| **Disagreement Ledger** | Deterministic confidence-spread detector grouping flagged pairs by round — click a row to jump to that round in the transcript |
+| **Cost Meter + Cost Cap** | Live total tokens and estimated USD per run, with a bundled pricing table for major frontier models. Optional hard-abort cost cap (USD) tears down the run as soon as the running estimate crosses the threshold |
+| **Floating Run Panel** | On xl+ screens a pinned right-side container stacks the cost meter, confidence trajectory, disagreement ledger, claim contradictions, and a collapsible UML-style message flow diagram, scrolling as a unit so all of them stay in view throughout a long transcript. Below xl the panels fall back into the left sidebar |
+| **Provider Error Handling** | Errored participant calls render as red error cards with the upstream message + HTTP status, fire a per-participant toast, and are excluded from the consensus score and disagreement ledger so one broken provider can't tank a run |
+| **Prompt Library** | 8 curated preset prompts surfaced under the textarea for first-time visitors to hit Run immediately |
+| **Session Export & Share** | One-click download as Markdown or JSON (includes the claim digest), plus a permalink that encodes the full run into the URL hash (compressed when available) |
+| **Shared View Mode** | Loading a `#rt=…` permalink rehydrates the run into a read-only viewer for review, embedding, or screenshots |
+| **Real-time SSE Streaming** | Watch responses arrive token-by-token with live progress tracking |
+| **Cascaded Model Selector** | Provider-first dropdown with persona assignment per participant |
+| **Copy to Clipboard** | One-click raw markdown export per response |
+| **Cancel Anytime** | Stop button + Escape key — single-engine cancels the current run; sweep mode cancels the entire sweep while preserving any engines that already completed |
+| **Premium Dark UI** | High-contrast, readable interface designed for extended analysis sessions |
+| **Rate-Limited API** | In-memory per-IP rate limiting, server-side input validation, persona/model re-verification |
+| **No External Services** | No database, no auth service, no persistence — Vercel-deployable in one click |
---
@@ -278,7 +326,7 @@ Edit `.env.local` with your keys, then:
pnpm dev
```
-Open [http://localhost:3000](http://localhost:3000). Add participants from the left sidebar, pick an engine in the **Protocol** panel (CVP or Blind Jury), optionally enable judge synthesis, type a prompt (or click a preset), and hit **Run Consensus**. On xl+ screens the cost meter, confidence trajectory, disagreement ledger, and message-flow diagram live in a floating panel pinned to the right of the viewport — watch them populate in real time as the debate streams. Below xl those same panels fall back into the left sidebar. When the run finishes, click **Export** in the results panel to download the transcript as Markdown/JSON or copy a permalink that rehydrates the run on any browser.
+Open [http://localhost:3000](http://localhost:3000). Add participants from the left sidebar (pick a built-in persona or build a custom one with the axis sliders), choose an engine in the **Protocol** panel (CVP, Blind Jury, or Adversarial Red Team), optionally enable judge synthesis and claim-level extraction, set a **cost cap** if you want hard-abort protection, type a prompt (or click a preset), and hit **Run Consensus** — or hit **Sweep** to run the same prompt through all three engines back-to-back. On xl+ screens the cost meter, confidence trajectory, disagreement ledger, claim contradictions, and message-flow diagram live in a floating panel pinned to the right of the viewport — watch them populate in real time as the debate streams. Below xl those same panels fall back into the left sidebar. When the run finishes, click **Export** in the results panel to download the transcript as Markdown/JSON or copy a permalink that rehydrates the run on any browser.
---
@@ -379,28 +427,31 @@ app/
layout.tsx Root layout with Sonner toasts
components/
AISelector.tsx Cascaded provider/model picker + persona selector
- ConfigPanel.tsx Engine selector, CVP toggles, judge model picker
+ ConfigPanel.tsx Engine selector, CVP toggles, judge model picker, claim toggle, cost cap
ResultPanel.tsx Live streaming results, error cards, markdown rendering
- MessageFlowDiagram.tsx Floating right-side panel: cost + trajectory + ledger + UML flow
+ SweepResultsPanel.tsx Side-by-side comparison cards for engine sweep results
+ MessageFlowDiagram.tsx Floating right-side panel: cost + trajectory + ledger + claims + UML flow
ConfidenceTrajectory.tsx SVG sparkline of per-participant confidence across rounds
DisagreementPanel.tsx Grouped disagreement ledger with click-to-scroll
+ ClaimsPanel.tsx Claim-level contradictions card stack with verbatim quotes
CostMeter.tsx Live token/USD totals
JudgeCard.tsx Non-voting judge synthesis output
+ PersonaBuilder.tsx Axis-slider builder for custom personas (no free-text → no jailbreak surface)
PromptLibrary.tsx Preset prompt chips under the textarea
SessionMenu.tsx Export (Markdown/JSON) + copy permalink dropdown
BackToTop.tsx Scroll navigation
lib/
- consensus-engine.ts CVP + Blind Jury orchestration, judge synthesizer, disagreement detection
+ consensus-engine.ts CVP + Blind Jury + Adversarial Red Team orchestration, judge, claim extractor, cost cap
providers.ts Server-side provider resolution (parses AI_PROVIDERS)
- personas.ts 7 participant personas + JUDGE_PERSONA
+ personas.ts 7 participant personas + JUDGE_PERSONA + axis-based custom-persona composer
pricing.ts Model pricing table + cost estimator
prompt-library.ts Preset prompts for the library UI
- session.ts Snapshot ↔ Markdown / JSON / URL-hash serializer
- store.ts Zustand global state, options bundle, snapshot load/save
+ session.ts Snapshot ↔ Markdown / JSON / URL-hash serializer (incl. claim digests)
+ store.ts Zustand global state, options bundle, sweep state, snapshot load/save
types.ts All TypeScript types
```
-The consensus engine runs entirely server-side. Each round streams responses via Server-Sent Events. The client processes events through a single `processEvent` function that calls Zustand actions directly via `getState()` — no subscriptions, no re-renders from token events. The same event pipeline drives the confidence trajectory, the disagreement ledger, the cost meter, and the judge card — every panel reads from one coherent store.
+The consensus engine runs entirely server-side. Each round streams responses via Server-Sent Events. The client processes events through a single `processEvent` function that calls Zustand actions directly via `getState()` — no subscriptions, no re-renders from token events. The same event pipeline drives the confidence trajectory, the disagreement ledger, the cost meter, the judge card, and the claims panel — every panel reads from one coherent store.
---
@@ -448,14 +499,14 @@ The new persona will appear in every selector automatically.
## Roadmap
-RoundTable ships with two engines today. The architecture is designed to support more:
+RoundTable ships with three engines today. The architecture is designed to support more:
| Engine | Status | Description |
| --------------------------------------- | --------- | ---------------------------------------------------------------------------------------------- |
| **CVP (Consensus Validation Protocol)** | Available | Multi-round structured debate with blind Round 1, randomized order, early stop, optional judge |
| **Blind Jury** | Available | Parallel independent responses with no cross-visibility, followed by a judge synthesis |
+| **Adversarial Red Team** | Available | Rotating attacker stress-tests positions across stress rounds, post-stress synthesis last |
| **Delphi Method** | Planned | Anonymous multi-round forecasting with statistical aggregation between rounds |
-| **Adversarial Red Team** | Planned | One model attacks, others defend — iterative stress-testing of ideas |
| **Ranked Choice Synthesis** | Planned | Each model proposes solutions, then ranks all proposals — converges via elimination |
| **Dialectical Engine** | Planned | Thesis / Antithesis / Synthesis structure with formal argument mapping |
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..8857310
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,132 @@
+# Security
+
+RoundTable is an open-source research demo. This document explains its security principles, what it deliberately defends against, and what it does **not** protect.
+
+> **Headline:** RoundTable has no authentication. Anyone who can reach the URL can spend the API keys you configured. Treat any public deployment as a budget you're prepared to lose. The mitigations below are defense-in-depth — they do not make it safe to expose unauthenticated.
+
+---
+
+## Threat model
+
+The codebase is designed to defend against **prompt-injection escalation** from a malicious or compromised client (e.g. someone who finds a public deploy and crafts an evil request body, or a permalink shared in a forum that smuggles a jailbreak), and against **runaway costs** caused by misuse or accidents. It is **not** designed to defend against:
+
+- An attacker who has direct access to your `.env.local` or the deploy's environment variables. API keys are secrets — protect them at the platform level.
+- Trusted authenticated users (there are none — every visitor is treated identically).
+- Network-level attacks, DNS hijacking, MITM on the upstream provider connection. These are the hosting platform's job.
+- Provider-side abuse detection / quota / TOS enforcement. Each provider's terms apply directly.
+- Data leakage via the LLM itself — model outputs are not filtered or scanned.
+
+If you're considering a public deploy, the recommended baseline is: put the app behind your own auth (Vercel Password Protection, Cloudflare Access, an OAuth proxy, etc.), set a `costCapUSD`, monitor your provider dashboards, and rotate keys regularly.
+
+---
+
+## Architecture-level principles
+
+### 1. API keys never reach the browser
+
+Every consensus call is server-side. The browser never sees the contents of `AI_PROVIDERS`, `GROK_API_KEY`, `ANTHROPIC_API_KEY`, etc. The `/api/providers` route returns a client-safe model list with `id`, `providerName`, and `modelId` — no secrets. Keys are resolved from environment variables only when a provider call is about to be made (`lib/providers.ts:resolveApiKey`).
+
+### 2. Personas are rebuilt server-side on every request
+
+The `Persona` object the client sends to `/api/consensus` carries a `systemPrompt`, but the server **ignores it**. `app/api/consensus/route.ts` always:
+
+1. Reads `participant.persona.id` (a short string).
+2. Looks up the canonical persona via `getPersona(personaId)` from `lib/personas.ts`.
+3. Uses the server-side `systemPrompt` for that ID.
+
+This means a malicious client cannot smuggle a jailbreak prompt by editing the systemPrompt field, and it means a permalink cannot ship arbitrary instructions to whoever opens it. The same protection applies to model IDs: the server re-validates each `modelInfo.id` against the configured `AI_PROVIDERS` and rejects unknown models with HTTP 400.
+
+### 3. Custom personas use axes only — no free-text reaches the LLM
+
+The custom persona builder (`components/PersonaBuilder.tsx` and `composeCustomPersona` in `lib/personas.ts`) deliberately exposes only six axis selectors with three levels each. The server composes the system prompt from a small library of vetted phrase fragments keyed by `(axis, level)`. The user never types a system prompt — only a display name.
+
+The display name is the only user-typed string that appears anywhere in the composed prompt, and it is sanitised on the server in `sanitizeCustomPersonaSpec`:
+
+- **Allowlist:** only Unicode letters, digits, spaces, and `_-.'` survive. Newlines, code fences, braces, quotes, and angle brackets are stripped.
+- **Length cap:** 32 characters after sanitisation.
+- **Empty-after-strip:** the spec is rejected with HTTP 400.
+- **Color** must match `^#[0-9a-fA-F]{6}$` or it falls back to a safe default.
+- **Emoji** is capped to 4 codepoints.
+- **Axis values** must be in `low | mid | high` or they default to `mid`.
+
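+A sketch of the name path under these rules (`sanitizeCustomPersonaSpec` in `lib/personas.ts` is canonical; `sanitizeName` is a hypothetical helper restating the documented allowlist):
+
+```ts
+// Sketch of the display-name rules above; sanitizeCustomPersonaSpec is canonical.
+function sanitizeName(raw: unknown): string | null {
+  if (typeof raw !== "string") return null;
+  // Allowlist: Unicode letters, digits, spaces, and _ - . '
+  const cleaned = raw.replace(/[^\p{L}\p{N} _\-.']/gu, "").trim();
+  if (cleaned.length === 0) return null; // empty-after-strip: rejected with HTTP 400 upstream
+  return cleaned.slice(0, 32); // length cap applied after sanitisation
+}
+```
+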
+The composer then wraps the sanitised name in a fixed structural template ("You are X, a custom RoundTable participant. Your stance for this debate is defined by..."), so even names that pass the allowlist (e.g. "Bob Ignore prior instructions") are positioned as part of an identity, not as standalone instructions. A model parsing the prompt reads `name` as a single (weirdly named) entity.
+
+`composeCustomPersona` throws `InvalidCustomPersonaError` on any spec that fails sanitisation — there is no silent fallback that could mask a bug.
+
+### 4. Server-side input validation everywhere
+
+`/api/consensus` enforces hard limits before the engine runs:
+
+- **Prompt length:** ≤ 10,000 characters.
+- **Participants:** ≤ 8 per request.
+- **Rounds:** clamped to `[1, 10]`.
+- **Cost cap (USD):** clamped to `[0, 50]`.
+- **Engine:** must be one of `cvp | blind-jury | adversarial`. Anything else maps to `cvp` (defensive default).
+- **Judge model ID:** if `judgeEnabled`, the model ID must resolve via `findResolvedModel`; otherwise rejected with HTTP 400.
+- **Custom persona spec:** if `persona.id === "custom"`, `sanitizeCustomPersonaSpec` must return a non-null result; otherwise HTTP 400.
+
+Booleans use a strict `typeof === "boolean"` check rather than truthiness. Numbers are clamped, not just parsed. Unknown fields in the request body are ignored.
+
+### 5. Per-IP rate limiting
+
+`app/api/consensus/route.ts` runs a sliding-window rate limiter at 5 requests per IP per minute. The IP is taken from `x-forwarded-for` (first hop) or `x-real-ip`. Rate-limited requests return HTTP 429.
+
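+A sketch of the sliding window (`isRateLimited`, `requestLog`, and `RATE_WINDOW_MS` are the real names; `RATE_MAX_REQUESTS` is an assumed name for the documented 5-per-minute limit):
+
+```ts
+// Sketch of the per-IP sliding window; isRateLimited in
+// app/api/consensus/route.ts is canonical.
+const RATE_WINDOW_MS = 60_000;
+const RATE_MAX_REQUESTS = 5;
+const requestLog = new Map<string, number[]>();
+
+function isRateLimited(ip: string): boolean {
+  const now = Date.now();
+  // Keep only hits inside the window, then check against the limit.
+  const recent = (requestLog.get(ip) ?? []).filter((t) => now - t < RATE_WINDOW_MS);
+  if (recent.length >= RATE_MAX_REQUESTS) return true; // caller returns HTTP 429
+  recent.push(now);
+  requestLog.set(ip, recent);
+  return false;
+}
+```
+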
+The cleanup interval that prunes old entries is keyed on a global symbol so Next.js HMR cannot stack repeated intervals across hot reloads in dev.
+
+This is **in-memory** and resets on cold start. It is intentionally simple — a stateless single-process limiter that needs no external service. For a public deploy, treat it as a courtesy, not a wall, and add a real edge rate limiter at the platform.
+
+### 6. Cost cap (defense against runaway runs)
+
+The new `costCapUSD` option in `ConsensusOptions` lets a user set a hard ceiling. The engine accumulates `runningCostUSD` after every round, every judge call, and every claim-extraction call; if the running total crosses the cap, the engine throws `CostCapExceededError`. The error propagates through the SSE pipeline as a normal `error` event, the run is aborted, and the client sees a clear toast with the exact dollar figure.
+
+Sweep mode is up to 3× the cost of a single run, so the cost cap is the recommended companion control for that feature.
+
+### 7. Claim-extractor parser hardening
+
+The claim-level extraction LLM pass produces JSON. The parser in `parseClaimsJSON` (`lib/consensus-engine.ts`) is intentionally strict and defensive:
+
+- Unparseable output → empty digest, soft-fail, run unaffected.
+- Each side must have a non-empty `stance`, valid `participantIds` (all must match an actual run participant), and a non-empty `quote`.
+- **Quote verification:** the first 80 normalised characters of the quote must appear in the actual response content of one of the named participants. Fabricated quotes the model invents are dropped before they reach the UI.
+- **Same-participant on multiple sides** is rejected — a participant cannot simultaneously support and oppose the same claim.
+- Cap of 8 contradictions, 240 chars per claim, 600 chars per quote.
+
+Provider errors during the extractor pass populate `ClaimDigest.error` so the UI can render a distinct "Claim Extraction Failed" card — silent failures look indistinguishable from "no contradictions found", which would be misleading.
+
+### 8. Errored providers are excluded from scoring
+
+A failing provider (wrong base URL, expired key, 404, upstream outage) is caught in `streamParticipant`, formatted via `formatProviderError`, and emitted as a `participant-end` event with an `error` field. The UI renders the participant as a red error card, fires a toast naming the broken model, and the engine **excludes the errored response from both the consensus score and the disagreement ledger**. One broken provider can no longer tank a run.
+
+---
+
+## What is intentionally NOT secured
+
+These are deliberate non-goals. Documenting them so contributors aren't surprised:
+
+- **No authentication / authorisation.** The app is single-tenant by deployment.
+- **No persistence on the server.** No DB, no session storage, no logs of user prompts beyond the standard Next.js / Vercel access logs.
+- **No content moderation.** Prompts are forwarded verbatim. Model outputs are rendered verbatim.
+- **No PII handling.** If a user pastes PII into a prompt, it is sent to whichever providers are configured.
+- **No dependency-pin enforcement at runtime.** Lockfile is committed, but the app trusts its own dependencies.
+- **The cost meter is an estimate.** Pricing in `lib/pricing.ts` is best-effort public list pricing; it can drift. The cost cap uses these estimates, so a model with stale or missing pricing data may run past the apparent cap.
+- **Snapshot permalinks (`#rt=…`) are not signed.** Anyone who can edit the URL can edit the snapshot. The hash is a convenience encoding, not an authenticity proof. Personas in a permalink are still server-rebuilt from their IDs, so the worst a tampered permalink can do is render incorrect display labels (the actual run, if re-executed, uses server-side definitions).
+
+---
+
+## Reporting a vulnerability
+
+Open a private issue on the repository or email the maintainer. Please do not disclose security issues in public issue trackers before a fix lands.
+
+If your report is sensitive, mark it as such and the maintainer will respond before public disclosure. We try to acknowledge reports within a few days; a fix timeline depends on severity and complexity.
+
+---
+
+## Security-relevant files
+
+| File | Role |
+| ------------------------------- | ------------------------------------------------------------------------------ |
+| `app/api/consensus/route.ts` | Input validation, rate limiting, persona / model re-verification, error funnel |
+| `lib/providers.ts` | API key resolution, never-leak-to-client model list |
+| `lib/personas.ts` | Server-side persona registry, axis-only custom-persona composer, sanitiser |
+| `lib/consensus-engine.ts` | Cost cap enforcement, claim-extractor parser, error formatting |
+| `components/PersonaBuilder.tsx` | Client-side builder UI — does not generate any LLM-bound text outside the spec |
diff --git a/app/api/consensus/route.ts b/app/api/consensus/route.ts
index 39362c9..7674207 100644
--- a/app/api/consensus/route.ts
+++ b/app/api/consensus/route.ts
@@ -9,7 +9,7 @@
import type { ConsensusEvent, ConsensusOptions, EngineType, Participant } from "@/lib/types";
import { runConsensus } from "@/lib/consensus-engine";
-import { getPersona } from "@/lib/personas";
+import { composeCustomPersona, getPersona, sanitizeCustomPersonaSpec } from "@/lib/personas";
import { findResolvedModel } from "@/lib/providers";
export const dynamic = "force-dynamic";
@@ -37,15 +37,24 @@ function isRateLimited(ip: string): boolean {
return false;
}
-// Periodic cleanup to prevent memory leak
-setInterval(() => {
- const now = Date.now();
- for (const [ip, log] of requestLog.entries()) {
- const recent = log.filter((t) => now - t < RATE_WINDOW_MS);
- if (recent.length === 0) requestLog.delete(ip);
- else requestLog.set(ip, recent);
- }
-}, RATE_WINDOW_MS);
+// Periodic cleanup to prevent memory leak.
+// On Vercel serverless this module is fresh per cold start, so the
+// setInterval is harmless. In dev with Next.js HMR the same module can
+// be re-evaluated many times; without this guard we'd register a new
+// interval per HMR pass and they'd never get cleared. Keying on a
+// global symbol prevents accumulation across reloads.
+const CLEANUP_KEY = Symbol.for("roundtable.consensus-route.cleanup");
+const globalAny = globalThis as unknown as Record<symbol, ReturnType<typeof setInterval> | undefined>;
+if (!globalAny[CLEANUP_KEY]) {
+ globalAny[CLEANUP_KEY] = setInterval(() => {
+ const now = Date.now();
+ for (const [ip, log] of requestLog.entries()) {
+ const recent = log.filter((t) => now - t < RATE_WINDOW_MS);
+ if (recent.length === 0) requestLog.delete(ip);
+ else requestLog.set(ip, recent);
+ }
+ }, RATE_WINDOW_MS);
+}
// ── Options parsing & validation ───────────────────────────
@@ -57,7 +66,9 @@ interface LooseRequestBody {
}
function parseEngine(v: unknown): EngineType {
- return v === "blind-jury" ? "blind-jury" : "cvp";
+ if (v === "blind-jury") return "blind-jury";
+ if (v === "adversarial") return "adversarial";
+ return "cvp";
}
function parseBool(v: unknown, fallback: boolean): boolean {
@@ -76,6 +87,14 @@ function parseOptions(body: LooseRequestBody): ConsensusOptions {
? (raw.judgeModelId as string)
: undefined;
+ // Cost cap is in USD. Clamp to a sensible range so a malformed body
+ // can't disable the protection or set absurd values.
+ const rawCap = raw.costCapUSD;
+ const costCapUSD =
+ typeof rawCap === "number" && Number.isFinite(rawCap) && rawCap > 0
+ ? Math.min(rawCap, 50)
+ : undefined;
+
return {
engine: parseEngine(raw.engine),
rounds,
@@ -84,6 +103,8 @@ function parseOptions(body: LooseRequestBody): ConsensusOptions {
earlyStop: parseBool(raw.earlyStop, true),
judgeEnabled: parseBool(raw.judgeEnabled, false),
judgeModelId,
+ extractClaimsEnabled: parseBool(raw.extractClaimsEnabled, false),
+ costCapUSD,
};
}
@@ -150,6 +171,7 @@ export async function POST(request: Request) {
id?: unknown;
modelInfo?: { id?: unknown };
persona?: { id?: unknown };
+ customPersonaSpec?: unknown;
}>) {
const modelCompositeId = typeof p.modelInfo?.id === "string" ? p.modelInfo.id : "";
const resolved = findResolvedModel(modelCompositeId);
@@ -160,9 +182,24 @@ export async function POST(request: Request) {
});
}
- // Rebuild persona from server-side definitions (ignore client systemPrompt)
+ // Rebuild persona from server-side definitions (ignore client systemPrompt).
+ // For custom personas, sanitize and compose from the axis spec instead.
const personaId = typeof p.persona?.id === "string" ? p.persona.id : "";
- const persona = getPersona(personaId);
+ let persona;
+ let customSpec;
+ if (personaId === "custom") {
+ const sanitized = sanitizeCustomPersonaSpec(p.customPersonaSpec);
+ if (!sanitized) {
+ return new Response(JSON.stringify({ error: "Invalid custom persona spec" }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+ persona = composeCustomPersona(sanitized);
+ customSpec = sanitized;
+ } else {
+ persona = getPersona(personaId);
+ }
validatedParticipants.push({
id: typeof p.id === "string" ? p.id : `p-${validatedParticipants.length + 1}`,
@@ -173,6 +210,7 @@ export async function POST(request: Request) {
modelId: resolved.modelId,
},
persona,
+ ...(customSpec ? { customPersonaSpec: customSpec } : {}),
});
}
diff --git a/app/page.tsx b/app/page.tsx
index b3aa23c..6e1bc5a 100644
--- a/app/page.tsx
+++ b/app/page.tsx
@@ -12,10 +12,12 @@ import MessageFlowDiagram from "@/components/MessageFlowDiagram";
import BackToTop from "@/components/BackToTop";
import ConfidenceTrajectory from "@/components/ConfidenceTrajectory";
import DisagreementPanel from "@/components/DisagreementPanel";
+import ClaimsPanel from "@/components/ClaimsPanel";
import CostMeter from "@/components/CostMeter";
import ConfigPanel from "@/components/ConfigPanel";
import PromptLibrary from "@/components/PromptLibrary";
import { toast } from "sonner";
+import SweepResultsPanel from "@/components/SweepResultsPanel";
import {
Play,
RotateCcw,
@@ -28,8 +30,9 @@ import {
ArrowRight,
Sparkles,
Eye,
+ Layers,
} from "lucide-react";
-import type { ConsensusEvent, ConsensusRequest } from "@/lib/types";
+import type { ConsensusEvent, ConsensusRequest, EngineType } from "@/lib/types";
import { decodeSnapshotFromHash } from "@/lib/session";
export default function HomePage() {
@@ -41,6 +44,7 @@ export default function HomePage() {
const progress = useArenaStore((s) => s.progress);
const finalScore = useArenaStore((s) => s.finalScore);
const sharedView = useArenaStore((s) => s.sharedView);
+ const sweepActive = useArenaStore((s) => s.sweepActive);
const setAvailableModels = useArenaStore((s) => s.setAvailableModels);
const setModelsLoading = useArenaStore((s) => s.setModelsLoading);
@@ -90,8 +94,13 @@ export default function HomePage() {
useEffect(() => {
const handler = (e: KeyboardEvent) => {
- if (e.key === "Escape" && useArenaStore.getState().isRunning) {
- useArenaStore.getState().cancelConsensus();
+ if (e.key !== "Escape") return;
+ const s = useArenaStore.getState();
+ if (s.sweepActive) {
+ s.cancelSweep();
+ toast.info("Sweep cancelled");
+ } else if (s.isRunning) {
+ s.cancelConsensus();
toast.info("Consensus cancelled");
}
};
@@ -99,33 +108,17 @@ export default function HomePage() {
return () => window.removeEventListener("keydown", handler);
}, []);
- const handleRunConsensus = useCallback(async () => {
+ const runOneEngine = useCallback(async (engineOverride?: EngineType) => {
const state = useArenaStore.getState();
- if (!state.prompt.trim()) {
- toast.error("Enter a prompt first");
- return;
- }
- if (state.participants.length < 2) {
- toast.error("Add at least 2 AI participants");
- return;
- }
- if (state.options.judgeEnabled && !state.options.judgeModelId) {
- toast.error("Choose a judge model or disable judge synthesis");
- return;
- }
-
- // Clear any URL hash from a previously loaded shared view
- if (typeof window !== "undefined" && window.location.hash) {
- history.replaceState(null, "", window.location.pathname);
- }
-
const controller = state.startConsensus();
- toast.info("Consensus started — Esc to cancel");
+
+ const optionsForRun =
+ engineOverride !== undefined ? { ...state.options, engine: engineOverride } : state.options;
const body: ConsensusRequest = {
prompt: state.prompt.trim(),
participants: state.participants,
- options: state.options,
+ options: optionsForRun,
};
try {
@@ -156,19 +149,104 @@ export default function HomePage() {
}
}
}
+ return true;
} catch (err) {
- if (err instanceof DOMException && err.name === "AbortError") return;
+ if (err instanceof DOMException && err.name === "AbortError") return false;
const msg = err instanceof Error ? err.message : "Unknown error";
toast.error(`Consensus failed: ${msg}`);
useArenaStore.getState().completeConsensus(0, `Error: ${msg}`, 0);
+ return false;
}
}, []);
+ const handleRunConsensus = useCallback(async () => {
+ const state = useArenaStore.getState();
+ if (!state.prompt.trim()) {
+ toast.error("Enter a prompt first");
+ return;
+ }
+ if (state.participants.length < 2) {
+ toast.error("Add at least 2 AI participants");
+ return;
+ }
+ if (state.options.judgeEnabled && !state.options.judgeModelId) {
+ toast.error("Choose a judge model or disable judge synthesis");
+ return;
+ }
+
+ // Clear any URL hash from a previously loaded shared view
+ if (typeof window !== "undefined" && window.location.hash) {
+ history.replaceState(null, "", window.location.pathname);
+ }
+ // Clear any previous sweep state when a single-engine run starts
+ useArenaStore.getState().clearSweep();
+
+ toast.info("Consensus started — Esc to cancel");
+ await runOneEngine();
+ }, [runOneEngine]);
+
+ const handleRunSweep = useCallback(async () => {
+ const state = useArenaStore.getState();
+ if (!state.prompt.trim()) {
+ toast.error("Enter a prompt first");
+ return;
+ }
+ if (state.participants.length < 2) {
+ toast.error("Add at least 2 AI participants");
+ return;
+ }
+ if (state.options.judgeEnabled && !state.options.judgeModelId) {
+ toast.error("Choose a judge model or disable judge synthesis");
+ return;
+ }
+
+ if (typeof window !== "undefined" && window.location.hash) {
+ history.replaceState(null, "", window.location.pathname);
+ }
+
+ const sweepEngines: EngineType[] = ["cvp", "blind-jury", "adversarial"];
+ const store = useArenaStore.getState();
+ store.startSweep(sweepEngines);
+ toast.info(
+ `Sweep started — running ${sweepEngines.length} engines sequentially. Esc cancels current.`,
+ );
+
+ for (let i = 0; i < sweepEngines.length; i++) {
+ // If the user cancelled the sweep (sweepActive flipped off), stop.
+ if (!useArenaStore.getState().sweepActive) break;
+ useArenaStore.getState().setSweepCurrentIndex(i);
+ const engine = sweepEngines[i];
+ const ok = await runOneEngine(engine);
+ if (!ok) {
+ // Aborted or errored. Don't push a partial snapshot — better to
+ // show whichever engines DID complete than a half-rendered card.
+ toast.error(`Sweep stopped on ${engine}.`);
+ break;
+ }
+ // Snapshot the just-completed engine BEFORE resetting state for
+ // the next one.
+ const snap = useArenaStore.getState().getSnapshot();
+ useArenaStore.getState().pushSweepResult(snap);
+ if (i < sweepEngines.length - 1 && useArenaStore.getState().sweepActive) {
+ useArenaStore.getState().reset();
+ }
+ }
+ if (useArenaStore.getState().sweepActive) {
+ toast.success("Sweep complete — compare engines below.");
+ }
+ }, [runOneEngine]);
+
const canRun = !isRunning && !sharedView && prompt.trim().length > 0 && participants.length >= 2;
const handleCancel = useCallback(() => {
- cancelConsensus();
- toast.info("Consensus cancelled");
+ const s = useArenaStore.getState();
+ if (s.sweepActive) {
+ s.cancelSweep();
+ toast.info("Sweep cancelled");
+ } else {
+ cancelConsensus();
+ toast.info("Consensus cancelled");
+ }
}, [cancelConsensus]);
const handleLeaveSharedView = useCallback(() => {
@@ -297,6 +375,7 @@ export default function HomePage() {
+
{isRunning && (
@@ -362,7 +441,9 @@ export default function HomePage() {
{participants.length} participant{participants.length !== 1 ? "s" : ""} ·{" "}
{options.engine === "blind-jury"
? "Blind Jury"
- : `${options.rounds} round${options.rounds !== 1 ? "s" : ""}`}
+ : options.engine === "adversarial"
+ ? `Red Team · ${options.rounds} round${options.rounds !== 1 ? "s" : ""}`
+ : `${options.rounds} round${options.rounds !== 1 ? "s" : ""}`}
{isRunning ? (
) : (
-
+
+
+
+
)}
@@ -388,6 +480,7 @@ export default function HomePage() {
+
@@ -450,6 +543,15 @@ function processEvent(event: ConsensusEvent) {
case "judge-end":
s.completeJudge(event.result);
break;
+ case "claims-start":
+ s.startClaims(event.modelId, event.providerName);
+ break;
+ case "claims-end":
+ s.completeClaims(event.digest);
+ if (event.digest.error) {
+ toast.error(`Claim extraction failed: ${event.digest.error}`);
+ }
+ break;
case "consensus-complete":
s.completeConsensus(event.finalScore, event.summary, event.roundsCompleted);
toast.success(`Consensus complete! Score: ${event.finalScore}%`);
diff --git a/components/AISelector.tsx b/components/AISelector.tsx
index af8df8f..cd50208 100644
--- a/components/AISelector.tsx
+++ b/components/AISelector.tsx
@@ -5,11 +5,23 @@
// ─────────────────────────────────────────────────────────────
import { useArenaStore } from "@/lib/store";
-import { PERSONAS } from "@/lib/personas";
-import type { ModelInfo, Persona } from "@/lib/types";
-import { Plus, X, ChevronRight, Bot, Loader2, Cpu, Server, Sparkles, Star } from "lucide-react";
+import { PERSONAS, composeCustomPersona, DEFAULT_CUSTOM_SPEC } from "@/lib/personas";
+import type { CustomPersonaSpec, ModelInfo, Persona } from "@/lib/types";
+import {
+ Plus,
+ X,
+ ChevronRight,
+ Bot,
+ Loader2,
+ Cpu,
+ Server,
+ Sparkles,
+ Star,
+ Sliders,
+} from "lucide-react";
import { useState, useRef, useEffect, useMemo, useCallback } from "react";
import { createPortal } from "react-dom";
+import PersonaBuilder from "./PersonaBuilder";
export default function AISelector() {
const {
@@ -27,6 +39,8 @@ export default function AISelector() {
const [selectedPersona, setSelectedPersona] = useState(
PERSONAS.find((p) => p.id === "first-principles") ?? PERSONAS[0],
);
+ const [selectedCustomSpec, setSelectedCustomSpec] = useState<CustomPersonaSpec | null>(null);
+ const [builderOpen, setBuilderOpen] = useState(false);
const [menuOpen, setMenuOpen] = useState(false);
const [activeProvider, setActiveProvider] = useState<string | null>(null);
const [personaMenuOpen, setPersonaMenuOpen] = useState(false);
@@ -89,9 +103,20 @@ export default function AISelector() {
const handleAdd = () => {
if (!selectedModel || !selectedPersona) return;
- addParticipant(selectedModel, selectedPersona);
+ if (selectedPersona.id === "custom" && selectedCustomSpec) {
+ addParticipant(selectedModel, selectedPersona, selectedCustomSpec);
+ } else {
+ addParticipant(selectedModel, selectedPersona);
+ }
};
+ const handleSaveBuilder = useCallback((spec: CustomPersonaSpec) => {
+ const composed = composeCustomPersona(spec);
+ setSelectedPersona(composed);
+ setSelectedCustomSpec(spec);
+ setBuilderOpen(false);
+ }, []);
+
if (modelsLoading) {
return (
@@ -278,6 +303,25 @@ export default function AISelector() {
Choose Persona