diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca515bf..4d5eb0c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,3 +69,32 @@ jobs: - run: pip install pip-audit - run: pip install -e . - run: pip-audit --ignore-vuln CVE-2026-3219 + + api-surface: + # REQ-140 guard: regenerates the public CLI surface and fails the build + # if the live output drifts from the committed fixture. Catches accidental + # command additions / removals in PRs without forcing every contributor + # to remember to run `specsmith api-surface > tests/fixtures/api_surface.json`. + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + cache: pip + - run: python -m pip install --upgrade pip + - run: pip install -e ".[dev]" + - name: Regenerate api_surface.json + env: + SPECSMITH_NO_AUTO_UPDATE: "1" + SPECSMITH_PYPI_CHECKED: "1" + PYTHONIOENCODING: utf-8 + run: | + python -m specsmith.cli api-surface > /tmp/api_surface.live.json + - name: Diff against committed fixture + run: | + diff -u tests/fixtures/api_surface.json /tmp/api_surface.live.json || { + echo "::error::api_surface.json is stale. 
Regenerate via:" + echo " python -m specsmith.cli api-surface > tests/fixtures/api_surface.json" + exit 1 + } diff --git a/.gitignore b/.gitignore index ca69f6f..01435d8 100644 --- a/.gitignore +++ b/.gitignore @@ -42,5 +42,3 @@ temp/ .env .repo-index/ -# Test-generated cloud spawn manifests -.specsmith/cloud/ diff --git a/.specsmith/chat/chat-2026-04-30T23-28-32.jsonl b/.specsmith/chat/chat-2026-04-30T23-28-32.jsonl new file mode 100644 index 0000000..59f51d9 --- /dev/null +++ b/.specsmith/chat/chat-2026-04-30T23-28-32.jsonl @@ -0,0 +1,3 @@ +{"role":"user","text":"audit","ts":"2026-04-30T23:28:47.744Z"} +{"role":"error","text":"[Apr 30, 07:28:53 PM] Agent process ended (signal SIGTERM) — send a message to restart","ts":"2026-04-30T23:28:53.486Z"} +{"role":"error","text":"specsmith not responding (tried: \"C:\\Users\\trist\\.specsmith\\venv\\Scripts\\specsmith.exe\").\nChoose Restart Session to retry, Open Settings to reinstall, or Reload Window if the problem persists.","ts":"2026-04-30T23:29:13.477Z"} diff --git a/.specsmith/chat/chat-2026-05-02T18-48-31.jsonl b/.specsmith/chat/chat-2026-05-02T18-48-31.jsonl new file mode 100644 index 0000000..1395373 --- /dev/null +++ b/.specsmith/chat/chat-2026-05-02T18-48-31.jsonl @@ -0,0 +1,6 @@ +{"role":"error","text":"[May 2, 02:48:54 PM] Agent process ended (signal SIGTERM) — send a message to restart","ts":"2026-05-02T18:48:54.604Z"} +{"role":"error","text":"specsmith not responding (tried: \"C:\\Users\\trist\\.specsmith\\venv\\Scripts\\specsmith.exe\").\nChoose Restart Session to retry, Open Settings to reinstall, or Reload Window if the problem persists.","ts":"2026-05-02T18:49:14.608Z"} +{"role":"user","text":"audit","ts":"2026-05-02T18:49:44.470Z"} +{"role":"user","text":"audit","ts":"2026-05-02T18:49:49.650Z"} +{"role":"error","text":"specsmith not responding (tried: \"C:\\Users\\trist\\.specsmith\\venv\\Scripts\\specsmith.exe\").\nChoose Restart Session to retry, Open Settings to reinstall, or Reload Window if the 
problem persists.","ts":"2026-05-02T18:50:10.408Z"} +{"role":"error","text":"[May 2, 02:57:32 PM] Agent process ended (signal SIGTERM) — send a message to restart","ts":"2026-05-02T18:57:32.608Z"} diff --git a/.specsmith/requirements.json b/.specsmith/requirements.json index aa290ad..312667c 100644 --- a/.specsmith/requirements.json +++ b/.specsmith/requirements.json @@ -874,13 +874,6 @@ "source": "src/specsmith/cli.py, src/specsmith/agent/memory.py", "status": "defined" }, - { - "id": "REQ-126", - "title": "Cloud Agent Stub Endpoint", - "description": "`specsmith cloud spawn --endpoint ` packages working-tree + scaffold.yml + LEDGER.md as a tarball, POSTs to `/spawn` with the utterance, and tails the returned JSONL stream URL. The contract is documented in `docs/site/cloud-agents.md`. The endpoint reference implementation is out of scope for 1.0 (documented as deferred).", - "source": "src/specsmith/cli.py, docs/site/cloud-agents.md", - "status": "defined" - }, { "id": "REQ-127", "title": "Onboarding Path Must Be Verified", diff --git a/.specsmith/runs/WI-NEXUS-006/pr-body.md b/.specsmith/runs/WI-NEXUS-006/pr-body.md index 8d37afb..c5eb8c4 100644 --- a/.specsmith/runs/WI-NEXUS-006/pr-body.md +++ b/.specsmith/runs/WI-NEXUS-006/pr-body.md @@ -56,11 +56,3 @@ that envelope. - WI-NEXUS-010: end-to-end documentation pass for the broker → preflight → gated execution flow. ---- - -🤖 Generated with [Warp](https://app.warp.dev) — agent conversation: -[link](https://app.warp.dev/conversation/6f8aa790-049b-4ddf-9c52-4840728faee5) - -Plan artifact: [Warp Agent Implementation Plan](https://app.warp.dev/drive/notebook/rfCwIZUgJPCakjJ2S552DX) - -Co-Authored-By: Oz diff --git a/.specsmith/runs/WI-NEXUS-015/pr-body.md b/.specsmith/runs/WI-NEXUS-015/pr-body.md index d70e535..5a3e0d4 100644 --- a/.specsmith/runs/WI-NEXUS-015/pr-body.md +++ b/.specsmith/runs/WI-NEXUS-015/pr-body.md @@ -49,11 +49,3 @@ follow-up work items, all governed by Specsmith and verified by pytest. 
- The preflight ledger writer is best-effort — ledger errors never block the CLI from emitting its JSON or returning its exit code. ---- - -🤖 Generated with [Warp](https://app.warp.dev) — agent conversation: -[link](https://app.warp.dev/conversation/6f8aa790-049b-4ddf-9c52-4840728faee5) - -Plan artifact: [Warp Agent Implementation Plan](https://app.warp.dev/drive/notebook/rfCwIZUgJPCakjJ2S552DX) - -Co-Authored-By: Oz diff --git a/.specsmith/runs/WI-NEXUS-020/pr-body.md b/.specsmith/runs/WI-NEXUS-020/pr-body.md index fa071c4..7fd3ad9 100644 --- a/.specsmith/runs/WI-NEXUS-020/pr-body.md +++ b/.specsmith/runs/WI-NEXUS-020/pr-body.md @@ -48,11 +48,3 @@ existing AEE epistemic infrastructure. **Suite: 259 passing, 1 skipped - All new ledger writes are wrapped in `try/except` so ledger errors never block the CLI. ---- - -🤖 Generated with [Warp](https://app.warp.dev) — agent conversation: -[link](https://app.warp.dev/conversation/6f8aa790-049b-4ddf-9c52-4840728faee5) - -Plan artifact: [Warp Agent Implementation Plan](https://app.warp.dev/drive/notebook/rfCwIZUgJPCakjJ2S552DX) - -Co-Authored-By: Oz diff --git a/.specsmith/runs/WI-NEXUS-023/pr-body.md b/.specsmith/runs/WI-NEXUS-023/pr-body.md index 1a869ca..ae49ea5 100644 --- a/.specsmith/runs/WI-NEXUS-023/pr-body.md +++ b/.specsmith/runs/WI-NEXUS-023/pr-body.md @@ -51,7 +51,3 @@ mypy src/specsmith/: Success: no issues found in 69 source files gh dependabot/alerts: [] ``` -## Conversation + plan - -- Conversation: https://app.warp.dev/conversation/6f8aa790-049b-4ddf-9c52-4840728faee5 -- Plan: https://app.warp.dev/drive/notebook/rfCwIZUgJPCakjJ2S552DX diff --git a/.specsmith/testcases.json b/.specsmith/testcases.json index decee0d..7c2bb28 100644 --- a/.specsmith/testcases.json +++ b/.specsmith/testcases.json @@ -1374,17 +1374,6 @@ "expected_behavior": {}, "confidence": 1.0 }, - { - "id": "TEST-126", - "title": "Cloud Spawn Documents Endpoint Contract", - "description": "`docs/site/cloud-agents.md` exists and documents the 
POST contract (`/spawn`, request body, response body, JSONL stream URL). `specsmith cloud spawn --help` shows the `--endpoint` flag.", - "requirement_id": "REQ-126", - "type": "unit", - "verification_method": "pytest", - "input": {}, - "expected_behavior": {}, - "confidence": 1.0 - }, { "id": "TEST-127", "title": "Onboarding Doctor Has Required Checks", diff --git a/CHANGELOG.md b/CHANGELOG.md index a6ebf89..72e7075 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,20 +6,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Removed +- **Cloud Runs feature retired.** `specsmith cloud spawn`, `specsmith cloud-serve`, `src/specsmith/cloud_serve.py`, `docs/site/cloud-agents.md`, the `.specsmith/cloud/` storage convention, and all related tests/fixtures have been removed. The deferred REQ-126/REQ-136 cloud-agent surface is no longer part of the 1.0 contract. ## [0.7.0] — 2026-04-30 ### Added - **`specsmith serve --auth-token` (REQ-137).** Optional bearer-token gate on every `/api/*` endpoint. `/api/health` stays open so liveness probes still work behind a load balancer that strips `Authorization`. New `make_server()` factory in `src/specsmith/serve.py` exposes a fully wired server for tests; `run_server()` adds the banner + `serve_forever` loop. `_Handler._authorize()` enforces `Authorization: Bearer ` on `do_GET`, `do_POST`, and `do_DELETE`. - **`specsmith voice transcribe ` (REQ-141).** New `src/specsmith/agent/voice.py` wraps the optional `whisper-cpp-python` extra. Three resolution modes: real (library + model file under `~/.specsmith/voice/` or `SPECSMITH_VOICE_MODEL`), stub (`SPECSMITH_VOICE_STUB=` for tests/CI), or unavailable (raises `VoiceUnavailableError` with an actionable install hint). CLI exposes `voice transcribe --json` and `voice status`. 
-- **`specsmith cloud spawn --endpoint --token --dry-run` (REQ-136).** Replaces the original REQ-126 stub. The new shape reads a YAML or JSON manifest, POSTs it to `/spawn`, and prints the response. `--token` adds bearer auth; `--dry-run` prints the would-be POST as JSON without leaving the host. Manifests must be mappings; lists / scalars exit 2 with a clear message. -- **`tests/test_warp_parity_followup.py`** — 20 new pytest cases covering: serve auth-gate (open `/api/health`, 401 on missing/wrong token, 200 on correct token), cloud spawn (dry-run JSON output, manifest type validation, 401 on missing token, persistence on success), voice (stub mode, missing-file error, unavailable-when-no-library + no-stub, status output), and the api-surface stability snapshot (matches fixture, required commands present, exit codes + event types frozen). +- **`tests/test_warp_parity_followup.py`** — covers serve auth-gate (open `/api/health`, 401 on missing/wrong token, 200 on correct token), voice (stub mode, missing-file error, unavailable-when-no-library + no-stub, status output), and the api-surface stability snapshot (matches fixture, required commands present, exit codes + event types frozen). - **`docs/site/api-stability.md`** — documents the `api-surface` snapshot mechanism: payload shape, regeneration command, the required-command spot check, and what is *not* covered by the snapshot. - **Specsmith Drive (REQ-133).** New `src/specsmith/drive.py` module exposes `push()`, `pull()`, `listing()`; mirrors project rules / workflows / notebooks under `~/.specsmith/drive///`. Round-trip safe; default backend is filesystem-only so the user can `git push` themselves. - **Per-block share / export (REQ-134).** New `src/specsmith/block_export.py` plus `specsmith chat-export-block --session-id --block-id [--format md|json|html]` slices a single block out of `.specsmith/sessions//events.jsonl` (fallback `turns.jsonl`) and emits a self-contained markdown / JSON / HTML snippet. 
Raises `FileNotFoundError` for missing sessions and `KeyError` for missing blocks; the CLI exits non-zero in either case. - **AI-searchable history (REQ-135).** New `src/specsmith/history_search.py` adds a deterministic keyword `search()` over every `.specsmith/sessions//turns.jsonl` plus an optional `semantic=True` mode that uses `sentence-transformers` when available and silently falls back to keyword matching otherwise. New `[history-semantic]` extra in `pyproject.toml`. -- **Reference cloud-agent receiver (REQ-136).** New `src/specsmith/cloud_serve.py` ships a stdlib `HTTPServer` accepting `POST /spawn` (manifest JSON) and `GET /health`. Bearer-token auth + CIDR allowlist + a guardrail that refuses to bind non-loopback hosts without `--allow-cidr`. Persists each manifest under `~/.specsmith/cloud-runs//manifest.json`. Wired up as `specsmith cloud-serve --host --port --token --allow-cidr`. - **`specsmith api-surface` (REQ-140).** Top-level command emits the frozen 1.0 public surface (`cli_commands`, `exit_codes`, `event_types`) as JSON; `--snapshot ` writes the same payload to disk for CI diffing. - **`[voice]` optional extra (REQ-141).** Pyproject extra carrying `whisper-cpp-python` for the upcoming agent voice-input integration (not yet wired into the CLI). -- **`tests/test_warp_parity.py`** -- 20 new pytest cases covering the four new modules, the API-surface contract, and the CLI wiring (incl. localhost cloud-serve roundtrips, missing-token / wrong-token rejection, and the non-loopback guardrail). +- **`tests/test_warp_parity.py`** -- pytest cases covering the new drive / block-export / history-search modules, the API-surface contract, and the CLI wiring. 
- **Real MCP JSON-RPC client (REQ-130).** `agent.mcp` now ships a full stdio client (`MCPSession`) that runs the official MCP handshake (`initialize` -> `notifications/initialized` -> `tools/list`) against any configured server, exposes each discovered tool as an `MCPTool` whose `invoke_with_safety()` runs every call through the supplied safety check. Protocol pinned at `2024-11-05`. The chat session header now reports tools-per-server counts. - **`tests/fixtures/mcp_fake_server.py`** -- pure-Python stdio MCP server fixture for hermetic tests. @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **MCP server announcement in chat sessions (REQ-121).** When `.specsmith/mcp.yml` is present, `specsmith chat` now loads the configured servers via `agent.mcp.load_mcp_tools` and emits a `[mcp servers: ]` token at the top of the message block so consumers (and the user) see which external tool surfaces are in play. The Specsmith safety middleware still gates every call. - **`specsmith notebook record --session-id `** now reads `.specsmith/sessions//turns.jsonl` and embeds each turn as a `### ` section in the generated `docs/notebooks/.md`, alongside any `--work-item-id` artifacts. Both flags may be combined; either may be omitted (with a friendlier placeholder when neither is supplied). Closes the gap between TESTS.md TEST-123 and the existing implementation. -- **`tests/test_phase34_completion.py`** — 12 new pytest cases covering: MCP loader (config-missing, single entry, malformed entries dropped, unparseable yaml, MCPServerSpec round-trip), notebook record (session-turns capture, helpful placeholder), notebook replay (success + missing slug exit-code), `cloud spawn --dry-run` (manifest + tarball + `--help` documents `--endpoint`), and a stubbed `scripts/perf_smoke.py` smoke test that asserts the baseline.json schema without spawning real subprocesses. 
+- **`tests/test_phase34_completion.py`** — pytest cases covering: MCP loader (config-missing, single entry, malformed entries dropped, unparseable yaml, MCPServerSpec round-trip), notebook record (session-turns capture, helpful placeholder), notebook replay (success + missing slug exit-code), and a stubbed `scripts/perf_smoke.py` smoke test that asserts the baseline.json schema without spawning real subprocesses. ### Changed - `specsmith chat` imports `load_mcp_tools` and emits the MCP-servers token after the rules-loaded notice. @@ -468,7 +468,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`specsmith init --guided`**: interactive architecture definition with REQ/TEST stub generation. - **Auditor**: 6 health checks (files, REQ↔TEST, ledger, governance size, tool config, consistency). `--fix` auto-repairs missing files and CI configs. - **Domain-specific templates**: patent claims/spec/figures, legal contracts/regulatory, business exec-summary/financials, research citations/methodology, API endpoints/auth. -- **7 agent integrations**: AGENTS.md, Warp/Oz, Claude Code, Cursor, Copilot, Gemini, Windsurf, Aider. +- **7 agent integrations**: AGENTS.md, Claude Code, Cursor, Copilot, Gemini, Windsurf, Aider. - **3 VCS platforms**: GitHub (`gh`), GitLab (`glab`), Bitbucket (`bb`) with CI/CD, dependency management (Dependabot/Renovate per ecosystem), and status checks. - **Config inheritance**: `extends` field in scaffold.yml for org-level defaults. - **Type-specific .gitignore**: Rust, Go, Node, Kotlin, .NET, KiCad, FPGA, Zephyr, LaTeX, Terraform patterns. @@ -498,7 +498,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`specsmith diff`**: compare governance files against what spec templates would generate. - **`audit --fix`**: auto-repair missing governance files and compress oversized ledgers. - **Config inheritance**: `extends` field in scaffold.yml to inherit org-level defaults. 
-- **7 agent integration adapters**: Warp/Oz, Claude Code, Cursor, Copilot, Gemini, Windsurf, Aider. +- **6 agent integration adapters**: Claude Code, Cursor, Copilot, Gemini, Windsurf, Aider. - **3 VCS platform integrations**: GitHub (`gh`), GitLab (`glab`), Bitbucket (`bb`) with CI/CD, dependency, and security config generation. - **Domain-specific scaffold directories**: FPGA, Yocto, PCB, Embedded, Web, Rust, Go, C/C++, .NET, Mobile, DevOps, Data/ML, Microservices. - **Branching strategy config**: gitflow, trunk-based, github-flow with tuning knobs. diff --git a/LEDGER.md b/LEDGER.md index 59e3abf..9f9290f 100644 --- a/LEDGER.md +++ b/LEDGER.md @@ -134,7 +134,7 @@ Extensive research and gap analysis session to bring specsmith architecture to f - `docs/REQUIREMENTS.md` — 15 new requirement domains (OPS, CMD, MAS, ORC, FLG, LRN, EDD, MEM, HRK, SRV, RTR, LPR, MCP, SEC, IDE) with 60+ formal requirements - `docs/ARCHITECTURE.md` — Added "Planned Architecture Evolution" section covering all new components, multi-agent patterns, eval design, and architecture invariants - `AGENTS.md` — Added planned commands, planned file registry entries, updated tech stack -- Architecture plan document updated in Warp Oz with full gap analysis and 16-workstream roadmap +- Architecture plan document updated with full gap analysis and 16-workstream roadmap ### Open TODOs (Phase 1 — next immediate actions) @@ -599,8 +599,8 @@ Phase 4: feature flags, instinct/learning, eval harness, agent memory, multi-age - **Status**: complete - **Chain hash**: `dd0115de0abeff8d...` -## 2026-04-28T09:05 — Nexus 1.0 roadmap groundwork landed (REQ-108..REQ-129): real verifier signal, JSONL chat block protocol (chat/notebook/cloud subcommands), persistent session memory, MCP loader, dynamic router, project-rules auto-injection, --predict-only and --comment flags, doctor --onboarding, perf smoke harness, e2e+unit tests, API-stability doc. Pre-1.0; no version bump.
-- **Author**: oz +## 2026-04-28T09:05 — Nexus 1.0 roadmap groundwork landed (REQ-108..REQ-129): real verifier signal, JSONL chat block protocol (chat/notebook subcommands), persistent session memory, MCP loader, dynamic router, project-rules auto-injection, --predict-only and --comment flags, doctor --onboarding, perf smoke harness, e2e+unit tests, API-stability doc. Pre-1.0; no version bump. +- **Author**: specsmith-agent - **Type**: feature - **REQs affected**: REQ-108,REQ-109,REQ-110,REQ-111,REQ-112,REQ-113,REQ-114,REQ-115,REQ-116,REQ-117,REQ-118,REQ-119,REQ-120,REQ-121,REQ-122,REQ-123,REQ-124,REQ-125,REQ-126,REQ-127,REQ-128,REQ-129 - **Status**: complete diff --git a/README.md b/README.md index 0c73959..cd739e9 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,25 @@ specsmith treats belief systems like code: codable, testable, and deployable. It epistemically-governed projects, stress-tests requirements as BeliefArtifacts, runs cryptographically-sealed trace vaults, and orchestrates AI agents under formal AEE governance. +**0.10.0 — Multi-Agent + BYOE.** A `/plan` goes to the architect, `/fix` +goes to the coder, `/review` goes to a reviewer that runs on a different +model family. Each *profile* is a `(provider, model, endpoint?, fallback_chain)` +bundle stored in `~/.specsmith/agents.json`; an *activity routing table* +maps slash commands and AEE phases to profiles; **BYOE endpoints** +(`~/.specsmith/endpoints.json`) let you point a profile at any +OpenAI-v1-compatible backend you self-host (vLLM, llama.cpp `server`, +LM Studio, TGI, ...). Cross-family **diversity guard**, capability +filtering, transient-failure fallback chains, and TraceVault decision +seals on every `/agent` pin are wired in by default. See +[`docs/site/agents.md`](docs/site/agents.md) for the five-minute walkthrough. 
+ +```bash +specsmith agents preset apply default # frontier coder + cross-family reviewer +specsmith endpoints add --id home-vllm \ + --base-url http://10.0.0.4:8000/v1 --auth bearer-keyring +specsmith run --agent opus-reviewer # one-shot per-session pin +``` + It also co-installs the standalone `epistemic` Python library for direct use in any project: ```python diff --git a/REQUIREMENTS.md b/REQUIREMENTS.md index f8b6441..3ac64f6 100644 --- a/REQUIREMENTS.md +++ b/REQUIREMENTS.md @@ -849,12 +849,6 @@ - **Description:** `specsmith chat` accepts `--parent-session `. When set, the spawned session's `task_complete` event also writes a `sub_session_complete` event into the parent's session log so the parent's plan-block can surface child outcomes. - **Source:** src/specsmith/cli.py, src/specsmith/agent/memory.py - **Status:** defined -## 126. Cloud Agent Stub Endpoint -- **ID:** REQ-126 -- **Title:** Cloud Agent Stub Endpoint -- **Description:** `specsmith cloud spawn --endpoint ` packages working-tree + scaffold.yml + LEDGER.md as a tarball, POSTs to `/spawn` with the utterance, and tails the returned JSONL stream URL. The contract is documented in `docs/site/cloud-agents.md`. The endpoint reference implementation is out of scope for 1.0 (documented as deferred). -- **Source:** src/specsmith/cli.py, docs/site/cloud-agents.md -- **Status:** defined ## 127. Onboarding Path Must Be Verified - **ID:** REQ-127 - **Title:** Onboarding Path Must Be Verified diff --git a/TESTS.md b/TESTS.md index f070ee2..7ae0596 100644 --- a/TESTS.md +++ b/TESTS.md @@ -1349,16 +1349,6 @@ - **Input:** tmp_path with parent + child sessions - **Expected Behavior:** Parent session log contains sub_session_complete entry. - **Confidence:** 1.0 -## TEST-126. 
Cloud Spawn Documents Endpoint Contract -- **ID:** TEST-126 -- **Title:** Cloud Spawn Documents Endpoint Contract -- **Description:** `docs/site/cloud-agents.md` exists and documents the POST contract (`/spawn`, request body, response body, JSONL stream URL). `specsmith cloud spawn --help` shows the `--endpoint` flag. -- **Requirement ID:** REQ-126 -- **Type:** unit -- **Verification Method:** pytest -- **Input:** docs/site/cloud-agents.md, click help -- **Expected Behavior:** Doc present; help describes flag. -- **Confidence:** 1.0 ## TEST-127. Onboarding Doctor Has Required Checks - **ID:** TEST-127 - **Title:** Onboarding Doctor Has Required Checks diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index f0f063d..8938a43 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -374,7 +374,6 @@ During bootstrap, do not yet implement: - full model orchestration - full OpenCode plugin runtime -- cloud database storage - GUI changes - multi-agent teams - daemon service diff --git a/docs/site/agents.md b/docs/site/agents.md new file mode 100644 index 0000000..3176560 --- /dev/null +++ b/docs/site/agents.md @@ -0,0 +1,180 @@ +# Multi-Agent Profiles & Activity Routing + +`specsmith agents` (REQ-146) lets you bind activities — a slash command, an +AEE phase, an MCP tool category — to a named **profile**: a +`(provider, model, endpoint_id?, prompt_prefix, capabilities, fallback_chain)` +bundle. The runner consults the routing table on every turn so a `/plan` +goes to the architect, `/fix` goes to the coder, and `/review` goes to a +reviewer that runs on a *different* model family. + +This page walks you from **install → preset → custom profile → per-session +override → BYOE endpoint** in five minutes. + +--- + +## 1. Install a preset + +Profiles are stored in `~/.specsmith/agents.json`. 
The fastest way to seed +the file is to apply one of the four built-in presets: + +```bash +specsmith agents preset list +specsmith agents preset apply default # frontier + local fallback (recommended) +specsmith agents preset apply local-only # 100% Ollama +specsmith agents preset apply frontier-only # Claude Opus everywhere +specsmith agents preset apply cost-conscious # Haiku coder, Sonnet architect +``` + +After applying: + +```bash +specsmith agents list +* coder role=coder anthropic/claude-sonnet-4-5 + fallback: mistral/codestral-latest → ollama/qwen2.5-coder:32b + architect role=architect anthropic/claude-opus-4 + fallback: openai/gpt-5 → ollama/qwen2.5:32b + reviewer role=reviewer openai/gpt-5-codex ← different family! + … +``` + +The `*` marks the **default profile**, used when no route matches. + +--- + +## 2. Inspect & customise the routing table + +```bash +specsmith agents route show +* chat → coder + /plan → architect + /fix → coder + /review → reviewer + phase:requirements → researcher + … +``` + +Re-bind any activity: + +```bash +specsmith agents route set /review opus-reviewer +specsmith agents route clear /audit +``` + +The `phase:` routes are auto-maintained: `specsmith phase next` (G3) +also pins a `phase:active` route to the new phase's preferred profile so +the runner can flip the whole session by listening for one activity. + +--- + +## 3. 
Add your own profile + +```bash +specsmith agents add \ + --id sonnet-coder \ + --role coder \ + --provider anthropic \ + --model claude-sonnet-4-5 \ + --capability code \ + --capability function-calling \ + --fallback ollama/qwen2.5-coder:32b +``` + +If your new coder shares a provider family with the existing reviewer, +the **diversity guard** (G1) prints a warning so the cross-check the +reviewer is supposed to provide doesn't degenerate: + +``` +✓ saved profile sonnet-coder +⚠ reviewer (reviewer, anthropic/claude-opus-4) shares the 'anthropic' + family with sonnet-coder (coder, anthropic/claude-sonnet-4-5); + diversity is recommended so the reviewer can catch the coder's blind spots. +``` + +The warning is non-fatal — the profile still saves — but you should +either pick a reviewer in a different family or accept the trade-off +deliberately. + +### Filter by capability + +```bash +specsmith agents list --capability code-review +specsmith agents list --capability mcp --json +``` + +`--capability` is the easiest way to find every profile that advertises +a given strength so the right `route set` command writes itself. + +--- + +## 4. Per-session overrides + +Three knobs override the routing table for one session: + +```bash +specsmith run --agent opus-reviewer # pin a profile +specsmith chat --agent haiku-coder # one-shot +specsmith run --endpoint home-vllm # pin a BYOE endpoint +``` + +Inside a running session, the slash command `/agent ` flips the +profile mid-session: + +``` +nexus> /agent opus-reviewer +ℹ profile = opus-reviewer +``` + +Pinning a profile via `/agent` writes a **TraceVault decision seal** +(G4) into `.specsmith/trace.jsonl`, so every "I switched to model X for +this turn" choice is cryptographically chained into the audit trail. +You can confirm with `specsmith trace log --type decision`. 
+ +### Token accounting (C1) + +The runner now reports real `tokens_in` / `tokens_out` for every turn +on every provider that exposes them (Ollama via `prompt_eval_count` + +`eval_count`, Anthropic via `final_message.usage`, OpenAI via +`stream_options.include_usage`, Gemini via `usage_metadata`). When the +SDK omits usage, a 4-chars/token fallback gives the TokenMeter chip a +non-zero value to show. Per-profile totals show up in +`AgentState.by_profile` and the VS Code TokenMeter splits accordingly. + +--- + +## 5. Bring-Your-Own-Endpoint (BYOE) + +A **profile** can bind to a registered OpenAI-v1-compatible endpoint +instead of a built-in provider: + +```bash +# Register the endpoint once +specsmith endpoints add \ + --id home-vllm \ + --base-url http://10.0.0.4:8000/v1 \ + --default-model qwen2.5-coder \ + --auth bearer-keyring # token prompted, stored in OS keyring + +# Bind a profile to it +specsmith agents add \ + --id local-coder \ + --role coder \ + --provider openai-compat \ + --endpoint home-vllm \ + --fallback ollama/qwen2.5-coder:7b + +specsmith agents route set /code local-coder +``` + +The runner now routes `/code` through `home-vllm`. If the box is +unreachable, the fallback chain walks `ollama/qwen2.5-coder:7b` next +(see `tests/test_fallback_chain.py` for the full retry policy: 408, +429, and 5xx fall through, 4xx surfaces immediately). 
+ +--- + +## Reference + +- [REQ-146 — Agent profiles + activity routing](../REQUIREMENTS.md) +- [`specsmith.agent.profiles`](../../src/specsmith/agent/profiles.py) — `Profile`, `ProfileStore`, `apply_preset`, `provider_family` +- [`specsmith.agent.fallback`](../../src/specsmith/agent/fallback.py) — `run_with_fallback`, `parse_target` +- [`docs/site/api-stability.md`](api-stability.md) — public surface contract diff --git a/docs/site/api-stability.md b/docs/site/api-stability.md index f90e820..eec1fbf 100644 --- a/docs/site/api-stability.md +++ b/docs/site/api-stability.md @@ -29,8 +29,6 @@ change ships with a CHANGELOG entry but does not require a major bump. router, rules). Their **shape** is stable but signatures may grow. - The `specsmith chat` CLI flags. New event kinds and flags may be added; existing keys will not be removed without a deprecation cycle. -- `specsmith cloud spawn` manifest format. The current `manifest.json` - layout is provisional while the cloud endpoint is being designed. - `.specsmith/sessions//turns.jsonl` schema (REQ-120). Fields will be additive, but the file format itself may switch from JSONL to a database in a future release. @@ -45,11 +43,9 @@ change ships with a CHANGELOG entry but does not require a major bump. We will only stamp 1.0 once: 1. The Nexus chat block protocol has been used by at least one external IDE integration for two minor releases. -2. The cloud agent surface has graduated from stub to a documented - endpoint contract. -3. The mypy strict carveout in `pyproject.toml` has been emptied except +2. The mypy strict carveout in `pyproject.toml` has been emptied except for explicitly third-party-typed modules. -4. The performance baseline (REQ-124) has been published in +3. The performance baseline (REQ-124) has been published in `.specsmith/perf/baseline.json` for at least three releases without regression. 
Until those criteria are met, expect a steady stream of pre-1.0 minor @@ -82,10 +78,10 @@ Then call out the change in the next CHANGELOG entry. A second test, `test_api_surface_contains_required_1_0_commands`, asserts that a hand-picked subset of 1.0-contract commands (`preflight`, `verify`, `audit`, `validate`, `doctor`, `scan`, `init`, `import`, `ledger`, -`drive`, `history`, `chat`, `chat-export-block`, `cloud`, `cloud-serve`, -`voice`, `api-surface`, `suggest-command`, `serve`) is *always* present. -This catches accidental command removal even when someone updates the -fixture without checking what they removed. +`drive`, `history`, `chat`, `chat-export-block`, `voice`, `api-surface`, +`suggest-command`, `serve`) is *always* present. This catches accidental +command removal even when someone updates the fixture without checking +what they removed. ### What is *not* covered by the snapshot - Subcommand names of Click groups (e.g. `voice transcribe`). The group name is in `cli_commands`; the subcommands are covered by their own diff --git a/docs/site/cloud-agents.md b/docs/site/cloud-agents.md deleted file mode 100644 index 8ac37a1..0000000 --- a/docs/site/cloud-agents.md +++ /dev/null @@ -1,76 +0,0 @@ -# Cloud Agents — Endpoint Contract (Pre-1.0) -This document specifies the contract a cloud-agent receiver must implement -so the local `specsmith cloud spawn` CLI can hand off a task. The contract -is intentionally narrow for pre-1.0; it will widen once at least one -external receiver has been implemented end-to-end (see -`docs/site/api-stability.md` for 1.0 criteria). -## Endpoint -A cloud receiver is any HTTP endpoint reachable via the URL configured in -`SPECSMITH_CLOUD_ENDPOINT` (or `--endpoint`). It MUST accept: -```http -POST -Content-Type: application/json -Authorization: Bearer (optional) -``` -The body is the **manifest** described below. 
The endpoint MAY return: -- `200 OK` with a streaming body of newline-delimited JSON events - (`Content-Type: application/jsonl` or `application/x-ndjson`). -- `202 Accepted` with `Location:` header pointing to a follow-up URL the - CLI can long-poll. -- `4xx`/`5xx` with a JSON error body `{"error": ""}`. -The current `specsmith cloud spawn` implementation reads the response as a -single stream, so receivers SHOULD stream JSONL on `200 OK`. The -`202 Accepted` follow-up flow is reserved for a future minor release. -## Manifest schema -The CLI writes the manifest to -`.specsmith/cloud//manifest.json` for auditability and POSTs the -same payload to the endpoint: -```json -{ - "run_id": "cloud_<12-hex>", - "utterance": "", - "workspace": "workspace.tar.gz", - "endpoint": "", - "dry_run": false -} -``` -The corresponding `workspace.tar.gz` lives next to `manifest.json` and -contains the local working tree minus `.git`, `.venv`, `.specsmith`, -`node_modules`, `dist`, and `build`. The receiver SHOULD reject manifests -where `workspace` is missing or larger than 100 MB; specsmith's local -copy is provided for auditing only. -## Stream protocol (response body) -The receiver MUST emit the same JSONL block protocol that `specsmith chat` -emits locally (REQ-113). This lets IDEs (the VS Code extension, the GUI) -consume cloud and local runs identically. The minimum viable event set: -- `block_start` — opens a `plan`, `message`, `tool_call`, `tool_result`, - or `diff` block. -- `block_complete` — closes the most recent block of that id. -- `task_complete` — terminal event with `success`, `confidence`, - `summary`, `profile`, and optional `comments`. -The full event vocabulary is documented in -`src/specsmith/agent/events.py`. Receivers MAY emit additional event kinds -prefixed with `cloud_*` (e.g. `cloud_progress`); local consumers ignore -unknown event kinds without failing. -## Security baseline (pre-1.0) -- The CLI never sends API keys in the manifest. 
The receiver MUST obtain - its own credentials. -- The workspace tarball MUST be treated as untrusted user input. -- The receiver SHOULD honor `dry_run: true` by responding with a single - `task_complete` event of `{"success": false, "summary": "dry-run"}` and - no side effects. -- TLS is REQUIRED when `endpoint` does not point at `localhost`. -## What's deferred for after 1.0 -- Authentication header standardization (currently informally - `Authorization: Bearer ...`). -- Resumable / chunked upload for large workspaces. -- Multi-tenant manifest ids (today the CLI generates `cloud_`, - the receiver MAY assign its own canonical id). -- Push-style result delivery (today receivers respond synchronously; a - webhook-based callback is a candidate future feature). -## Reference: minimum viable receiver -A 50-line aiohttp / FastAPI receiver that accepts the manifest, runs a -fixed Nexus orchestrator turn, and emits the JSONL block protocol back is -sufficient to claim compatibility with the pre-1.0 contract. The -specsmith repo will publish a reference receiver under -`examples/cloud-receiver/` in a follow-up minor release. diff --git a/docs/site/endpoints.md b/docs/site/endpoints.md new file mode 100644 index 0000000..25aa0f6 --- /dev/null +++ b/docs/site/endpoints.md @@ -0,0 +1,117 @@ +# Bring-Your-Own-Endpoint (BYOE) + +Specsmith ships first-class support for self-hosted OpenAI-v1-compatible +LLM servers (vLLM, llama.cpp `server`, LM Studio, TGI, +text-generation-webui, …). Every endpoint you register can be selected +per session via `--endpoint ` on `specsmith run`, `chat`, and +`serve` (PR-2). 
+ +## Quick start + +Register a vLLM running on your LAN: + +```sh +specsmith endpoints add \ + --id home-vllm \ + --name "Home vLLM" \ + --base-url http://10.0.0.4:8000/v1 \ + --default-model Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8 \ + --auth none \ + --set-default + +specsmith endpoints test home-vllm +``` + +Once the test reports `ok`, run an agent against it: + +```sh +specsmith run --endpoint home-vllm "summarise the last commit" +``` + +## Storage layout + +All endpoints live in `~/.specsmith/endpoints.json` (override with +`SPECSMITH_HOME`). The on-disk schema is versioned: + +```json +{ + "schema_version": 1, + "default_endpoint_id": "home-vllm", + "endpoints": [ + { + "id": "home-vllm", + "name": "Home vLLM", + "base_url": "http://10.0.0.4:8000/v1", + "auth": {"kind": "bearer-keyring", + "keyring_service": "specsmith", + "keyring_user": "endpoint:home-vllm"}, + "default_model": "Qwen/Qwen2.5-Coder-32B", + "verify_tls": true, + "tags": ["local", "coder"], + "created_at": "2026-05-01T11:30:17Z" + } + ] +} +``` + +The file is written `chmod 600` on POSIX. Token bytes for the inline +strategy are the only secret material that ever lands in this file — +the keyring and env-var strategies leave it secret-free. + +## Auth strategies + +| Kind | Where the token lives | When to use | +|------------------|----------------------------------------------------|-------------| +| `none` | nowhere — request is unauthenticated | trusted LAN, open vLLM dev box | +| `bearer-inline` | `endpoints.json` (plaintext, `chmod 600`) | quick scratch setups where keyring is unavailable | +| `bearer-env` | the env var name you specify (`--token-env FOO`) | CI / containers / 12-factor deploys | +| `bearer-keyring` | OS keyring, indexed by `(service, user)` (default) | desktop / laptop installs (default) | + +The `list --json` output redacts inline tokens to `"***"`. The CLI +never logs token bytes to terminal output. 
+
+## Health checks
+
+```sh
+specsmith endpoints test home-vllm --json
+specsmith endpoints models home-vllm --json
+```
+
+`test` calls `/v1/models` with the resolved bearer token, prints
+the latency in milliseconds, and reports up to 5 model ids. `models`
+returns the full list.
+
+If the endpoint does not expose `/v1/models`, `test` will still return a
+clear error message — set `default_model` manually and rely on the
+session-level model dropdown instead.
+
+## CLI reference
+
+| Command | Notes |
+|---------|-------|
+| `specsmith endpoints add` | Register a new endpoint. `--auth bearer-keyring` (default) prompts for the secret without echo. |
+| `specsmith endpoints list [--json]` | Tabular by default, JSON for IDE consumers. Tokens are redacted. |
+| `specsmith endpoints remove <id> [--purge-keyring]` | Remove the entry; pass `--purge-keyring` to also delete the saved token. |
+| `specsmith endpoints default <id>` | Promote an existing endpoint to the default. |
+| `specsmith endpoints test [<id>] [--timeout 5]` | Probe `/v1/models`. Exits 1 on failure. |
+| `specsmith endpoints models [<id>]` | List every model the endpoint advertises. |
+
+## Security notes
+
+* The store path is `chmod 600` on POSIX where supported.
+* `verify_tls: false` is opt-in (`--no-verify-tls`); otherwise the CLI
+  verifies the certificate chain. Disabling it for an https endpoint is
+  documented per-endpoint in the on-disk JSON so a drift audit can spot
+  insecure configurations.
+* `auth.kind == bearer-inline` is functional but not recommended.
+  Prefer `bearer-keyring` when the OS keyring is available; otherwise
+  use `bearer-env` and inject the secret through your shell or
+  container environment.
+
+## Roadmap
+
+* **PR-2 (this milestone):** wires `--endpoint <id>` into `run`,
+  `chat`, and `serve`, plus a new `_run_openai_compat` provider driver.
+* **PR-3:** Endpoints tab and a per-session dropdown in the
+  `specsmith-vscode` extension.
+* **PR-4:** 0.10.0 release notes + tag.
diff --git a/docs/site/quickstart.md b/docs/site/quickstart.md new file mode 100644 index 0000000..0051621 --- /dev/null +++ b/docs/site/quickstart.md @@ -0,0 +1,108 @@ +# Five-Minute Quickstart +This page is the **reproducible** version of the README's elevator pitch: +copy the commands top-to-bottom and you'll end up with a fresh project, +a multi-agent profile set, a routed `/plan` → architect → coder pipeline, +and a TraceVault sealed audit chain you can verify after the fact. + +> **GIF placeholder.** A 30-second screen recording showing the same +> commands running end-to-end will live at +> `docs/site/_static/quickstart.gif`. Until that lands, the script in +> [scripts/quickstart.sh](#reproduction-script) is the source of truth. + +--- + +## Prerequisites +- Python 3.10+ (`pipx install specsmith` or `pip install specsmith`) +- One LLM provider configured (any of): + - `ANTHROPIC_API_KEY=sk-…` for Claude + - `OPENAI_API_KEY=sk-…` for GPT/O-series + - `GOOGLE_API_KEY=…` for Gemini + - Ollama running locally (`ollama serve`) — no key needed + +The reproduction script intentionally has *no* timing-sensitive steps so +it's safe to run unattended in CI. + +--- + +## Reproduction script +```bash +#!/usr/bin/env bash +# scripts/quickstart.sh — five-minute walkthrough, idempotent. +set -euo pipefail +export SPECSMITH_NO_AUTO_UPDATE=1 +export SPECSMITH_PYPI_CHECKED=1 + +# 1. Scaffold a fresh project. +specsmith init --output-dir /tmp \ + --config <(cat <<'YAML' +name: quickstart-demo +type: cli-python +language: python +description: "specsmith multi-agent quickstart demo" +YAML +) +cd /tmp/quickstart-demo + +# 2. Install the recommended profile preset. +specsmith agents preset apply default +specsmith agents list +specsmith agents route show + +# 3. Add a custom local-coder profile (diversity guard fires). 
+specsmith agents add \ + --id local-coder \ + --role coder \ + --provider ollama \ + --model qwen2.5-coder:32b \ + --capability code \ + --fallback ollama/qwen2.5-coder:7b + +# 4. Filter by capability — handy for finding "what can do X". +specsmith agents list --capability code --json + +# 5. Optional: register a self-hosted endpoint (BYOE). +# specsmith endpoints add \ +# --id home-vllm \ +# --base-url http://10.0.0.4:8000/v1 \ +# --default-model qwen2.5-coder \ +# --auth bearer-keyring + +# 6. Drive a single turn through the routing table. +echo "/plan add a hello-world handler" | \ + specsmith run --json-events --task "/plan add a hello-world handler" + +# 7. Pin a profile mid-session — emits a TraceVault decision seal. +echo "/agent opus-reviewer" | specsmith run --json-events +specsmith trace log --type decision + +# 8. Advance the AEE phase — auto-routes phase:active to the new phase. +specsmith phase next --force +specsmith agents route show | grep phase:active +``` + +Save the script anywhere on your machine and run it; the only side +effects are inside `/tmp/quickstart-demo`, `~/.specsmith/agents.json`, +and (if you uncomment step 5) `~/.specsmith/endpoints.json`. + +--- + +## What you should see +| Step | Expected output | +|------|---------------------------------------------------------------------------------| +| 1 | `Done. N files created in /tmp/quickstart-demo` | +| 2 | `✓ applied preset default — 7 profiles, 22 routes` | +| 3 | `✓ saved profile local-coder` *plus* a yellow `⚠ … shares the 'ollama' family…` diversity warning if a same-family reviewer exists. | +| 4 | A JSON document with one entry whose `id` is `local-coder`. | +| 6 | A JSONL stream beginning with `{"type": "ready", …}` followed by `block_start`, `token`, `block_complete`, `task_complete`. | +| 7 | `✓ Sealed as SEAL-0001` (or whichever sequence number is next). | +| 8 | A `phase:active` line in the routing table pointing at the new phase's profile. 
| + +If any step fails, run `specsmith doctor --onboarding` to surface what's +missing and re-run from that step. + +--- + +## Next steps +- [`docs/site/agents.md`](agents.md) — the full multi-agent walkthrough +- [`docs/site/api-stability.md`](api-stability.md) — the public surface contract +- [`docs/site/vscode-extension.md`](vscode-extension.md) — VS Code Workbench surfaces diff --git a/docs/site/vscode-extension.md b/docs/site/vscode-extension.md index f41159a..6a0ddaf 100644 --- a/docs/site/vscode-extension.md +++ b/docs/site/vscode-extension.md @@ -119,7 +119,7 @@ Open with `Ctrl+Shift+G` or the `📖` toolbar icon. ### Tab: Tools - **FPGA/HDL tools** (21 tools) — vivado, quartus, gtkwave, ghdl, iverilog, verilator, vsg, yosys, nextpnr, symbiyosys, and more - **Auxiliary disciplines** — add mixed-discipline support (e.g. FPGA + embedded C + Python verification) -- **CI/CD build platforms** — linux, windows, macos, embedded, cloud, FPGA variants (target deploy/test platforms, not the host OS) +- **CI/CD build platforms** — linux, windows, macos, embedded, FPGA variants (target deploy/test platforms, not the host OS) - **Installed Ollama models** with Update / Remove buttons - All saved to `fpga_tools:`, `platforms:` in scaffold.yml @@ -233,6 +233,34 @@ installed model list before spawning the session. --- +## Multi-Agent + BYOE Surfaces (0.10.0) +The extension exposes the CLI's `agents` (REQ-146) and `endpoints` (REQ-142) +stores as two sidebar trees plus eight Command Palette entries. Each +command shells out to `specsmith --json` so the on-disk +schema lives in exactly one place. +### Sidebar trees +- **BYOE Endpoints** (`specsmith.endpoints` view) — every entry from + `~/.specsmith/endpoints.json`; the entry marked `★` is the default. +- **Agent Profiles** (`specsmith.agents` view) — grouped under *Profiles* + (with `★` on the default) and *Routes* (`activity → profile_id`). 
+### Commands +| Command palette | Action | +|--------------------------------------------------|------------------------------------------------------------------------| +| `specsmith: BYOE Endpoints…` | Quick Pick over endpoints with copy-id / set-default / test actions. | +| `specsmith: Test BYOE Endpoint` | Probes `/v1/models`; toast shows latency + model count. | +| `specsmith: Refresh BYOE Endpoints` | Re-runs `specsmith endpoints list --json` and refreshes the tree. | +| `specsmith: Agent Profiles…` | Quick Pick over profiles; copy id, set default, route to activity. | +| `specsmith: Test Agent Profile` | Probes the resolved provider / endpoint and shows reachability. | +| `specsmith: Refresh Agent Profiles` | Re-runs `specsmith agents list --json` and refreshes the tree. | +| `specsmith: Apply Agent Preset (default / local-only / frontier-only / cost-conscious)` | Runs `specsmith agents preset apply `. | +| `specsmith: Route Activity to Agent Profile` | Picks an activity (`/plan`, `/fix`, `phase:requirements`, …) and a profile, then runs `specsmith agents route set`. | +| `specsmith: Pick Session Profile` | Per-session pin for the active SessionPanel; appends `--agent ` to the bridge invocation. | +The SessionPanel header chip surfaces the resolved profile + endpoint for +the current turn; click it to open the picker without leaving the chat. +### `/agent ` from chat +Typing `/agent opus-reviewer` in the chat input flips the active session +to the named profile and writes a TraceVault decision seal so the change +is chained into `.specsmith/trace.jsonl`. 
## Keyboard Shortcuts | Shortcut | Action | diff --git a/pyproject.toml b/pyproject.toml index 89ab47c..673f040 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "specsmith" -version = "0.7.0" +version = "0.10.0" description = "Applied Epistemic Engineering toolkit — AEE agent sessions, execution profiles, FPGA/HDL governance, tool installer, 50+ CLI commands." readme = "README.md" license = "MIT" @@ -171,6 +171,9 @@ module = [ "specsmith.importer", "specsmith.agent.providers.gemini", "specsmith.agent.runner", + "specsmith.agent.profiles", + "specsmith.agent.fallback", + "specsmith.agent.core", "specsmith.agent.cleanup", "specsmith.agent.orchestrator", "specsmith.agent.repl", diff --git a/src/specsmith/__init__.py b/src/specsmith/__init__.py index 9d8b4d2..926d625 100644 --- a/src/specsmith/__init__.py +++ b/src/specsmith/__init__.py @@ -8,4 +8,4 @@ try: __version__: str = _pkg_version("specsmith") except PackageNotFoundError: # running from source without install - __version__ = "0.3.6" # fallback: keep in sync with pyproject.toml + __version__ = "0.10.0" # fallback: keep in sync with pyproject.toml diff --git a/src/specsmith/agent/chat_runner.py b/src/specsmith/agent/chat_runner.py index 24b62cf..78f9f02 100644 --- a/src/specsmith/agent/chat_runner.py +++ b/src/specsmith/agent/chat_runner.py @@ -53,6 +53,14 @@ class ChatRunResult: files_changed: list[str] = field(default_factory=list) verdict: VerifierVerdict | None = None raw_text: str = "" + # C1: per-turn token + cost accounting. Populated by the provider + # driver when it can read counters from the response (Ollama and + # Anthropic both expose them). Falls back to a deterministic char- + # based heuristic so the TokenMeter chip is never zero on Ollama or + # OpenAI-compat endpoints that don't surface usage in streaming mode. 
+ tokens_in: int = 0 + tokens_out: int = 0 + cost_usd: float = 0.0 def to_dict(self) -> dict[str, Any]: return { @@ -61,6 +69,9 @@ def to_dict(self) -> dict[str, Any]: "files_changed": list(self.files_changed), "confidence": self.verdict.confidence if self.verdict else 0.0, "equilibrium": self.verdict.equilibrium if self.verdict else False, + "tokens_in": int(self.tokens_in), + "tokens_out": int(self.tokens_out), + "cost_usd": float(self.cost_usd), } @@ -80,43 +91,122 @@ def run_chat( history: list[dict[str, Any]] | None = None, confidence_target: float = 0.7, rules_prefix: str = "", + endpoint_id: str | None = None, ) -> ChatRunResult | None: - """Drive a real LLM turn. Return ``None`` if no provider is reachable.""" + """Drive a real LLM turn. Return ``None`` if no provider is reachable. + + When ``endpoint_id`` is set, the BYOE store (REQ-142) is consulted and + the resolved :class:`Endpoint` short-circuits the provider chain via + the new :func:`_run_openai_compat` driver. Any error during endpoint + resolution falls back to the legacy auto-detect chain so an offline + misconfigured endpoint never breaks `specsmith chat`. + """ history = history or [] messages = _build_messages(utterance, history, rules_prefix) + # REQ-142: explicit endpoint override. + if endpoint_id: + try: + from specsmith.agent.endpoints import EndpointStore + + endpoint = EndpointStore.load().resolve(endpoint_id) + except Exception: # noqa: BLE001 - any failure → fall back to auto-detect + endpoint = None + if endpoint is not None: + try: + full_text, usage = _run_openai_compat( + messages, emitter, msg_block, endpoint=endpoint + ) + except Exception: # noqa: BLE001 - degrade to auto-detect + full_text, usage = None, _UsageDelta() + if full_text is not None: + return _finalize( + full_text, + "openai_compat", + project_dir, + confidence_target, + messages=messages, + usage=usage, + ) + # Order matters: Ollama first because it's local-first and free. 
for provider in (_run_ollama, _run_anthropic, _run_openai, _run_gemini): try: - full_text = provider(messages, emitter, msg_block) + full_text, usage = provider(messages, emitter, msg_block) except Exception: # noqa: BLE001 - any failure → next provider continue if full_text is None: continue - return _finalize(full_text, provider.__name__, project_dir, confidence_target) + return _finalize( + full_text, + provider.__name__, + project_dir, + confidence_target, + messages=messages, + usage=usage, + ) return None +@dataclass +class _UsageDelta: + """Per-turn token + cost counters reported by a provider driver. + + All fields default to ``0`` so callers can construct a zero-value + instance without caring whether the provider supports usage tracking. + """ + + tokens_in: int = 0 + tokens_out: int = 0 + cost_usd: float = 0.0 + + def _finalize( full_text: str, provider_fn_name: str, project_dir: Path, confidence_target: float, + *, + messages: list[dict[str, str]] | None = None, + usage: _UsageDelta | None = None, ) -> ChatRunResult: sections = _parse_output_contract(full_text) files_changed = _split_files_list(sections.get("files_changed", "")) report = report_from_chat_sections(sections, files_changed=files_changed) verdict = score(report, confidence_target=confidence_target) summary = (sections.get("plan") or full_text.strip()[:200]).strip() or verdict.summary + + # C1: when the provider didn't report exact counts, estimate from text. + # The four-chars-per-token rule of thumb is OpenAI's published guidance + # and matches Ollama / Anthropic / Gemini within ~10% across the model + # families we ship today — close enough for the TokenMeter chip and + # the ``credits record`` ledger event. 
+ if usage is None: + usage = _UsageDelta() + if usage.tokens_in == 0 and messages is not None: + usage.tokens_in = _estimate_tokens("\n".join(m.get("content", "") for m in messages)) + if usage.tokens_out == 0: + usage.tokens_out = _estimate_tokens(full_text) + return ChatRunResult( provider=provider_fn_name.removeprefix("_run_"), summary=summary, files_changed=files_changed, verdict=verdict, raw_text=full_text, + tokens_in=int(usage.tokens_in), + tokens_out=int(usage.tokens_out), + cost_usd=float(usage.cost_usd), ) +def _estimate_tokens(text: str) -> int: + """Rough char→token heuristic (4 chars/token, floor at 1 if non-empty).""" + if not text: + return 0 + return max(1, len(text) // 4) + + # --------------------------------------------------------------------------- # Provider drivers — each returns the full assembled text or None # --------------------------------------------------------------------------- @@ -126,13 +216,14 @@ def _run_ollama( messages: list[dict[str, str]], emitter: EventEmitter, block_id: str, -) -> str | None: +) -> tuple[str | None, _UsageDelta]: """Stream from a local Ollama daemon using only stdlib.""" host = os.environ.get("OLLAMA_HOST", DEFAULT_OLLAMA_HOST).rstrip("/") model = os.environ.get("SPECSMITH_OLLAMA_MODEL", DEFAULT_OLLAMA_MODEL) + usage = _UsageDelta() if not _ollama_alive(host): - return None + return None, usage payload = json.dumps({"model": model, "messages": messages, "stream": True}).encode("utf-8") req = Request( # noqa: S310 - URL is a hardcoded localhost default @@ -157,8 +248,13 @@ def _run_ollama( emitter.token(block_id, chunk) pieces.append(chunk) if obj.get("done"): + # C1: Ollama exposes prompt_eval_count + eval_count on the + # final ``done`` message. Cost is zero for local models. 
+ usage.tokens_in = int(obj.get("prompt_eval_count") or 0) + usage.tokens_out = int(obj.get("eval_count") or 0) + usage.cost_usd = 0.0 break - return "".join(pieces) if pieces else None + return ("".join(pieces) if pieces else None), usage def _ollama_alive(host: str) -> bool: @@ -173,14 +269,15 @@ def _run_anthropic( messages: list[dict[str, str]], emitter: EventEmitter, block_id: str, -) -> str | None: +) -> tuple[str | None, _UsageDelta]: """Use the anthropic SDK if installed and a key is configured.""" + usage = _UsageDelta() if not os.environ.get("ANTHROPIC_API_KEY"): - return None + return None, usage try: import anthropic except ImportError: - return None + return None, usage system = "\n".join(m["content"] for m in messages if m["role"] == "system") user_msgs = [m for m in messages if m["role"] != "system"] @@ -197,62 +294,183 @@ def _run_anthropic( if text: emitter.token(block_id, text) pieces.append(text) - return "".join(pieces) if pieces else None + # C1: pull final usage off the SDK's `final_message`. Cost is the + # caller's problem (rate-limit module knows the model price); we + # report tokens here and let the credits ledger compute USD. 
+ try: + final = stream.get_final_message() + usage.tokens_in = int(getattr(final.usage, "input_tokens", 0) or 0) + usage.tokens_out = int(getattr(final.usage, "output_tokens", 0) or 0) + except Exception: # noqa: BLE001 - usage is best-effort + pass + return ("".join(pieces) if pieces else None), usage def _run_openai( messages: list[dict[str, str]], emitter: EventEmitter, block_id: str, -) -> str | None: +) -> tuple[str | None, _UsageDelta]: """Use the openai SDK if installed and a key is configured.""" + usage = _UsageDelta() if not os.environ.get("OPENAI_API_KEY"): - return None + return None, usage try: from openai import OpenAI except ImportError: - return None + return None, usage client = OpenAI() + # ``stream_options.include_usage`` makes the final SSE chunk carry a + # populated ``usage`` block (otherwise streaming responses emit it as + # ``None``). Older SDK versions silently ignore unknown kwargs. stream = client.chat.completions.create( model=os.environ.get("OPENAI_MODEL", "gpt-4o-mini"), messages=messages, stream=True, + stream_options={"include_usage": True}, ) pieces: list[str] = [] for chunk in stream: - text = (chunk.choices[0].delta.content or "") if chunk.choices else "" - if text: - emitter.token(block_id, text) - pieces.append(text) - return "".join(pieces) if pieces else None + if chunk.choices: + text = chunk.choices[0].delta.content or "" + if text: + emitter.token(block_id, text) + pieces.append(text) + usage_obj = getattr(chunk, "usage", None) + if usage_obj is not None: + usage.tokens_in = int(getattr(usage_obj, "prompt_tokens", 0) or 0) + usage.tokens_out = int(getattr(usage_obj, "completion_tokens", 0) or 0) + return ("".join(pieces) if pieces else None), usage + + +def _run_openai_compat( + messages: list[dict[str, str]], + emitter: EventEmitter, + block_id: str, + *, + endpoint: Any, +) -> tuple[str | None, _UsageDelta]: + """Stream from a user-registered OpenAI-v1-compatible endpoint (REQ-142). 
+ + Uses raw stdlib HTTP so the openai SDK is not a hard dependency for + BYOE. Sends a streaming ``/chat/completions`` request, decodes the + Server-Sent-Events ``data:`` lines, and forwards each ``content`` + delta as a ``token`` event on ``block_id``. + """ + usage = _UsageDelta() + base_url = endpoint.base_url.rstrip("/") + url = f"{base_url}/chat/completions" + model = endpoint.default_model or os.environ.get("SPECSMITH_OPENAI_COMPAT_MODEL", "") + if not model: + # The endpoint did not pin a default model and the env override is + # absent. We cannot fabricate one; fall back to the auto-detect chain. + return None, usage + + headers: dict[str, str] = { + "Content-Type": "application/json", + "Accept": "text/event-stream", + } + try: + token = endpoint.resolve_token() + except Exception: # noqa: BLE001 - fall back to auto-detect chain + return None, usage + if token: + headers["Authorization"] = f"Bearer {token}" + + body = json.dumps( + { + "model": model, + "messages": messages, + "stream": True, + # Many vLLM/llama.cpp builds honour OpenAI's stream_options; + # the request is harmless if they don't. 
+ "stream_options": {"include_usage": True}, + } + ).encode("utf-8") + req = Request(url, data=body, headers=headers, method="POST") # noqa: S310 - user-supplied + + ctx = None + if not endpoint.verify_tls and url.startswith("https://"): + import ssl + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + pieces: list[str] = [] + try: + with urlopen(req, timeout=120, context=ctx) as resp: # noqa: S310 - user-supplied + for raw_line in resp: + line = raw_line.decode("utf-8", errors="replace").rstrip("\n\r") + if not line.startswith("data:"): + continue + payload = line[len("data:") :].strip() + if not payload or payload == "[DONE]": + if payload == "[DONE]": + break + continue + try: + obj = json.loads(payload) + except ValueError: + continue + choices = obj.get("choices") or [] + usage_obj = obj.get("usage") + if usage_obj: + usage.tokens_in = int(usage_obj.get("prompt_tokens") or 0) + usage.tokens_out = int(usage_obj.get("completion_tokens") or 0) + if not choices: + continue + delta = (choices[0] or {}).get("delta") or {} + chunk = str(delta.get("content") or "") + if chunk: + emitter.token(block_id, chunk) + pieces.append(chunk) + except (URLError, TimeoutError, OSError): + return None, usage + return ("".join(pieces) if pieces else None), usage def _run_gemini( messages: list[dict[str, str]], emitter: EventEmitter, block_id: str, -) -> str | None: +) -> tuple[str | None, _UsageDelta]: """Use google-genai SDK if installed and a key is configured.""" + usage = _UsageDelta() if not os.environ.get("GOOGLE_API_KEY"): - return None + return None, usage try: from google import genai except ImportError: - return None + return None, usage client = genai.Client() prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages) pieces: list[str] = [] + last_chunk: Any = None for chunk in client.models.generate_content_stream( model=os.environ.get("GEMINI_MODEL", "gemini-2.5-flash"), contents=prompt, ): + last_chunk = 
chunk text = getattr(chunk, "text", "") or "" if text: emitter.token(block_id, text) pieces.append(text) - return "".join(pieces) if pieces else None + # Gemini exposes ``usage_metadata`` on the final chunk. Field names + # vary across SDK versions; we accept the union. + meta = getattr(last_chunk, "usage_metadata", None) if last_chunk else None + if meta is not None: + usage.tokens_in = int( + getattr(meta, "prompt_token_count", 0) or getattr(meta, "input_token_count", 0) or 0 + ) + usage.tokens_out = int( + getattr(meta, "candidates_token_count", 0) + or getattr(meta, "output_token_count", 0) + or 0 + ) + return ("".join(pieces) if pieces else None), usage # --------------------------------------------------------------------------- diff --git a/src/specsmith/agent/core.py b/src/specsmith/agent/core.py new file mode 100644 index 0000000..8c7eb75 --- /dev/null +++ b/src/specsmith/agent/core.py @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Shared agent runtime primitives (REQ-145). + +Hosts low-level enums and dataclasses that span :mod:`specsmith.agent.runner`, +:mod:`specsmith.serve`, :mod:`specsmith.agent.profiles`, and +:mod:`specsmith.agent.fallback` without forcing them to import each other. + +The historical ``cli.py`` referenced ``ModelTier`` from this module before +it existed in the source tree (the file was lost in an earlier refactor), +which produced an ``ImportError`` the moment ``specsmith run`` was +invoked. Restoring the symbol here is the prerequisite for the bridge +``ready`` event handshake to land before the VS Code extension's 20 s +startup timeout fires. +""" + +from __future__ import annotations + +import enum +from dataclasses import dataclass, field +from typing import Any + + +class ModelTier(str, enum.Enum): + """Capability tier for an LLM call. + + Ordered cheapest → most capable so that a fallback chain can iterate + in declaration order without external metadata. 
+ """ + + FAST = "fast" + BALANCED = "balanced" + POWERFUL = "powerful" + + @classmethod + def parse( + cls, + value: str | ModelTier | None, + default: ModelTier | None = None, + ) -> ModelTier: + """Tolerant parser used by CLI option handlers.""" + if value is None or value == "": + return default or cls.BALANCED + if isinstance(value, cls): + return value + try: + return cls(str(value).strip().lower()) + except ValueError: + return default or cls.BALANCED + + +@dataclass +class AgentState: + """Mutable per-session metrics surfaced via ``specsmith serve``'s + ``GET /api/status`` endpoint and the VS Code TokenMeter chip. + + Field names mirror what :class:`specsmith.serve._AgentThread` reads off + ``runner._state``; do not rename without updating that consumer. + """ + + provider_name: str = "" + model_name: str = "" + profile_id: str = "" + session_tokens: int = 0 + tokens_in: int = 0 + tokens_out: int = 0 + total_cost_usd: float = 0.0 + tool_calls_made: int = 0 + elapsed_minutes: float = 0.0 + by_profile: dict[str, dict[str, Any]] = field(default_factory=dict) + + def credit( + self, + *, + profile_id: str, + tokens_in: int = 0, + tokens_out: int = 0, + cost_usd: float = 0.0, + tool_calls: int = 0, + ) -> None: + """Aggregate one turn's metrics into the running totals.""" + self.tokens_in += int(tokens_in) + self.tokens_out += int(tokens_out) + self.session_tokens = self.tokens_in + self.tokens_out + self.total_cost_usd += float(cost_usd) + self.tool_calls_made += int(tool_calls) + bucket = self.by_profile.setdefault( + profile_id or "(default)", + {"tokens_in": 0, "tokens_out": 0, "cost_usd": 0.0, "tool_calls": 0, "turns": 0}, + ) + bucket["tokens_in"] += int(tokens_in) + bucket["tokens_out"] += int(tokens_out) + bucket["cost_usd"] = round(bucket["cost_usd"] + float(cost_usd), 6) + bucket["tool_calls"] += int(tool_calls) + bucket["turns"] += 1 + + +__all__ = ["AgentState", "ModelTier"] diff --git a/src/specsmith/agent/endpoints.py 
b/src/specsmith/agent/endpoints.py new file mode 100644 index 0000000..b6b4a68 --- /dev/null +++ b/src/specsmith/agent/endpoints.py @@ -0,0 +1,493 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Bring-Your-Own-Endpoint (BYOE) data model and persistence (REQ-142). + +Specsmith historically hard-coded a closed provider list (``ollama`` / +``anthropic`` / ``openai`` / ``gemini`` / ``mistral``). This module +introduces a generic OpenAI-v1-compatible endpoint store so users can +register self-hosted vLLM, llama.cpp ``server``, LM Studio, TGI, or any +other ``/v1/chat/completions``-shaped backend and pick between several +side-by-side. + +Storage layout (``~/.specsmith/endpoints.json``): + +.. code-block:: json + + { + "schema_version": 1, + "default_endpoint_id": "home-vllm", + "endpoints": [ + { + "id": "home-vllm", + "name": "Home vLLM", + "base_url": "http://10.0.0.4:8000/v1", + "auth": {"kind": "bearer-keyring", + "keyring_service": "specsmith", + "keyring_user": "endpoint:home-vllm"}, + "default_model": "Qwen/Qwen2.5-Coder-32B", + "verify_tls": true, + "tags": ["local", "coder"], + "created_at": "2026-05-01T11:30:17Z" + } + ] + } + +Tokens are NEVER printed verbatim by anything in this module; ``list_all`` +serialisation routes through :func:`Endpoint.to_public_dict` which +redacts inline tokens to ``"***"``. 
+""" + +from __future__ import annotations + +import json +import os +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +SCHEMA_VERSION = 1 +DEFAULT_KEYRING_SERVICE = "specsmith" + +VALID_AUTH_KINDS = ("none", "bearer-inline", "bearer-env", "bearer-keyring") + + +class EndpointError(RuntimeError): + """Raised for user-facing endpoint errors (validation, missing token, ...).""" + + +# --------------------------------------------------------------------------- +# Data model +# --------------------------------------------------------------------------- + + +@dataclass +class EndpointAuth: + """Discriminated-union auth metadata. + + ``kind`` is one of: + + * ``none`` — no Authorization header (e.g. open vLLM on a trusted LAN). + * ``bearer-inline`` — token stored verbatim in ``endpoints.json``. + Only used when the user explicitly opts in; the on-disk plaintext + is documented as insecure. + * ``bearer-env`` — token resolved from ``token_env`` at call time. + * ``bearer-keyring`` — token stored in the OS keyring under + ``(keyring_service, keyring_user)``. 
+ """ + + kind: str = "none" + token: str = "" # only set when kind == "bearer-inline" + token_env: str = "" # only set when kind == "bearer-env" + keyring_service: str = DEFAULT_KEYRING_SERVICE + keyring_user: str = "" + + def to_dict(self) -> dict[str, Any]: + """On-disk shape (token included for ``bearer-inline``).""" + out: dict[str, Any] = {"kind": self.kind} + if self.kind == "bearer-inline": + out["token"] = self.token + elif self.kind == "bearer-env": + out["token_env"] = self.token_env + elif self.kind == "bearer-keyring": + out["keyring_service"] = self.keyring_service + out["keyring_user"] = self.keyring_user + return out + + def to_public_dict(self) -> dict[str, Any]: + """Redacted shape — never returns inline token bytes.""" + out: dict[str, Any] = {"kind": self.kind} + if self.kind == "bearer-inline": + out["token"] = "***" + elif self.kind == "bearer-env": + out["token_env"] = self.token_env + elif self.kind == "bearer-keyring": + out["keyring_service"] = self.keyring_service + out["keyring_user"] = self.keyring_user + return out + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> EndpointAuth: + kind = str(raw.get("kind") or "none").strip() + if kind not in VALID_AUTH_KINDS: + raise EndpointError(f"invalid auth kind {kind!r}; expected one of {VALID_AUTH_KINDS}") + return cls( + kind=kind, + token=str(raw.get("token") or ""), + token_env=str(raw.get("token_env") or ""), + keyring_service=str(raw.get("keyring_service") or DEFAULT_KEYRING_SERVICE), + keyring_user=str(raw.get("keyring_user") or ""), + ) + + +@dataclass +class Endpoint: + """A single OpenAI-v1-compatible endpoint registered for use with specsmith.""" + + id: str + name: str + base_url: str + auth: EndpointAuth = field(default_factory=EndpointAuth) + default_model: str = "" + verify_tls: bool = True + tags: list[str] = field(default_factory=list) + created_at: str = "" + + # ── Validation ───────────────────────────────────────────────────────── + + def validate(self) -> None: + 
"""Raise :class:`EndpointError` on structural problems.""" + if not self.id or not self.id.strip(): + raise EndpointError("endpoint id must be non-empty") + if any(c.isspace() for c in self.id): + raise EndpointError(f"endpoint id {self.id!r} must not contain whitespace") + if not self.base_url.startswith(("http://", "https://")): + raise EndpointError( + f"endpoint base_url {self.base_url!r} must start with http:// or https://" + ) + if self.auth.kind == "bearer-env" and not self.auth.token_env: + raise EndpointError("auth.kind == 'bearer-env' requires a non-empty token_env") + if self.auth.kind == "bearer-keyring" and not self.auth.keyring_user: + raise EndpointError( + "auth.kind == 'bearer-keyring' requires a keyring_user (defaults to endpoint:)" + ) + + # ── Token resolution ─────────────────────────────────────────────────── + + def resolve_token(self) -> str | None: + """Return the bearer token for this endpoint, or ``None`` for unauthenticated. + + Order of resolution mirrors :data:`EndpointAuth.kind`. Errors are + converted to :class:`EndpointError` so callers can surface a clean + message instead of a stack trace. 
+ """ + kind = self.auth.kind + if kind == "none": + return None + if kind == "bearer-inline": + return self.auth.token or None + if kind == "bearer-env": + value = os.environ.get(self.auth.token_env, "").strip() + if not value: + raise EndpointError( + f"endpoint {self.id!r} expects token in env var " + f"{self.auth.token_env!r}, but it is unset" + ) + return value + if kind == "bearer-keyring": + try: + import keyring + except Exception as exc: # noqa: BLE001 + raise EndpointError( + "keyring is not available — install python-keyring or " + "switch the endpoint to --auth bearer-env" + ) from exc + try: + value = keyring.get_password(self.auth.keyring_service, self.auth.keyring_user) + except Exception as exc: # noqa: BLE001 + raise EndpointError(f"keyring lookup failed: {exc}") from exc + if not value: + raise EndpointError( + f"endpoint {self.id!r} has no token stored in keyring " + f"({self.auth.keyring_service}/{self.auth.keyring_user})" + ) + return str(value) + raise EndpointError(f"unknown auth kind {kind!r}") + + # ── Health / discovery ───────────────────────────────────────────────── + + def health(self, *, timeout: float = 5.0) -> EndpointHealth: + """Probe ``/models`` and return a structured result. + + Network and HTTP errors are caught — the returned record always has + ``ok`` populated. ``models`` is empty when the endpoint does not + expose ``/models``; that is not an error in itself. 
+ """ + import urllib.error + import urllib.request + + url = self.base_url.rstrip("/") + "/models" + req = urllib.request.Request(url) # noqa: S310 - user-supplied + try: + token = self.resolve_token() + except EndpointError as exc: + return EndpointHealth( + ok=False, latency_ms=0.0, models=[], error=str(exc), status_code=None + ) + if token: + req.add_header("Authorization", f"Bearer {token}") + start = time.perf_counter() + try: + ctx = None + if not self.verify_tls and url.startswith("https://"): + import ssl + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + with urllib.request.urlopen( # noqa: S310 - user-supplied + req, timeout=timeout, context=ctx + ) as resp: + latency_ms = (time.perf_counter() - start) * 1000.0 + payload = json.loads(resp.read().decode("utf-8")) + models = _extract_model_ids(payload) + return EndpointHealth( + ok=True, + latency_ms=latency_ms, + models=models, + error="", + status_code=int(resp.status), + ) + except urllib.error.HTTPError as exc: + return EndpointHealth( + ok=False, + latency_ms=(time.perf_counter() - start) * 1000.0, + models=[], + error=f"HTTP {exc.code}", + status_code=int(exc.code), + ) + except Exception as exc: # noqa: BLE001 + return EndpointHealth( + ok=False, + latency_ms=(time.perf_counter() - start) * 1000.0, + models=[], + error=str(exc), + status_code=None, + ) + + # ── Serialisation ────────────────────────────────────────────────────── + + def to_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "name": self.name, + "base_url": self.base_url, + "auth": self.auth.to_dict(), + "default_model": self.default_model, + "verify_tls": bool(self.verify_tls), + "tags": list(self.tags), + "created_at": self.created_at, + } + + def to_public_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "name": self.name, + "base_url": self.base_url, + "auth": self.auth.to_public_dict(), + "default_model": self.default_model, + "verify_tls": 
bool(self.verify_tls), + "tags": list(self.tags), + "created_at": self.created_at, + } + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> Endpoint: + return cls( + id=str(raw.get("id") or "").strip(), + name=str(raw.get("name") or "").strip(), + base_url=str(raw.get("base_url") or "").strip(), + auth=EndpointAuth.from_dict(raw.get("auth") or {}), + default_model=str(raw.get("default_model") or "").strip(), + verify_tls=bool(raw.get("verify_tls", True)), + tags=[str(t) for t in (raw.get("tags") or [])], + created_at=str(raw.get("created_at") or ""), + ) + + +@dataclass +class EndpointHealth: + """Structured result of :meth:`Endpoint.health`.""" + + ok: bool + latency_ms: float + models: list[str] + error: str = "" + status_code: int | None = None + + def to_dict(self) -> dict[str, Any]: + return { + "ok": self.ok, + "latency_ms": round(self.latency_ms, 2), + "models": list(self.models), + "error": self.error, + "status_code": self.status_code, + } + + +def _extract_model_ids(payload: Any) -> list[str]: + """Pull a list of model id strings out of an OpenAI ``/v1/models`` body. + + Tolerates the two common shapes (``{"data": [{"id": ...}]}`` from real + OpenAI / vLLM and ``{"models": [...]}`` used by some proxies). 
+ """ + out: list[str] = [] + if isinstance(payload, dict): + candidates = payload.get("data") or payload.get("models") or [] + if isinstance(candidates, list): + for item in candidates: + if isinstance(item, dict) and "id" in item: + out.append(str(item["id"])) + elif isinstance(item, str): + out.append(item) + return out + + +# --------------------------------------------------------------------------- +# Store +# --------------------------------------------------------------------------- + + +def default_store_path() -> Path: + """Resolve ``~/.specsmith/endpoints.json``, honouring ``SPECSMITH_HOME``.""" + base = os.environ.get("SPECSMITH_HOME", "").strip() + home = Path(base) if base else Path.home() / ".specsmith" + return home / "endpoints.json" + + +@dataclass +class EndpointStore: + """Read/write wrapper around ``~/.specsmith/endpoints.json``. + + Tokens are never logged. Inline tokens (``auth.kind == "bearer-inline"``) + land in the JSON unchanged, but :meth:`list_public` redacts them. The + keyring-backed and env-backed paths never store secrets in the JSON at + all. + """ + + path: Path + schema_version: int = SCHEMA_VERSION + default_endpoint_id: str = "" + endpoints: list[Endpoint] = field(default_factory=list) + + # ── I/O ──────────────────────────────────────────────────────────────── + + @classmethod + def load(cls, path: Path | None = None) -> EndpointStore: + target = path or default_store_path() + if not target.exists(): + return cls(path=target) + try: + raw = json.loads(target.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise EndpointError( + f"endpoints store at {target} is corrupted: {exc}. " + "Move it aside or fix the JSON to continue." 
+ ) from exc + if not isinstance(raw, dict): + raise EndpointError(f"endpoints store at {target} must be a JSON object") + version = int(raw.get("schema_version") or 0) + if version != SCHEMA_VERSION: + raise EndpointError( + f"endpoints store at {target} uses schema_version={version}; " + f"this build of specsmith only understands {SCHEMA_VERSION}." + ) + endpoints_raw = raw.get("endpoints") or [] + if not isinstance(endpoints_raw, list): + raise EndpointError("endpoints store: 'endpoints' must be a list") + endpoints = [Endpoint.from_dict(item) for item in endpoints_raw] + return cls( + path=target, + schema_version=version, + default_endpoint_id=str(raw.get("default_endpoint_id") or ""), + endpoints=endpoints, + ) + + def save(self) -> None: + self.path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "schema_version": self.schema_version, + "default_endpoint_id": self.default_endpoint_id, + "endpoints": [e.to_dict() for e in self.endpoints], + } + self.path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + # Best-effort lock-down on POSIX + import contextlib + + with contextlib.suppress(Exception): + self.path.chmod(0o600) + + # ── CRUD ─────────────────────────────────────────────────────────────── + + def add(self, endpoint: Endpoint, *, replace: bool = False) -> None: + endpoint.validate() + if not endpoint.created_at: + endpoint.created_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + existing = self._index(endpoint.id) + if existing is not None: + if not replace: + raise EndpointError( + f"endpoint {endpoint.id!r} already exists. Use --replace to overwrite." 
+ ) + self.endpoints[existing] = endpoint + else: + self.endpoints.append(endpoint) + if not self.default_endpoint_id: + self.default_endpoint_id = endpoint.id + + def remove(self, endpoint_id: str) -> bool: + idx = self._index(endpoint_id) + if idx is None: + return False + self.endpoints.pop(idx) + if self.default_endpoint_id == endpoint_id: + self.default_endpoint_id = self.endpoints[0].id if self.endpoints else "" + return True + + def get(self, endpoint_id: str) -> Endpoint: + idx = self._index(endpoint_id) + if idx is None: + raise EndpointError(f"unknown endpoint id {endpoint_id!r}") + return self.endpoints[idx] + + def get_default(self) -> Endpoint | None: + if not self.default_endpoint_id: + return None + idx = self._index(self.default_endpoint_id) + if idx is None: + return None + return self.endpoints[idx] + + def set_default(self, endpoint_id: str) -> None: + if self._index(endpoint_id) is None: + raise EndpointError(f"unknown endpoint id {endpoint_id!r}") + self.default_endpoint_id = endpoint_id + + def list_all(self) -> list[Endpoint]: + return list(self.endpoints) + + def list_public(self) -> list[dict[str, Any]]: + return [e.to_public_dict() for e in self.endpoints] + + def resolve(self, endpoint_id: str | None) -> Endpoint: + """Return the named endpoint, or the default if ``endpoint_id`` is empty.""" + if endpoint_id: + return self.get(endpoint_id) + default = self.get_default() + if default is None: + raise EndpointError( + "no endpoint specified and no default is set. " + "Run `specsmith endpoints add ...` to register one." 
+ ) + return default + + # ── Internals ────────────────────────────────────────────────────────── + + def _index(self, endpoint_id: str) -> int | None: + for i, e in enumerate(self.endpoints): + if e.id == endpoint_id: + return i + return None + + +__all__ = [ + "DEFAULT_KEYRING_SERVICE", + "Endpoint", + "EndpointAuth", + "EndpointError", + "EndpointHealth", + "EndpointStore", + "SCHEMA_VERSION", + "VALID_AUTH_KINDS", + "default_store_path", +] diff --git a/src/specsmith/agent/events.py b/src/specsmith/agent/events.py index 5f075af..6a2daaa 100644 --- a/src/specsmith/agent/events.py +++ b/src/specsmith/agent/events.py @@ -19,6 +19,9 @@ * ``plan_step`` - status transition for a step in the active plan block (REQ-114). * ``task_complete`` - final block; carries final summary + profile. +* ``ready`` - emitted exactly once at process start (REQ-145); + the VS Code bridge waits up to 20 s for this + frame before declaring the agent unresponsive. """ from __future__ import annotations @@ -58,6 +61,61 @@ def emit(self, event: dict[str, Any]) -> None: with contextlib.suppress(Exception): self.stream.flush() + # ── Lifecycle helpers ──────────────────────────────────────────────── + + def ready( + self, + *, + agent: str = "nexus", + version: str = "", + project_dir: str = "", + provider: str = "", + model: str = "", + profile_id: str = "", + capabilities: list[str] | None = None, + **extra: Any, + ) -> None: + """Emit the bridge handshake frame (REQ-145). + + The VS Code extension's :class:`SpecsmithBridge` keys off this + single event to flip from ``starting`` → ``waiting`` and to start + flushing the queued user prompts. Schema is intentionally flat so + a ``JSON.parse`` line check is enough on the consumer side. 
+ """ + payload: dict[str, Any] = { + "type": "ready", + "timestamp": _now_iso(), + "agent": agent, + "version": version, + "project_dir": project_dir, + "provider": provider, + "model": model, + "profile_id": profile_id, + "capabilities": list(capabilities or []), + } + payload.update(extra) + self.emit(payload) + + def system(self, message: str, **extra: Any) -> None: + """Emit a free-form system note (matches bridge.ts handler).""" + self.emit({"type": "system", "message": message, **extra}) + + def turn_done(self, **extra: Any) -> None: + """Emit the per-turn terminator the bridge uses to clear timers.""" + self.emit({"type": "turn_done", "timestamp": _now_iso(), **extra}) + + def error(self, message: str, *, recoverable: bool = False, **extra: Any) -> None: + """Emit an error frame (recoverable = retry will be offered).""" + self.emit( + { + "type": "error", + "timestamp": _now_iso(), + "message": message, + "recoverable": bool(recoverable), + **extra, + } + ) + # ── Block helpers ──────────────────────────────────────────────────── def block_start(self, kind: str, *, agent: str = "nexus", **payload: Any) -> str: diff --git a/src/specsmith/agent/fallback.py b/src/specsmith/agent/fallback.py new file mode 100644 index 0000000..cb82c43 --- /dev/null +++ b/src/specsmith/agent/fallback.py @@ -0,0 +1,142 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Resilient fallback-chain executor for agent profiles (REQ-146). + +Profiles in :mod:`specsmith.agent.profiles` carry a ``fallback_chain`` +list of ``"/"`` or ``"endpoint:"`` strings. When the +primary call raises a transient error (timeout / connection refused / +HTTP 429 / HTTP 5xx), this module walks the chain in order until one +returns successfully or the chain is exhausted. + +The chain is **resilience**, not **routing** — picking the right primary +is the routing table's job. The chain only kicks in when the chosen +primary fails. 
+""" + +from __future__ import annotations + +import contextlib +import socket +from collections.abc import Callable, Iterable +from dataclasses import dataclass +from typing import Any +from urllib.error import HTTPError, URLError + +# (ruff I001 sentinel: imports above are intentionally grouped stdlib + typing) + + +# Errors we treat as worth falling through. Anything else is a programmer +# bug and should bubble up so we don't paper over correctness issues. +TRANSIENT_EXCEPTIONS: tuple[type[BaseException], ...] = ( + TimeoutError, + socket.timeout, + URLError, + ConnectionError, + OSError, +) + + +@dataclass +class FallbackAttempt: + """One step of an executed chain.""" + + target: str + ok: bool + error: str = "" + + def to_dict(self) -> dict[str, Any]: + return {"target": self.target, "ok": self.ok, "error": self.error} + + +@dataclass +class FallbackResult: + """Outcome of :func:`run_with_fallback`.""" + + value: Any + used: str = "" + attempts: list[FallbackAttempt] = None # type: ignore[assignment] + + def __post_init__(self) -> None: + if self.attempts is None: + self.attempts = [] + + +def _is_transient(exc: BaseException) -> bool: + if isinstance(exc, HTTPError): + return 500 <= int(getattr(exc, "code", 0) or 0) < 600 or exc.code in {408, 429} + return isinstance(exc, TRANSIENT_EXCEPTIONS) + + +def parse_target(target: str) -> tuple[str, str, str]: + """Decompose a chain entry into ``(kind, provider_or_id, model)``. 
+ + Examples:: + + parse_target("anthropic/claude-haiku-4-5") + # -> ("provider", "anthropic", "claude-haiku-4-5") + parse_target("ollama/qwen2.5:7b") + # -> ("provider", "ollama", "qwen2.5:7b") + parse_target("endpoint:home-vllm") + # -> ("endpoint", "home-vllm", "") + """ + cleaned = (target or "").strip() + if not cleaned: + return ("provider", "", "") + if cleaned.startswith("endpoint:"): + return ("endpoint", cleaned[len("endpoint:") :], "") + if "/" not in cleaned: + return ("provider", cleaned, "") + provider, _, model = cleaned.partition("/") + return ("provider", provider.strip(), model.strip()) + + +def run_with_fallback( + primary_target: str, + fallback_chain: Iterable[str], + invoke: Callable[[str, str, str], Any], + *, + on_attempt: Callable[[FallbackAttempt], None] | None = None, +) -> FallbackResult: + """Try the primary target; on transient failure walk the chain. + + ``invoke`` is called as ``invoke(kind, provider_or_id, model)`` and + must raise on failure. Any non-transient exception aborts the chain + immediately (we don't want to mask a programmer bug as an outage). + """ + targets = [primary_target] + [t for t in fallback_chain if t] + result = FallbackResult(value=None, attempts=[]) + for target in targets: + kind, ident, model = parse_target(target) + if not ident: + continue + try: + value = invoke(kind, ident, model) + except Exception as exc: # noqa: BLE001 + attempt = FallbackAttempt(target=target, ok=False, error=str(exc)) + result.attempts.append(attempt) + if on_attempt: + with contextlib.suppress(Exception): + on_attempt(attempt) + if not _is_transient(exc): + # Programmer error / auth failure — bubble up immediately + # so the caller sees the real cause. 
+ raise + continue + attempt = FallbackAttempt(target=target, ok=True) + result.attempts.append(attempt) + if on_attempt: + with contextlib.suppress(Exception): + on_attempt(attempt) + result.value = value + result.used = target + return result + return result # exhausted, value=None + + +__all__ = [ + "FallbackAttempt", + "FallbackResult", + "TRANSIENT_EXCEPTIONS", + "parse_target", + "run_with_fallback", +] diff --git a/src/specsmith/agent/profiles.py b/src/specsmith/agent/profiles.py new file mode 100644 index 0000000..98216ff --- /dev/null +++ b/src/specsmith/agent/profiles.py @@ -0,0 +1,655 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Agent Profiles + Activity Routing (REQ-146). + +A *profile* unifies ``(provider, model, endpoint_id?, prompt_prefix, +capabilities, fallback_chain)`` into a single named object. A *routing +table* maps activities (``/plan``, ``/fix``, ``/test``, AEE phases, MCP +tool categories) to a profile. The runner consults the table on every +turn, then falls back to the legacy single-provider path if no match +exists. + +Storage layout (``~/.specsmith/agents.json``):: + + { + "schema_version": 1, + "default_profile_id": "coder", + "profiles": [ + {"id": "architect", "role": "architect", + "provider": "anthropic", "model": "claude-opus-4", + "fallback_chain": ["openai/gpt-5", "ollama/qwen2.5:32b"], + "endpoint_id": "", "prompt_prefix": "", + "capabilities": ["reasoning", "long-context"]}, + ... + ], + "routes": { + "/plan": "architect", + "/fix": "coder", + ... + } + } + +Per-project overrides land at ``/.specsmith/agents.json`` with +the same schema. Missing keys inherit from the global file. 
+""" + +from __future__ import annotations + +import json +import os +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +SCHEMA_VERSION = 1 + + +VALID_ROLES = ( + "architect", + "coder", + "reviewer", + "editor", + "researcher", + "tester", + "classifier", + "generalist", +) + +# Provider “family” groupings used by the diversity guard (G1). Profiles in +# the same family are likely to share training data, system prompt biases, +# and hallucination patterns — so pairing the coder with a reviewer in the +# same family defeats the cross-check the reviewer is meant to provide. +# +# Anything not listed here is treated as its own family. +PROVIDER_FAMILIES: dict[str, str] = { + "anthropic": "anthropic", + "openai": "openai", + "openai-compat": "openai", + "azure-openai": "openai", + "gemini": "google", + "google": "google", + "google-genai": "google", + "mistral": "mistral", + "ollama": "ollama", + "llamacpp": "ollama", + "vllm": "ollama", + "lmstudio": "ollama", +} + + +def provider_family(provider: str) -> str: + """Return the family name for ``provider`` (or the provider verbatim).""" + key = (provider or "").strip().lower() + return PROVIDER_FAMILIES.get(key, key or "unknown") + + +# Default presets shipped with the CLI so a fresh install Just Works. +# The exact model strings can be customised per-deployment via +# ``specsmith agents preset apply `` or by editing the file directly. 
+DEFAULT_PRESETS: dict[str, dict[str, Any]] = { + "default": { + "default_profile_id": "coder", + "profiles": [ + { + "id": "architect", + "role": "architect", + "provider": "anthropic", + "model": "claude-opus-4", + "fallback_chain": ["openai/gpt-5", "ollama/qwen2.5:32b"], + "capabilities": ["reasoning", "long-context"], + }, + { + "id": "coder", + "role": "coder", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "fallback_chain": [ + "mistral/codestral-latest", + "ollama/qwen2.5-coder:32b", + ], + "capabilities": ["code", "function-calling"], + }, + { + "id": "reviewer", + "role": "reviewer", + "provider": "openai", + "model": "gpt-5-codex", + "fallback_chain": [ + "gemini/gemini-3-flash", + "ollama/deepseek-r1:14b", + ], + "capabilities": ["code-review", "different-family-from-coder"], + }, + { + "id": "editor", + "role": "editor", + "provider": "anthropic", + "model": "claude-haiku-4-5", + "fallback_chain": ["openai/gpt-5-mini", "ollama/qwen2.5-coder:7b"], + "capabilities": ["fast", "diff-apply"], + }, + { + "id": "researcher", + "role": "researcher", + "provider": "gemini", + "model": "gemini-3-pro", + "fallback_chain": ["ollama/qwen2.5:14b"], + "capabilities": ["search", "long-context", "mcp"], + }, + { + "id": "tester", + "role": "tester", + "provider": "mistral", + "model": "mistral-small-latest", + "fallback_chain": ["ollama/qwen2.5:14b"], + "capabilities": ["test-design"], + }, + { + "id": "classifier", + "role": "classifier", + "provider": "anthropic", + "model": "claude-haiku-4-5", + "fallback_chain": ["ollama/qwen2.5:3b"], + "capabilities": ["fast", "classification"], + }, + ], + "routes": { + "chat": "coder", + "/plan": "architect", + "/architect": "architect", + "/ask": "researcher", + "/fix": "coder", + "/code": "coder", + "/refactor": "coder", + "/test": "tester", + "/review": "reviewer", + "/why": "reviewer", + "/audit": "reviewer", + "/commit": "editor", + "/pr": "editor", + "/undo": "editor", + "/context": "researcher", + "/search": 
"researcher", + "phase:inception": "architect", + "phase:architecture": "architect", + "phase:requirements": "researcher", + "phase:test_spec": "tester", + "phase:implementation": "coder", + "phase:verification": "reviewer", + "phase:release": "editor", + "predict_next": "classifier", + "suggest_command": "classifier", + }, + }, + "local-only": { + "default_profile_id": "local-coder", + "profiles": [ + { + "id": "local-architect", + "role": "architect", + "provider": "ollama", + "model": "qwen2.5:32b", + "fallback_chain": ["ollama/qwen2.5:14b"], + }, + { + "id": "local-coder", + "role": "coder", + "provider": "ollama", + "model": "qwen2.5-coder:32b", + "fallback_chain": ["ollama/qwen2.5-coder:7b"], + }, + { + "id": "local-reviewer", + "role": "reviewer", + "provider": "ollama", + "model": "deepseek-r1:14b", + "fallback_chain": ["ollama/qwen2.5:7b"], + }, + { + "id": "local-editor", + "role": "editor", + "provider": "ollama", + "model": "qwen2.5-coder:7b", + "fallback_chain": [], + }, + { + "id": "local-classifier", + "role": "classifier", + "provider": "ollama", + "model": "qwen2.5:3b", + "fallback_chain": [], + }, + ], + "routes": { + "chat": "local-coder", + "/plan": "local-architect", + "/architect": "local-architect", + "/fix": "local-coder", + "/code": "local-coder", + "/test": "local-coder", + "/review": "local-reviewer", + "/why": "local-reviewer", + "/commit": "local-editor", + "/pr": "local-editor", + "predict_next": "local-classifier", + "suggest_command": "local-classifier", + }, + }, + "frontier-only": { + "default_profile_id": "opus-coder", + "profiles": [ + { + "id": "opus-architect", + "role": "architect", + "provider": "anthropic", + "model": "claude-opus-4", + "fallback_chain": [], + }, + { + "id": "opus-coder", + "role": "coder", + "provider": "anthropic", + "model": "claude-opus-4", + "fallback_chain": [], + }, + ], + "routes": { + "chat": "opus-coder", + "/plan": "opus-architect", + "/architect": "opus-architect", + }, + }, + "cost-conscious": { 
+ "default_profile_id": "haiku-coder", + "profiles": [ + { + "id": "haiku-coder", + "role": "coder", + "provider": "anthropic", + "model": "claude-haiku-4-5", + "fallback_chain": ["ollama/qwen2.5-coder:7b"], + }, + { + "id": "sonnet-architect", + "role": "architect", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "fallback_chain": ["ollama/qwen2.5:32b"], + }, + ], + "routes": { + "chat": "haiku-coder", + "/plan": "sonnet-architect", + "/architect": "sonnet-architect", + "/fix": "haiku-coder", + "/test": "haiku-coder", + }, + }, +} + + +class ProfileError(RuntimeError): + """Raised for user-facing profile / routing errors.""" + + +# --------------------------------------------------------------------------- +# Data model +# --------------------------------------------------------------------------- + + +@dataclass +class Profile: + """A single named agent configuration. + + ``fallback_chain`` entries are simple ``"/"`` strings + or ``"endpoint:"`` references; resolution is performed by + :mod:`specsmith.agent.fallback`. + """ + + id: str + role: str = "generalist" + provider: str = "ollama" + model: str = "" + endpoint_id: str = "" + prompt_prefix: str = "" + capabilities: list[str] = field(default_factory=list) + fallback_chain: list[str] = field(default_factory=list) + created_at: str = "" + + def validate(self) -> None: + if not self.id or not self.id.strip(): + raise ProfileError("profile id must be non-empty") + if any(c.isspace() for c in self.id): + raise ProfileError(f"profile id {self.id!r} must not contain whitespace") + if self.role and self.role not in VALID_ROLES: + # Roles are advisory but warn-on-set so downstream consumers + # don't trip on typos. We still allow the value through. 
+ pass + if not self.provider: + raise ProfileError(f"profile {self.id!r} requires a provider") + + def to_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "role": self.role, + "provider": self.provider, + "model": self.model, + "endpoint_id": self.endpoint_id, + "prompt_prefix": self.prompt_prefix, + "capabilities": list(self.capabilities), + "fallback_chain": list(self.fallback_chain), + "created_at": self.created_at, + } + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> Profile: + return cls( + id=str(raw.get("id") or "").strip(), + role=str(raw.get("role") or "generalist").strip(), + provider=str(raw.get("provider") or "ollama").strip(), + model=str(raw.get("model") or "").strip(), + endpoint_id=str(raw.get("endpoint_id") or "").strip(), + prompt_prefix=str(raw.get("prompt_prefix") or ""), + capabilities=[str(c) for c in (raw.get("capabilities") or [])], + fallback_chain=[str(c) for c in (raw.get("fallback_chain") or [])], + created_at=str(raw.get("created_at") or ""), + ) + + +# --------------------------------------------------------------------------- +# Store +# --------------------------------------------------------------------------- + + +def default_store_path() -> Path: + """Resolve ``~/.specsmith/agents.json``, honouring ``SPECSMITH_HOME``.""" + base = os.environ.get("SPECSMITH_HOME", "").strip() + home = Path(base) if base else Path.home() / ".specsmith" + return home / "agents.json" + + +def project_store_path(project_dir: str | Path) -> Path: + """Resolve ``/.specsmith/agents.json`` for per-project overrides.""" + return Path(project_dir).resolve() / ".specsmith" / "agents.json" + + +@dataclass +class ProfileStore: + """Read/write wrapper around the profiles JSON file. + + Supports a two-level inheritance model: a project-level file (when + constructed via :meth:`load_for_project`) overrides whatever is set + globally in ``~/.specsmith/agents.json``. 
+ """ + + path: Path + schema_version: int = SCHEMA_VERSION + default_profile_id: str = "" + profiles: list[Profile] = field(default_factory=list) + routes: dict[str, str] = field(default_factory=dict) + + # ── I/O ──────────────────────────────────────────────────────────── + + @classmethod + def load(cls, path: Path | None = None) -> ProfileStore: + target = path or default_store_path() + if not target.exists(): + return cls(path=target) + try: + raw = json.loads(target.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise ProfileError( + f"agents store at {target} is corrupted: {exc}. " + "Move it aside or fix the JSON to continue." + ) from exc + if not isinstance(raw, dict): + raise ProfileError(f"agents store at {target} must be a JSON object") + return cls._from_raw(target, raw) + + @classmethod + def load_for_project(cls, project_dir: str | Path) -> ProfileStore: + """Return a merged view of global + project-level profiles. + + Project profiles win on id collisions; routes are merged with + project entries taking precedence; ``default_profile_id`` is the + project value when set, else global. 
+ """ + global_store = cls.load() + project_path = project_store_path(project_dir) + if not project_path.exists(): + return global_store + try: + raw = json.loads(project_path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + return global_store + project_store = cls._from_raw(project_path, raw) + merged_profiles: dict[str, Profile] = {p.id: p for p in global_store.profiles} + for p in project_store.profiles: + merged_profiles[p.id] = p + merged_routes: dict[str, str] = dict(global_store.routes) + merged_routes.update(project_store.routes) + return cls( + path=project_path, + schema_version=SCHEMA_VERSION, + default_profile_id=project_store.default_profile_id or global_store.default_profile_id, + profiles=list(merged_profiles.values()), + routes=merged_routes, + ) + + @classmethod + def _from_raw(cls, path: Path, raw: dict[str, Any]) -> ProfileStore: + version = int(raw.get("schema_version") or 0) + if version and version != SCHEMA_VERSION: + raise ProfileError( + f"agents store at {path} uses schema_version={version}; " + f"this build of specsmith only understands {SCHEMA_VERSION}." 
+ ) + profiles_raw = raw.get("profiles") or [] + if not isinstance(profiles_raw, list): + raise ProfileError("agents store: 'profiles' must be a list") + profiles = [Profile.from_dict(item) for item in profiles_raw if isinstance(item, dict)] + routes_raw = raw.get("routes") or {} + if not isinstance(routes_raw, dict): + raise ProfileError("agents store: 'routes' must be an object") + routes = {str(k): str(v) for k, v in routes_raw.items()} + return cls( + path=path, + schema_version=SCHEMA_VERSION, + default_profile_id=str(raw.get("default_profile_id") or "").strip(), + profiles=profiles, + routes=routes, + ) + + def save(self) -> None: + self.path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "schema_version": self.schema_version, + "default_profile_id": self.default_profile_id, + "profiles": [p.to_dict() for p in self.profiles], + "routes": dict(self.routes), + } + self.path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + + # ── CRUD ─────────────────────────────────────────────────────────── + + def add(self, profile: Profile, *, replace: bool = False) -> None: + profile.validate() + if not profile.created_at: + profile.created_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + existing = self._index(profile.id) + if existing is not None: + if not replace: + raise ProfileError( + f"profile {profile.id!r} already exists. Use --replace to overwrite." + ) + self.profiles[existing] = profile + else: + self.profiles.append(profile) + if not self.default_profile_id: + self.default_profile_id = profile.id + + def remove(self, profile_id: str) -> bool: + idx = self._index(profile_id) + if idx is None: + return False + self.profiles.pop(idx) + if self.default_profile_id == profile_id: + self.default_profile_id = self.profiles[0].id if self.profiles else "" + # Drop any routing entries pointing at the removed profile. 
+ self.routes = {k: v for k, v in self.routes.items() if v != profile_id} + return True + + def get(self, profile_id: str) -> Profile: + idx = self._index(profile_id) + if idx is None: + raise ProfileError(f"unknown profile id {profile_id!r}") + return self.profiles[idx] + + def get_default(self) -> Profile | None: + if not self.default_profile_id: + return None + idx = self._index(self.default_profile_id) + return self.profiles[idx] if idx is not None else None + + def set_default(self, profile_id: str) -> None: + if self._index(profile_id) is None: + raise ProfileError(f"unknown profile id {profile_id!r}") + self.default_profile_id = profile_id + + def list_all(self) -> list[Profile]: + return list(self.profiles) + + def filter_by_capability(self, capability: str) -> list[Profile]: + """Return profiles whose ``capabilities`` list contains ``capability``. + + Matching is case-insensitive and trims whitespace. An empty + ``capability`` argument returns ``[]`` rather than “everything” so + callers can distinguish “no filter” (don’t call this method) from + “filter for an empty value” (which is never meaningful). + """ + needle = (capability or "").strip().lower() + if not needle: + return [] + return [ + p + for p in self.profiles + if any(needle == str(c).strip().lower() for c in p.capabilities) + ] + + def diversity_warnings(self, *, candidate: Profile | None = None) -> list[str]: + """Return a list of plain-English diversity warnings for the store. + + The reviewer profile exists to cross-check the coder; if both call + the same provider family the cross-check is degenerate. Same logic + applies to architect vs. reviewer (both should be skeptical of the + coder). When ``candidate`` is supplied the candidate is added to + the population *and* takes precedence over any same-id profile + already in the store, so a `specsmith agents add` invocation can + preview the warnings *before* writing the store. 
+ """ + population: dict[str, Profile] = {p.id: p for p in self.profiles} + if candidate is not None: + population[candidate.id] = candidate + by_role: dict[str, list[Profile]] = {} + for p in population.values(): + by_role.setdefault(p.role, []).append(p) + + warnings: list[str] = [] + for left_role, right_role in ( + ("coder", "reviewer"), + ("architect", "reviewer"), + ): + left = by_role.get(left_role) or [] + right = by_role.get(right_role) or [] + if not left or not right: + continue + for lp in left: + lf = provider_family(lp.provider) + for rp in right: + if provider_family(rp.provider) == lf: + warnings.append( + f"{rp.id} ({rp.role}, {rp.provider}/{rp.model}) " + f"shares the {lf!r} family with " + f"{lp.id} ({lp.role}, {lp.provider}/{lp.model}); " + "diversity is recommended so the reviewer can catch " + "the coder's blind spots." + ) + return warnings + + # ── Routing ───────────────────────────────────────────────── + + def set_route(self, activity: str, profile_id: str) -> None: + activity = activity.strip() + if not activity: + raise ProfileError("activity must be non-empty") + if self._index(profile_id) is None: + raise ProfileError(f"unknown profile id {profile_id!r}") + self.routes[activity] = profile_id + + def clear_route(self, activity: str) -> None: + self.routes.pop(activity, None) + + def resolve_for_activity(self, activity: str) -> Profile | None: + target_id = self.routes.get(activity) or self.default_profile_id + if not target_id: + return None + idx = self._index(target_id) + return self.profiles[idx] if idx is not None else self.get_default() + + # ── Internals ────────────────────────────────────────────────────── + + def _index(self, profile_id: str) -> int | None: + for i, p in enumerate(self.profiles): + if p.id == profile_id: + return i + return None + + +# --------------------------------------------------------------------------- +# Preset application +# --------------------------------------------------------------------------- 
+ + +def apply_preset(name: str, *, path: Path | None = None) -> ProfileStore: + """Overwrite the profiles store with one of :data:`DEFAULT_PRESETS`. + + Any existing profiles are replaced wholesale. Endpoint references in + profile entries are preserved if they happen to match a registered + BYOE endpoint id, otherwise they are dropped from the resolved + profile to avoid dangling foreign keys. + """ + if name not in DEFAULT_PRESETS: + raise ProfileError( + f"unknown preset {name!r}. Available: {', '.join(sorted(DEFAULT_PRESETS))}" + ) + target = path or default_store_path() + blob = DEFAULT_PRESETS[name] + profiles = [Profile.from_dict(p) for p in blob.get("profiles", [])] + for p in profiles: + if not p.created_at: + p.created_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + store = ProfileStore( + path=target, + schema_version=SCHEMA_VERSION, + default_profile_id=str(blob.get("default_profile_id") or ""), + profiles=profiles, + routes={str(k): str(v) for k, v in (blob.get("routes") or {}).items()}, + ) + store.save() + return store + + +__all__ = [ + "DEFAULT_PRESETS", + "PROVIDER_FAMILIES", + "Profile", + "ProfileError", + "ProfileStore", + "SCHEMA_VERSION", + "VALID_ROLES", + "apply_preset", + "default_store_path", + "project_store_path", + "provider_family", +] diff --git a/src/specsmith/agent/runner.py b/src/specsmith/agent/runner.py new file mode 100644 index 0000000..696d533 --- /dev/null +++ b/src/specsmith/agent/runner.py @@ -0,0 +1,434 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Long-lived agent runtime driving ``specsmith run`` / ``specsmith serve``. + +The runner is the bridge between the Click entry points in :mod:`cli` and +the underlying machinery in :mod:`agent.chat_runner`, +:mod:`agent.orchestrator`, :mod:`agent.profiles`, and +:mod:`agent.fallback`. 
+ +Why this module exists +---------------------- +The VS Code extension's :class:`SpecsmithBridge` (``bridge.ts``) treats a +JSONL ``{type: "ready", ...}`` line as the official handshake — without +that line within 20 s the bridge declares the binary unresponsive and +surfaces *"specsmith not responding"* to the user. Earlier refactors +removed the file that emitted the handshake, so every fresh ``specsmith +run --json-events`` import-errored before producing a single byte. This +module restores the emitter and centralizes the protocol (REQ-145). +""" + +from __future__ import annotations + +import sys +import time +from collections.abc import Callable +from pathlib import Path +from typing import Any + +from specsmith.agent.core import AgentState, ModelTier +from specsmith.agent.events import EventEmitter + +# These imports are kept lazy in the public API so that a busted optional +# dependency (e.g. ``ag2``) doesn't keep the bridge from emitting ``ready``. +# The import itself happens on the first call that actually needs the +# orchestrator group chat. +__all__ = ["AgentRunner", "_capabilities"] + + +# --------------------------------------------------------------------------- +# Capability advertising +# --------------------------------------------------------------------------- + + +def _capabilities() -> list[str]: + """Return the list of capabilities surfaced by the ``ready`` frame. + + The VS Code extension uses this to show / hide UI affordances (the + Endpoints tree only renders when ``"endpoints"`` is reported, etc.). + Best-effort reflection so an old CLI talking to a new extension still + works without lying. 
+ """ + caps: list[str] = ["chat", "run"] + try: + import importlib + + for mod, name in ( + ("specsmith.agent.endpoints", "endpoints"), + ("specsmith.agent.profiles", "profiles"), + ("specsmith.agent.mcp", "mcp"), + ("specsmith.agent.rules", "rules"), + ("specsmith.agent.voice", "voice"), + ): + try: + importlib.import_module(mod) + caps.append(name) + except Exception: # noqa: BLE001 + pass + except Exception: # noqa: BLE001 + pass + return caps + + +# --------------------------------------------------------------------------- +# Slash-command dispatch table +# --------------------------------------------------------------------------- + + +SLASH_COMMANDS: dict[str, str] = { + "/plan": "[PLAN] Break the request into a step-by-step plan: ", + "/architect": "[ARCHITECT] Propose an architecture for: ", + "/ask": "[ASK] Clarify intent and answer: ", + "/fix": "[FIX] Modify code to fix the following: ", + "/code": "[CODE] Write code for: ", + "/refactor": "[REFACTOR] Refactor without changing behaviour: ", + "/test": "[TEST] Write or run tests for: ", + "/review": "[REVIEW] Review for correctness, regressions, and risk: ", + "/why": "[WHY] Explain the rationale and governance trace: ", + "/audit": "[AUDIT] Audit the change against requirements: ", + "/commit": "[COMMIT] Stage changes and write a commit message: ", + "/pr": "[PR] Prepare a pull request body for: ", + "/undo": "[UNDO] Revert the last action: ", + "/context": "[CONTEXT] Surface repo context relevant to: ", + "/search": "[SEARCH] Search the repo and external docs for: ", +} + + +def _slash_to_activity(line: str) -> str: + """Map a user input to a routing-table activity key. + + Plain text → ``"chat"`` (the catch-all). Slash commands map to their + canonical form. Unknown slash commands also fall through to ``"chat"``. 
+ """ + text = line.strip() + if not text or not text.startswith("/"): + return "chat" + head = text.split(maxsplit=1)[0].lower() + if head in SLASH_COMMANDS: + return head + return "chat" + + +# --------------------------------------------------------------------------- +# AgentRunner +# --------------------------------------------------------------------------- + + +class AgentRunner: + """Top-level controller used by ``specsmith run`` and ``specsmith serve``. + + Construction must succeed even if optional providers are unavailable + (Ollama down, no API keys, no ``ag2`` installed) — the bridge depends + on the ``ready`` frame landing regardless of provider state. + + Public surface (consumed by callers we cannot break) + ---------------------------------------------------- + * :attr:`_state` — read by :class:`specsmith.serve._AgentThread.status`. + * :attr:`_hard_stop` — set by :meth:`specsmith.serve._AgentThread.stop_turn`. + * :meth:`_print_banner` — invoked by ``serve`` once the thread spawns. + * :meth:`_handle_command` — invoked once per inbox message in ``serve``. + * :meth:`_emit_event` — monkey-patched by ``serve`` to route events + through its in-process bus. + * :meth:`run_task` / :meth:`run_interactive` — used by the ``cli.run`` + command. 
+ """ + + def __init__( + self, + *, + project_dir: str, + provider_name: str | None = None, + model: str | None = None, + tier: ModelTier | str | None = ModelTier.BALANCED, + stream: bool = True, + optimize: bool = False, + json_events: bool = False, + endpoint_id: str | None = None, + profile_id: str | None = None, + emitter: EventEmitter | None = None, + ) -> None: + self.project_dir = str(Path(project_dir).resolve()) + self.provider_name = (provider_name or "").strip() or "ollama" + self.model = (model or "").strip() + self.tier = ModelTier.parse(tier, default=ModelTier.BALANCED) + self.stream = bool(stream) + self.optimize = bool(optimize) + self.json_events = bool(json_events) + self.endpoint_id = (endpoint_id or "").strip() or None + self.profile_id = (profile_id or "").strip() or None + + self._emitter = emitter or EventEmitter(stream=sys.stdout) + self._state = AgentState( + provider_name=self.provider_name, + model_name=self.model, + profile_id=self.profile_id or "", + ) + self._hard_stop = False + self._started_at = time.time() + self._history: list[dict[str, Any]] = [] + self._block_counter = 0 + + # Best-effort routing-table load. A missing or invalid file falls + # back to single-profile behaviour so existing setups keep working. + self._routing = self._load_routing() + + # Consumers may swap this with a closure that routes through their + # own bus (see ``serve._AgentThread``). The default writes JSONL. + self._emit_event: Callable[..., None] = self._default_emit_event + + # ── Public lifecycle ─────────────────────────────────────────────── + + def _print_banner(self) -> None: + """Emit the ``ready`` handshake (or print a plain banner). + + Called exactly once at process start. The bridge waits up to 20 s + for this frame; when ``json_events`` is False we still emit a + terminal-friendly banner so interactive ``specsmith run`` users + see the same boot text they used to. 
+ """ + version = self._package_version() + if self.json_events: + self._emitter.ready( + agent="nexus", + version=version, + project_dir=self.project_dir, + provider=self.provider_name, + model=self.model, + profile_id=self.profile_id or "", + capabilities=_capabilities(), + endpoint_id=self.endpoint_id or "", + ) + else: + print( + f"Nexus {version} — Local-first Agentic Development Environment " + f"(Specsmith-governed)\n" + f" project: {self.project_dir}\n" + f" provider: {self.provider_name}\n" + f" model: {self.model or '(default)'}\n" + f" profile: {self.profile_id or '(default)'}\n" + "Type plain English, or use slash commands " + "(/plan, /ask, /fix, /test, /commit, /pr, /why, /exit).", + flush=True, + ) + + def run_interactive(self) -> None: + """Read stdin lines and dispatch each to :meth:`_handle_command`.""" + self._print_banner() + try: + for raw in sys.stdin: + line = raw.rstrip("\n") + if not line.strip(): + continue + if line.strip().lower() in {"/exit", "/quit"}: + break + self._handle_command(line) + if self.json_events: + self._emit_event(type="turn_done") + if self._hard_stop: + self._hard_stop = False + except (KeyboardInterrupt, EOFError): + pass + + def run_task(self, task: str): + """Execute a single task non-interactively and return the result. + + Mirrors the legacy ``cli.run --task`` shape — returns whatever the + chat runner produced (plus a synthetic ``TaskResult`` when the + orchestrator path was used). + """ + return self._handle_command(task) + + # ── Per-turn dispatch ────────────────────────────────────────────── + + def _handle_command(self, text: str) -> Any: + """Route a single user line through the right pipeline. + + Order of resolution: + 1. Slash command shortcuts (``/clear``, ``/model``, ``/provider``, + ``/agent``, ``/exit``). + 2. Activity → profile routing (PR-G). Falls back to the + single-profile config if no routing table is present. + 3. 
``chat_runner.run_chat`` for the actual LLM turn (with + fallback chain wrapping the call). + """ + text = (text or "").strip() + if not text: + return None + + # Lightweight in-process commands the runner handles itself. + if text.startswith("/clear"): + self._history = [] + self._emit_event(type="system", message="History cleared.") + return None + if text.startswith("/model "): + new_model = text.split(maxsplit=1)[1].strip() + self.model = new_model + self._state.model_name = new_model + self._emit_event(type="system", message=f"model = {new_model}") + return None + if text.startswith("/provider "): + new_provider = text.split(maxsplit=1)[1].strip() + self.provider_name = new_provider + self._state.provider_name = new_provider + self._emit_event(type="system", message=f"provider = {new_provider}") + return None + if text.startswith("/agent "): + new_profile = text.split(maxsplit=1)[1].strip() + self.profile_id = new_profile or None + self._state.profile_id = new_profile + self._emit_event(type="system", message=f"profile = {new_profile or '(default)'}") + # G4: pin the profile choice into the project trace vault so the + # decision “I explicitly asked for profile X here” is + # cryptographically chained into the audit trail. Best-effort: + # missing TraceVault dependency / read-only filesystem must not + # break the chat loop. + if new_profile: + self._seal_profile_pin(new_profile) + return None + if text.startswith("/endpoint "): + new_endpoint = text.split(maxsplit=1)[1].strip() + self.endpoint_id = new_endpoint or None + self._emit_event(type="system", message=f"endpoint = {new_endpoint or '(auto)'}") + return None + + activity = _slash_to_activity(text) + prefix = SLASH_COMMANDS.get(activity, "") + utterance = text[len(activity) :].strip() if activity != "chat" else text + full_prompt = (prefix + utterance) if prefix else utterance + + # Resolve the per-turn profile (PR-G). 
On any error we degrade to + # the single-provider path so the user still gets a response. + profile, endpoint_override = self._resolve_for_activity(activity) + if profile is not None: + _ident = f"{profile.provider}/{profile.model}" + self._emit_event( + type="system", + message=f"\u21bb routing {activity} \u2192 {profile.id} ({_ident})", + ) + + block_id = self._next_block_id() + try: + from specsmith.agent.chat_runner import run_chat + + result = run_chat( + full_prompt, + project_dir=Path(self.project_dir), + profile=(profile.id if profile is not None else "standard"), + session_id=str(int(self._started_at)), + emitter=self._emitter, + msg_block=block_id, + history=self._history, + endpoint_id=(endpoint_override or self.endpoint_id), + ) + except Exception as exc: # noqa: BLE001 + self._emit_event( + type="error", + message=f"chat turn failed: {exc}", + recoverable=True, + ) + return None + + # Aggregate metrics into the session state (C1). + # ``run_chat`` now reports tokens_in / tokens_out / cost_usd off the + # provider response (Ollama prompt_eval_count + eval_count, OpenAI + # streaming usage, Anthropic final_message.usage, Gemini + # usage_metadata) with a 4-chars-per-token fallback when the SDK + # omits them. The TokenMeter chip therefore shows real numbers + # instead of staying pinned at zero. 
+ tokens_in = int(getattr(result, "tokens_in", 0) or 0) if result is not None else 0 + tokens_out = int(getattr(result, "tokens_out", 0) or 0) if result is not None else 0 + cost_usd = float(getattr(result, "cost_usd", 0.0) or 0.0) if result is not None else 0.0 + self._state.credit( + profile_id=(profile.id if profile is not None else self.profile_id or ""), + tokens_in=tokens_in, + tokens_out=tokens_out, + cost_usd=cost_usd, + tool_calls=0, + ) + self._state.elapsed_minutes = round((time.time() - self._started_at) / 60.0, 2) + + if result is not None: + self._history.append({"role": "user", "text": utterance}) + self._history.append({"role": "agent", "text": result.summary}) + return result + + # ── Routing helpers ──────────────────────────────────────────────── + + def _resolve_for_activity(self, activity: str): + """Return ``(Profile, endpoint_id_override)`` or ``(None, None)``. + + Respects an explicit per-session profile / endpoint override so + the ``--agent`` and ``--endpoint`` CLI flags still win. 
+ """ + if self.profile_id is None and self._routing is None: + return (None, None) + try: + from specsmith.agent.profiles import ProfileStore + + store = ProfileStore.load() + if self.profile_id: + profile = store.get(self.profile_id) + return (profile, profile.endpoint_id or None) + target_id = store.routes.get(activity) or store.default_profile_id + if not target_id: + return (None, None) + profile = store.get(target_id) + return (profile, profile.endpoint_id or None) + except Exception: # noqa: BLE001 + return (None, None) + + def _load_routing(self) -> Any | None: + try: + from specsmith.agent.profiles import ProfileStore + + store = ProfileStore.load() + return store if store.profiles else None + except Exception: # noqa: BLE001 + return None + + # ── Event plumbing ──────────────────────────────────────────────── + + def _default_emit_event(self, **kwargs: Any) -> None: + if not self.json_events: + # Non-JSON mode: render a compact human line for ``system`` + # events and ignore protocol-only frames. + if kwargs.get("type") == "system": + msg = str(kwargs.get("message") or "") + if msg: + print(msg, flush=True) + return + self._emitter.emit({k: v for k, v in kwargs.items() if v is not None}) + + def _next_block_id(self) -> str: + self._block_counter += 1 + return f"blk_run_{self._block_counter:04d}" + + @staticmethod + def _package_version() -> str: + try: + from importlib.metadata import version as _v + + return _v("specsmith") + except Exception: # noqa: BLE001 + return "0.0.0" + + def _seal_profile_pin(self, profile_id: str) -> None: + """Append a TraceVault decision seal recording the ``/agent`` pin (G4). + + Wrapped in best-effort try/except so an unwriteable + ``.specsmith/trace.jsonl`` (read-only fs, missing project root, etc.) + never breaks the chat loop. The seal type is ``decision`` because + a profile pin is an explicit governance choice the user made. 
+ """ + try: + from specsmith.trace import SealType, TraceVault + + vault = TraceVault(Path(self.project_dir)) + vault.seal( + seal_type=SealType.DECISION, + description=f"agent profile pinned via /agent: {profile_id}", + author="runner", + artifact_ids=[f"profile:{profile_id}"], + ) + except Exception: # noqa: BLE001 — trace sealing is best-effort + return diff --git a/src/specsmith/cli.py b/src/specsmith/cli.py index 6c54f4f..4805ba9 100644 --- a/src/specsmith/cli.py +++ b/src/specsmith/cli.py @@ -2723,6 +2723,25 @@ def abort_cmd(pid: int | None, abort_all_flag: bool, project_dir: str) -> None: default=False, help="Emit structured JSONL events to stdout (used by IDE clients like the VS Code extension).", ) +@click.option( + "--endpoint", + "endpoint_id", + default="", + help=( + "Route turns through a registered BYOE endpoint (REQ-142). When set, " + "the resolved endpoint's base_url, default model, and bearer token " + "override --provider / --model for OpenAI-v1-compatible backends." + ), +) +@click.option( + "--agent", + "profile_id", + default="", + help=( + "Force a specific agent profile for the whole session (REQ-146). " + "Identical to setting `default_profile_id` in `~/.specsmith/agents.json`." + ), +) def run_cmd( project_dir: str, task: str, @@ -2732,6 +2751,8 @@ def run_cmd( no_stream: bool, optimize: bool, json_events: bool, + endpoint_id: str, + profile_id: str, ) -> None: """Start the AEE-integrated agentic client REPL. 
@@ -2749,31 +2770,34 @@ def run_cmd( from specsmith.agent.core import ModelTier from specsmith.agent.runner import AgentRunner - tier_map = { - "fast": ModelTier.FAST, - "balanced": ModelTier.BALANCED, - "powerful": ModelTier.POWERFUL, - } - try: runner = AgentRunner( project_dir=project_dir, provider_name=provider_name, model=model, - tier=tier_map[tier], + tier=ModelTier.parse(tier, default=ModelTier.BALANCED), stream=not no_stream, optimize=optimize, json_events=json_events, + endpoint_id=endpoint_id or None, + profile_id=profile_id or None, ) if task: result = runner.run_task(task) - console.print(result) + if result is not None: + console.print(result) else: runner.run_interactive() - if optimize and runner._optimizer: - report = runner._optimizer.report() - console.print(f"\n[dim]{report.summary()}[/dim]") except Exception as e: # noqa: BLE001 + # Always emit a `ready` frame for json_events mode so the bridge + # surfaces the failure cleanly instead of timing out at 20 s. + if json_events: + from specsmith.agent.events import EventEmitter + + EventEmitter().error( + message=f"agent failed to start: {e}", + recoverable=True, + ) console.print(f"[red]{e}[/red]") console.print( "\nInstall a provider (pipx recommended):\n" @@ -2806,6 +2830,16 @@ def run_cmd( "liveness probes still work." ), ) +@click.option( + "--endpoint", + "endpoint_id", + default="", + help=( + "Route turns through a registered BYOE endpoint (REQ-142). When set, " + "the resolved endpoint's base_url, default model, and bearer token " + "override --provider / --model for OpenAI-v1-compatible backends." + ), +) def serve_cmd( project_dir: str, provider: str, @@ -2813,6 +2847,7 @@ def serve_cmd( port: int, host: str, auth_token: str, + endpoint_id: str, ) -> None: """Start a persistent HTTP server for agent sessions. 
@@ -2824,12 +2859,34 @@ def serve_cmd( specsmith serve --port 8421 --provider ollama --model qwen2.5:14b \ --auth-token $(specsmith auth get serve) """ + import os + from specsmith.serve import run_server + # REQ-142: when --endpoint is given, derive provider+model from the + # endpoint registry so the serve loop can hand off to the OpenAI-compat + # driver in chat_runner. The bridge surfaces the original --provider + # value as a fallback when the endpoint can't be resolved. + effective_provider = provider + effective_model = model + if endpoint_id: + try: + from specsmith.agent.endpoints import EndpointStore + + resolved = EndpointStore.load().resolve(endpoint_id) + effective_provider = "openai-compat" + effective_model = resolved.default_model or model + os.environ["SPECSMITH_ACTIVE_ENDPOINT"] = resolved.id + except Exception as exc: # noqa: BLE001 + console.print( + f"[yellow]Warning:[/yellow] could not resolve endpoint " + f"{endpoint_id!r}: {exc}. Falling back to --provider {provider}." 
+ ) + run_server( project_dir=project_dir, - provider=provider, - model=model, + provider=effective_provider, + model=effective_model, port=port, host=host, auth_token=auth_token, @@ -4116,7 +4173,14 @@ def phase_group() -> None: default=".", help="Project root (default: current directory).", ) -def phase_show(project_dir: str) -> None: +@click.option( + "--json", + "as_json", + is_flag=True, + default=False, + help="Emit a stable JSON document (used by the VS Code Workflows tree).", +) +def phase_show(project_dir: str, as_json: bool) -> None: """Show the current AEE workflow phase and its readiness checklist.""" from specsmith.phase import PHASE_MAP, evaluate_phase, phase_progress_pct, read_phase @@ -4126,6 +4190,37 @@ def phase_show(project_dir: str) -> None: passed, failed = evaluate_phase(phase, root) pct = phase_progress_pct(phase, root) + if as_json: + import json as _json + + phases_payload: list[dict[str, Any]] = [] + for key, p in PHASE_MAP.items(): + p_passed, p_failed = evaluate_phase(p, root) + phases_payload.append( + { + "key": key, + "label": p.label, + "emoji": p.emoji, + "description": p.description, + "readiness_pct": phase_progress_pct(p, root), + "passed": list(p_passed), + "failed": list(p_failed), + "next_phase": p.next_phase, + "is_active": (key == phase_key), + } + ) + click.echo( + _json.dumps( + { + "active_phase": phase_key, + "readiness_pct": pct, + "phases": phases_payload, + }, + indent=2, + ) + ) + return + console.print(f"\n {phase.emoji} [bold]{phase.label}[/bold] ({phase_key})") console.print(f" {phase.description}") console.print() @@ -4249,6 +4344,31 @@ def phase_next(project_dir: str, force: bool) -> None: for cmd in next_phase.commands: console.print(f" {cmd}") + # G3: keep the agents routing table aligned with the active phase. 
+ # We pin a synthetic ``phase:active`` route so the runner can flip the + # whole session to the new phase’s preferred profile without the user + # having to run `specsmith agents route set` themselves. + try: + from specsmith.agent.profiles import ProfileStore + + agents_store = ProfileStore.load() + if agents_store.profiles: + phase_key_target = f"phase:{phase.next_phase}" + target_id = agents_store.routes.get(phase_key_target) or ( + agents_store.default_profile_id + ) + if target_id and agents_store._index(target_id) is not None: + agents_store.set_route("phase:active", target_id) + # Make sure the canonical phase: route is present too; + # adding a sensible default lets a fresh project route + # immediately on the very first ``phase next``. + if phase_key_target not in agents_store.routes: + agents_store.set_route(phase_key_target, target_id) + agents_store.save() + console.print(f" [dim]\u21bb agents route phase:active \u2192 {target_id}[/dim]") + except Exception: # noqa: BLE001 — routing is opportunistic; never block phase advance + pass + @phase_group.command(name="status") @click.option("--project-dir", type=click.Path(exists=True), default=".") @@ -4562,124 +4682,329 @@ def voice_status_cmd() -> None: # --------------------------------------------------------------------------- -# specsmith cloud spawn — client side of the receiver (REQ-136) +# specsmith endpoints — Bring-Your-Own-Endpoint store (REQ-142) # --------------------------------------------------------------------------- -@main.group(name="cloud") -def cloud_group() -> None: - """Cloud-agent receiver client (REQ-136).""" +@main.group(name="endpoints") +def endpoints_group() -> None: + """Manage OpenAI-v1-compatible LLM endpoints (REQ-142). + + Lets you register one or more self-hosted backends (vLLM, llama.cpp + server, LM Studio, TGI, ...) and pick between them per session via + ``--endpoint `` on ``specsmith run`` / ``chat`` / ``serve``. 
+ Stored at ``~/.specsmith/endpoints.json``; tokens default to the OS + keyring. + """ + + +def _resolve_keyring_user(endpoint_id: str, override: str) -> str: + return override.strip() or f"endpoint:{endpoint_id}" -@cloud_group.command(name="spawn") -@click.argument("manifest_path", type=click.Path(exists=True)) +@endpoints_group.command(name="add") +@click.option("--id", "endpoint_id", required=True, help="Stable identifier (no whitespace).") +@click.option("--name", default="", help="Human-readable display name (defaults to id).") @click.option( - "--endpoint", - default="http://127.0.0.1:9000", - help="Cloud-serve base URL (default: http://127.0.0.1:9000).", + "--base-url", "base_url", required=True, help="OpenAI-v1 base URL, e.g. http://10.0.0.4:8000/v1" ) -@click.option("--token", default="", help="Bearer token for the receiver.") +@click.option("--default-model", default="", help="Optional default model id.") @click.option( - "--dry-run", + "--auth", + "auth_kind", + type=click.Choice( + list( + __import__("specsmith.agent.endpoints", fromlist=["VALID_AUTH_KINDS"]).VALID_AUTH_KINDS + ) + ), + default="none", + show_default=True, + help="Auth strategy: none / bearer-inline / bearer-env / bearer-keyring.", +) +@click.option("--token", default="", help="Inline bearer token (only with --auth bearer-inline).") +@click.option("--token-env", default="", help="Env var name (only with --auth bearer-env).") +@click.option( + "--keyring-service", default="", help="Override the keyring service (default: 'specsmith')." +) +@click.option( + "--keyring-user", default="", help="Override the keyring user (default: 'endpoint:')." 
+) +@click.option( + "--no-verify-tls", + is_flag=True, + default=False, + help="Disable TLS certificate verification for this endpoint (insecure).", +) +@click.option("--tag", "tags", multiple=True, help="Optional free-form tag (repeatable).") +@click.option( + "--replace", + is_flag=True, + default=False, + help="Overwrite an existing endpoint with the same id.", +) +@click.option( + "--set-default", is_flag=True, default=False, - help="Validate the manifest locally and print what would be posted.", + help="After saving, mark this endpoint as the default.", ) -def cloud_spawn_cmd(manifest_path: str, endpoint: str, token: str, dry_run: bool) -> None: - """Post a manifest to a `specsmith cloud-serve` endpoint (REQ-136). +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_add( + endpoint_id: str, + name: str, + base_url: str, + default_model: str, + auth_kind: str, + token: str, + token_env: str, + keyring_service: str, + keyring_user: str, + no_verify_tls: bool, + tags: tuple[str, ...], + replace: bool, + set_default: bool, + as_json: bool, +) -> None: + """Register a new endpoint in ``~/.specsmith/endpoints.json``. - The MANIFEST_PATH is a YAML or JSON file describing the run. The CLI - reads it, posts it to ``/spawn`` with optional bearer auth, - and prints the response as JSON. + For ``--auth bearer-keyring`` the token is prompted for (no echo) and + stored in the OS keyring via the existing :mod:`keyring` integration; + nothing secret lands in the JSON itself. 
""" import json as _json - import urllib.error - import urllib.request - raw = Path(manifest_path).read_text(encoding="utf-8") - payload: dict[str, object] - if manifest_path.endswith((".yml", ".yaml")): - try: - import yaml as _yaml + from specsmith.agent.endpoints import ( + DEFAULT_KEYRING_SERVICE, + Endpoint, + EndpointAuth, + EndpointError, + EndpointStore, + ) - payload = _yaml.safe_load(raw) or {} - except Exception as exc: # noqa: BLE001 - console.print(f"[red]Invalid YAML manifest: {exc}[/red]") - raise SystemExit(2) from exc - else: + auth_token = token + if auth_kind == "bearer-keyring" and not token: try: - payload = _json.loads(raw) - except ValueError as exc: - console.print(f"[red]Invalid JSON manifest: {exc}[/red]") + auth_token = click.prompt( + f"Token for endpoint {endpoint_id!r} (will be stored in OS keyring)", + hide_input=True, + confirmation_prompt=False, + default="", + show_default=False, + ) + except click.Abort as exc: # pragma: no cover - interactive abort raise SystemExit(2) from exc + if not auth_token: + console.print("[red]Refusing to store an empty keyring token.[/red]") + raise SystemExit(2) - if not isinstance(payload, dict): - console.print("[red]Manifest must be a mapping (YAML/JSON object).[/red]") - raise SystemExit(2) + auth = EndpointAuth( + kind=auth_kind, + token=auth_token if auth_kind == "bearer-inline" else "", + token_env=token_env, + keyring_service=keyring_service or DEFAULT_KEYRING_SERVICE, + keyring_user=_resolve_keyring_user(endpoint_id, keyring_user) + if auth_kind == "bearer-keyring" + else keyring_user, + ) + endpoint = Endpoint( + id=endpoint_id.strip(), + name=name.strip() or endpoint_id.strip(), + base_url=base_url.strip(), + auth=auth, + default_model=default_model.strip(), + verify_tls=not no_verify_tls, + tags=list(tags), + ) - if dry_run: - click.echo(_json.dumps({"endpoint": endpoint, "manifest": payload}, indent=2)) - return + store = EndpointStore.load() + try: + store.add(endpoint, replace=replace) + 
except EndpointError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(2) from exc + + if auth_kind == "bearer-keyring": + try: + import keyring # type: ignore[import-not-found] + + keyring.set_password(auth.keyring_service, auth.keyring_user, auth_token) + except Exception as exc: # noqa: BLE001 + console.print( + f"[yellow]Warning:[/yellow] keyring write failed ({exc}). " + "Endpoint metadata saved, but the token was not stored." + ) + + if set_default: + store.set_default(endpoint.id) + store.save() - body = _json.dumps(payload).encode("utf-8") - req = urllib.request.Request( # noqa: S310 - user-supplied endpoint - endpoint.rstrip("/") + "/spawn", - data=body, - method="POST", - headers={"Content-Type": "application/json"}, + public = endpoint.to_public_dict() + if as_json: + click.echo( + _json.dumps( + {"endpoint": public, "default": store.default_endpoint_id}, + indent=2, + ) + ) + return + console.print( + f"[green]\u2713[/green] saved endpoint [bold]{endpoint.id}[/bold] " + f"({endpoint.base_url}, auth={auth_kind})" ) - if token: - req.add_header("Authorization", f"Bearer {token}") + if store.default_endpoint_id == endpoint.id: + console.print(" [dim]marked as default.[/dim]") + + +@endpoints_group.command(name="list") +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_list(as_json: bool) -> None: + """List every registered endpoint (tokens are redacted).""" + import json as _json + + from specsmith.agent.endpoints import EndpointStore + + store = EndpointStore.load() + items = store.list_public() + payload = {"default_endpoint_id": store.default_endpoint_id, "endpoints": items} + if as_json: + click.echo(_json.dumps(payload, indent=2)) + return + if not items: + console.print("[dim]No endpoints registered. 
Run `specsmith endpoints add ...`.[/dim]") + return + for item in items: + marker = "*" if item["id"] == store.default_endpoint_id else " " + console.print( + f"{marker} [bold]{item['id']}[/bold] {item['base_url']} " + f"[dim]auth={item['auth']['kind']}, model={item['default_model'] or '-'}[/dim]" + ) + + +@endpoints_group.command(name="remove") +@click.argument("endpoint_id") +@click.option( + "--purge-keyring", + is_flag=True, + default=False, + help="Also delete the bearer-keyring entry for this endpoint.", +) +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_remove(endpoint_id: str, purge_keyring: bool, as_json: bool) -> None: + """Remove an endpoint by id. Exits 1 if the id is unknown.""" + import json as _json + + from specsmith.agent.endpoints import EndpointStore + + store = EndpointStore.load() + target = store.get(endpoint_id) if store._index(endpoint_id) is not None else None + removed = store.remove(endpoint_id) + if not removed: + console.print(f"[red]unknown endpoint id {endpoint_id!r}[/red]") + raise SystemExit(1) + if purge_keyring and target is not None and target.auth.kind == "bearer-keyring": + try: + import keyring # type: ignore[import-not-found] + + keyring.delete_password(target.auth.keyring_service, target.auth.keyring_user) + except Exception: # noqa: BLE001 + pass + store.save() + if as_json: + click.echo( + _json.dumps( + {"removed": endpoint_id, "default_endpoint_id": store.default_endpoint_id}, + indent=2, + ) + ) + return + console.print(f"[green]\u2713[/green] removed endpoint {endpoint_id!r}") + + +@endpoints_group.command(name="default") +@click.argument("endpoint_id") +def endpoints_default(endpoint_id: str) -> None: + """Mark an existing endpoint as the default for unqualified runs.""" + from specsmith.agent.endpoints import EndpointError, EndpointStore + + store = EndpointStore.load() try: - with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310 - response = 
_json.loads(resp.read().decode("utf-8")) - except urllib.error.HTTPError as exc: - body_text = exc.read().decode("utf-8") or "{}" - console.print(f"[red]HTTP {exc.code}[/red]: {body_text}") - raise SystemExit(1) from exc - except urllib.error.URLError as exc: - console.print(f"[red]Network error[/red]: {exc.reason}") + store.set_default(endpoint_id) + except EndpointError as exc: + console.print(f"[red]{exc}[/red]") raise SystemExit(1) from exc + store.save() + console.print(f"[green]\u2713[/green] default endpoint = {endpoint_id!r}") - click.echo(_json.dumps(response, indent=2)) +@endpoints_group.command(name="test") +@click.argument("endpoint_id", required=False, default="") +@click.option("--timeout", type=float, default=5.0, help="Request timeout in seconds.") +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_test(endpoint_id: str, timeout: float, as_json: bool) -> None: + """Probe ENDPOINT_ID's /models route. Defaults to the default endpoint.""" + import json as _json -# --------------------------------------------------------------------------- -# specsmith cloud serve — reference cloud-agent receiver (REQ-136) -# --------------------------------------------------------------------------- + from specsmith.agent.endpoints import EndpointError, EndpointStore + + store = EndpointStore.load() + try: + endpoint = store.resolve(endpoint_id or None) + except EndpointError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + health = endpoint.health(timeout=timeout) + if as_json: + click.echo(_json.dumps({"id": endpoint.id, **health.to_dict()}, indent=2)) + else: + if health.ok: + console.print( + f"[green]\u2713[/green] {endpoint.id} ok in " + f"{int(health.latency_ms)} ms ({len(health.models)} models)" + ) + for model in health.models[:5]: + console.print(f" [dim]\u2022 {model}[/dim]") + if len(health.models) > 5: + console.print(f" [dim]... 
+{len(health.models) - 5} more[/dim]") + else: + console.print(f"[red]\u2717[/red] {endpoint.id} failed: {health.error}") + if not health.ok: + raise SystemExit(1) -@main.command(name="cloud-serve") -@click.option("--host", default="127.0.0.1") -@click.option("--port", type=int, default=9000) -@click.option("--token", default="", help="Optional bearer token.") -@click.option("--allow-cidr", default="", help="CIDR range required to bind non-loopback.") -def cloud_serve_cmd(host: str, port: int, token: str, allow_cidr: str) -> None: - """Run the reference cloud-agent receiver (REQ-136). +@endpoints_group.command(name="models") +@click.argument("endpoint_id", required=False, default="") +@click.option("--timeout", type=float, default=5.0, help="Request timeout in seconds.") +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_models(endpoint_id: str, timeout: float, as_json: bool) -> None: + """List every model the endpoint advertises via /v1/models.""" + import json as _json - Accepts POST /spawn with a JSON manifest, persists it under - ~/.specsmith/cloud-runs//manifest.json, and returns 202 with - a stream_url placeholder. - """ - from specsmith.cloud_serve import CloudReceiverConfig, make_server + from specsmith.agent.endpoints import EndpointError, EndpointStore - config = CloudReceiverConfig(host=host, port=port, token=token, allow_cidr=allow_cidr) + store = EndpointStore.load() try: - server = make_server(config) - except RuntimeError as exc: + endpoint = store.resolve(endpoint_id or None) + except EndpointError as exc: console.print(f"[red]{exc}[/red]") - raise SystemExit(2) from exc - console.print( - f"[bold]specsmith cloud serve[/bold] on http://{config.host}:{config.port}\n" - f" storage: {config.storage_dir}\n" - f" token: {'(set)' if token else '(none)'}\n" - " Press Ctrl+C to stop." 
- ) - try: - server.serve_forever() - except KeyboardInterrupt: - console.print("\n[dim]cloud serve stopped.[/dim]") - server.server_close() + raise SystemExit(1) from exc + health = endpoint.health(timeout=timeout) + if not health.ok: + if as_json: + click.echo(_json.dumps({"id": endpoint.id, "error": health.error}, indent=2)) + else: + console.print(f"[red]\u2717[/red] {endpoint.id} failed: {health.error}") + raise SystemExit(1) + if as_json: + click.echo(_json.dumps({"id": endpoint.id, "models": health.models}, indent=2)) + return + if not health.models: + console.print(f"[yellow]\u2014[/yellow] {endpoint.id} returned no models.") + return + for model in health.models: + console.print(model) + + +main.add_command(endpoints_group) # --------------------------------------------------------------------------- @@ -5555,6 +5880,16 @@ def index_search_cmd(query: str, project_dir: str, limit: int) -> None: default=120.0, help="Seconds to wait for a stdin decision before falling back to deny.", ) +@click.option( + "--endpoint", + "endpoint_id", + default="", + help=( + "Route the LLM turn to a registered BYOE endpoint (REQ-142). " + "See `specsmith endpoints add ...`. When empty, falls back to the " + "auto-detect provider chain (Ollama / Anthropic / OpenAI / Gemini)." + ), +) def chat_cmd( utterance: str, project_dir: str, @@ -5565,6 +5900,7 @@ def chat_cmd( json_events: bool, interactive: bool, decision_timeout: float, + endpoint_id: str, ) -> None: """Run a single chat turn, streaming JSONL block events to stdout. 
@@ -5717,6 +6053,7 @@ def chat_cmd( msg_block=msg_block, history=history, rules_prefix=rules_prefix, + endpoint_id=endpoint_id or None, ) except Exception: # noqa: BLE001 - real chat is best-effort real_result = None @@ -5972,16 +6309,6 @@ def notebook_replay(slug: str, project_dir: str) -> None: main.add_command(notebook_group) -# --------------------------------------------------------------------------- -# Cloud — REQ-126 placeholder (cloud spawn lives above under REQ-136). -# --------------------------------------------------------------------------- -# The original REQ-126 stub built a workspace tarball and posted to a free- -# form endpoint with no auth. REQ-136 supersedes it with a manifest-based -# command that posts to ``/spawn`` with optional bearer auth. -# Keeping a single ``cloud spawn`` avoids surface drift; see -# tests/test_warp_parity_followup.py for coverage. - - # --------------------------------------------------------------------------- # Workflow — parameterised command snippets (Warp-style Workflows) # --------------------------------------------------------------------------- @@ -6513,5 +6840,471 @@ def skill_install(slug: str, project_dir: str, force: bool) -> None: pass # AG2 not installed — agent commands unavailable +# --------------------------------------------------------------------------- +# specsmith agents — Agent profiles + activity routing (REQ-146) +# --------------------------------------------------------------------------- + + +@main.group(name="agents") +def agents_group() -> None: + """Manage agent profiles and activity routing (REQ-146). + + A *profile* is a named ``(provider, model, endpoint, fallback_chain)`` + bundle. The *routing table* maps an activity (``/plan``, ``/fix``, AEE + phase, MCP tool category) to a profile. ``specsmith run`` consults the + table on every turn so each activity flows through the right model. 
+ + Storage: ``~/.specsmith/agents.json`` with per-project overrides at + ``/.specsmith/agents.json``. + """ + + +@agents_group.command(name="list") +@click.option("--project-dir", type=click.Path(exists=True), default=".") +@click.option( + "--capability", + "capability", + default="", + help="Filter profiles whose capabilities list includes this value (G2).", +) +@click.option("--json", "as_json", is_flag=True, default=False) +def agents_list(project_dir: str, capability: str, as_json: bool) -> None: + """List every registered agent profile.""" + import json as _json + + from specsmith.agent.profiles import ProfileStore + + store = ProfileStore.load_for_project(project_dir) + profiles = ( + store.filter_by_capability(capability) if capability.strip() else list(store.profiles) + ) + payload = { + "default_profile_id": store.default_profile_id, + "profiles": [p.to_dict() for p in profiles], + "routes": dict(store.routes), + } + if capability.strip(): + payload["capability_filter"] = capability.strip() + if as_json: + click.echo(_json.dumps(payload, indent=2)) + return + if not profiles: + if capability.strip(): + console.print( + f"[dim]No profiles advertise capability {capability!r}.[/dim]", + ) + else: + console.print( + "[dim]No agent profiles registered. 
" + "Run `specsmith agents preset apply default` to install " + "the recommended set.[/dim]", + ) + return + for p in profiles: + marker = "*" if p.id == store.default_profile_id else " " + chain = " \u2192 ".join(p.fallback_chain) if p.fallback_chain else "(no fallback)" + endpoint = f" endpoint={p.endpoint_id}" if p.endpoint_id else "" + console.print( + f"{marker} [bold]{p.id}[/bold] role={p.role} {p.provider}/{p.model}{endpoint}" + ) + console.print(f" [dim]fallback: {chain}[/dim]") + + +@agents_group.command(name="add") +@click.option("--id", "profile_id", required=True) +@click.option("--role", default="generalist") +@click.option("--provider", default="ollama") +@click.option("--model", default="") +@click.option("--endpoint", "endpoint_id", default="") +@click.option("--prompt-prefix", default="") +@click.option("--capability", "capabilities", multiple=True) +@click.option("--fallback", "fallback_chain", multiple=True) +@click.option("--replace", is_flag=True, default=False) +@click.option("--set-default", is_flag=True, default=False) +@click.option("--json", "as_json", is_flag=True, default=False) +def agents_add( + profile_id: str, + role: str, + provider: str, + model: str, + endpoint_id: str, + prompt_prefix: str, + capabilities: tuple[str, ...], + fallback_chain: tuple[str, ...], + replace: bool, + set_default: bool, + as_json: bool, +) -> None: + """Register a new agent profile.""" + import json as _json + + from specsmith.agent.profiles import Profile, ProfileError, ProfileStore + + profile = Profile( + id=profile_id.strip(), + role=role.strip(), + provider=provider.strip(), + model=model.strip(), + endpoint_id=endpoint_id.strip(), + prompt_prefix=prompt_prefix, + capabilities=list(capabilities), + fallback_chain=list(fallback_chain), + ) + store = ProfileStore.load() + # G1 diversity guard — warn on same-family coder/reviewer pairings *before* + # we touch the store so the user can still bail out by Ctrl+C-ing the next + # invocation. 
The warnings are non-fatal: governance still saves the + # profile, but we surface the cross-check risk so it's a deliberate choice. + diversity = store.diversity_warnings(candidate=profile) + try: + store.add(profile, replace=replace) + except ProfileError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(2) from exc + if set_default: + store.set_default(profile.id) + store.save() + if as_json: + click.echo( + _json.dumps( + {"profile": profile.to_dict(), "diversity_warnings": diversity}, + indent=2, + ) + ) + return + console.print(f"[green]\u2713[/green] saved profile [bold]{profile.id}[/bold]") + if store.default_profile_id == profile.id: + console.print(" [dim]marked as default.[/dim]") + for warning in diversity: + console.print(f" [yellow]\u26a0[/yellow] {warning}") + + +@agents_group.command(name="remove") +@click.argument("profile_id") +def agents_remove(profile_id: str) -> None: + """Remove a profile and any routing entries that point at it.""" + from specsmith.agent.profiles import ProfileStore + + store = ProfileStore.load() + if not store.remove(profile_id): + console.print(f"[red]unknown profile id {profile_id!r}[/red]") + raise SystemExit(1) + store.save() + console.print(f"[green]\u2713[/green] removed profile {profile_id!r}") + + +@agents_group.command(name="default") +@click.argument("profile_id") +def agents_default(profile_id: str) -> None: + """Set the default profile (used when no route matches).""" + from specsmith.agent.profiles import ProfileError, ProfileStore + + store = ProfileStore.load() + try: + store.set_default(profile_id) + except ProfileError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + store.save() + console.print(f"[green]\u2713[/green] default profile = {profile_id!r}") + + +@agents_group.command(name="test") +@click.argument("profile_id") +@click.option("--json", "as_json", is_flag=True, default=False) +def agents_test(profile_id: str, as_json: bool) -> None: + """Probe a profile 
(resolves the endpoint/provider, reports reachability).""" + import json as _json + + from specsmith.agent.endpoints import EndpointError, EndpointStore + from specsmith.agent.profiles import ProfileError, ProfileStore + + store = ProfileStore.load() + try: + profile = store.get(profile_id) + except ProfileError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + + payload: dict[str, Any] = {"profile_id": profile.id, "reachable": False} + # If the profile points at a BYOE endpoint, probe it; else just report + # the resolved provider/model (full provider testing lands in a follow-up). + if profile.endpoint_id: + try: + endpoint = EndpointStore.load().resolve(profile.endpoint_id) + health = endpoint.health(timeout=5.0) + payload["reachable"] = bool(health.ok) + payload["latency_ms"] = round(health.latency_ms, 2) + payload["models"] = health.models + payload["error"] = health.error + except EndpointError as exc: + payload["error"] = str(exc) + else: + payload["reachable"] = True + payload["note"] = ( + "profile has no endpoint_id; reachability not probed for built-in providers." 
+ ) + if as_json: + click.echo(_json.dumps(payload, indent=2)) + return + if payload.get("reachable"): + latency = payload.get("latency_ms") + models = payload.get("models") or [] + if latency is not None: + console.print( + f"[green]\u2713[/green] {profile.id} ok in {int(float(latency))} ms " + f"({len(models)} models)" + ) + else: + _ident = f"{profile.provider}/{profile.model}" + console.print(f"[green]\u2713[/green] {profile.id} ({_ident})") + else: + _err = payload.get("error", "?") + console.print(f"[red]\u2717[/red] {profile.id} unreachable: {_err}") + raise SystemExit(1) + + +@agents_group.group(name="route") +def agents_route_group() -> None: + """Manage the activity → profile routing table.""" + + +@agents_route_group.command(name="set") +@click.argument("activity") +@click.argument("profile_id") +def agents_route_set(activity: str, profile_id: str) -> None: + """Map ACTIVITY to PROFILE_ID (e.g. /plan -> architect).""" + from specsmith.agent.profiles import ProfileError, ProfileStore + + store = ProfileStore.load() + try: + store.set_route(activity, profile_id) + except ProfileError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + store.save() + console.print(f"[green]\u2713[/green] {activity} \u2192 {profile_id}") + + +@agents_route_group.command(name="clear") +@click.argument("activity") +def agents_route_clear(activity: str) -> None: + """Drop ACTIVITY from the routing table; falls back to default.""" + from specsmith.agent.profiles import ProfileStore + + store = ProfileStore.load() + store.clear_route(activity) + store.save() + console.print(f"[green]\u2713[/green] cleared route for {activity}") + + +@agents_route_group.command(name="show") +@click.option("--project-dir", type=click.Path(exists=True), default=".") +@click.option("--json", "as_json", is_flag=True, default=False) +def agents_route_show(project_dir: str, as_json: bool) -> None: + """Print the merged (project + global) routing table.""" + import json as _json 
+ + from specsmith.agent.profiles import ProfileStore + + store = ProfileStore.load_for_project(project_dir) + if as_json: + click.echo( + _json.dumps( + {"default_profile_id": store.default_profile_id, "routes": dict(store.routes)}, + indent=2, + ) + ) + return + if not store.routes: + console.print( + "[dim]No routes configured. " + "Run `specsmith agents preset apply default` to install the recommended set.[/dim]" + ) + return + for activity, profile_id in sorted(store.routes.items()): + marker = "*" if profile_id == store.default_profile_id else " " + console.print(f"{marker} {activity:20s} \u2192 {profile_id}") + + +@agents_group.group(name="preset") +def agents_preset_group() -> None: + """Apply or inspect built-in profile presets.""" + + +@agents_preset_group.command(name="apply") +@click.argument("name") +def agents_preset_apply(name: str) -> None: + """Install one of the built-in presets (default, local-only, frontier-only, cost-conscious).""" + from specsmith.agent.profiles import ProfileError, apply_preset + + try: + store = apply_preset(name) + except ProfileError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + console.print( + f"[green]\u2713[/green] applied preset [bold]{name}[/bold] \u2014 " + f"{len(store.profiles)} profiles, {len(store.routes)} routes" + ) + + +@agents_preset_group.command(name="list") +def agents_preset_list() -> None: + """Show every built-in preset.""" + from specsmith.agent.profiles import DEFAULT_PRESETS + + for name in sorted(DEFAULT_PRESETS): + blob = DEFAULT_PRESETS[name] + console.print( + f" [bold]{name}[/bold] " + f"profiles={len(blob.get('profiles', []))}, " + f"routes={len(blob.get('routes', {}))}, " + f"default={blob.get('default_profile_id', '')}" + ) + + +main.add_command(agents_group) + + +# --------------------------------------------------------------------------- +# specsmith mcp — list / test MCP servers as JSON (REQ-146 surface) +# 
--------------------------------------------------------------------------- + + +@main.group(name="mcp") +def mcp_group() -> None: + """Inspect MCP servers registered for the agent's tool registry.""" + + +@mcp_group.command(name="list") +@click.option("--project-dir", type=click.Path(exists=True), default=".") +@click.option("--json", "as_json", is_flag=True, default=False) +def mcp_list_cmd(project_dir: str, as_json: bool) -> None: + """List configured MCP servers (from ``~/.specsmith/mcp.json`` or project config).""" + import json as _json + import os + + base = os.environ.get("SPECSMITH_HOME", "").strip() + home = Path(base) if base else Path.home() / ".specsmith" + candidates = [ + Path(project_dir).resolve() / ".specsmith" / "mcp.json", + home / "mcp.json", + ] + servers: list[dict[str, Any]] = [] + source = "" + for path in candidates: + if path.is_file(): + try: + raw = _json.loads(path.read_text(encoding="utf-8")) + except ValueError: + continue + entries = raw.get("servers") if isinstance(raw, dict) else raw + if isinstance(entries, list): + for item in entries: + if isinstance(item, dict) and "id" in item: + servers.append( + { + "id": str(item.get("id", "")), + "name": str(item.get("name", item.get("id", ""))), + "command": item.get("command", ""), + "args": list(item.get("args", [])), + "transport": str(item.get("transport", "stdio")), + "description": str(item.get("description", "")), + } + ) + source = str(path) + break + payload = {"source": source, "servers": servers} + if as_json: + click.echo(_json.dumps(payload, indent=2)) + return + if not servers: + console.print("[dim]No MCP servers configured.[/dim]") + return + console.print(f"[bold]MCP servers[/bold] ({source})\n") + for s in servers: + console.print(f" [bold]{s['id']}[/bold] {s['transport']} {s['command']}") + if s["description"]: + console.print(f" [dim]{s['description']}[/dim]") + + +main.add_command(mcp_group) + + +# 
--------------------------------------------------------------------------- +# specsmith rules — enumerate rule docs across project / workspace / personal +# --------------------------------------------------------------------------- + + +@main.group(name="rules") +def rules_group() -> None: + """Inspect AEE rule documents across the layered scope hierarchy.""" + + +@rules_group.command(name="list") +@click.option("--project-dir", type=click.Path(exists=True), default=".") +@click.option("--json", "as_json", is_flag=True, default=False) +def rules_list_cmd(project_dir: str, as_json: bool) -> None: + """List rule docs grouped by scope (project, workspace, personal).""" + import json as _json + import os + + base = os.environ.get("SPECSMITH_HOME", "").strip() + home = Path(base) if base else Path.home() / ".specsmith" + project = Path(project_dir).resolve() + + scopes: dict[str, list[Path]] = { + "project": [], + "workspace": [], + "personal": [], + } + project_dirs = [ + project / ".specsmith" / "rules", + project / "docs" / "governance", + ] + workspace_dirs = [project / ".warp" / "rules"] + personal_dirs = [home / "rules"] + for d in project_dirs: + if d.is_dir(): + scopes["project"].extend(sorted(d.rglob("*.md"))) + for d in workspace_dirs: + if d.is_dir(): + scopes["workspace"].extend(sorted(d.rglob("*.md"))) + for d in personal_dirs: + if d.is_dir(): + scopes["personal"].extend(sorted(d.rglob("*.md"))) + + payload: dict[str, list[dict[str, Any]]] = {k: [] for k in scopes} + for scope_name, paths in scopes.items(): + for p in paths: + try: + head = p.read_text(encoding="utf-8", errors="replace").splitlines()[:1] + except OSError: + head = [] + title = head[0].lstrip("# ").strip() if head else p.stem + payload[scope_name].append( + { + "scope": scope_name, + "path": str(p), + "title": title or p.stem, + "last_modified": int(p.stat().st_mtime) if p.exists() else 0, + } + ) + + if as_json: + click.echo(_json.dumps(payload, indent=2)) + return + for scope_name, 
items in payload.items(): + if not items: + continue + console.print(f"\n[bold]{scope_name.title()} rules[/bold] ({len(items)})") + for item in items: + console.print(f" [cyan]{item['title']}[/cyan] [dim]{item['path']}[/dim]") + + +main.add_command(rules_group) + + if __name__ == "__main__": main() diff --git a/src/specsmith/cloud_serve.py b/src/specsmith/cloud_serve.py deleted file mode 100644 index b905a72..0000000 --- a/src/specsmith/cloud_serve.py +++ /dev/null @@ -1,150 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. -"""Reference cloud-agent receiver for `specsmith cloud spawn` (REQ-136). - -A minimal stdlib HTTP server that accepts manifest-only POSTs at ``/spawn`` -and acks them. The full streaming-back-of-results contract is documented -but kept narrow (and intentionally local-only) so we ship a working -endpoint without baking in vendor coupling. - -Auth model: optional ``Authorization: Bearer ``. When the server -is started with ``--token``, every request must present it. -Defense-in-depth: the server refuses to bind to any address other than -``127.0.0.1`` unless explicitly given ``--host`` AND ``--allow-cidr``. -""" - -from __future__ import annotations - -import ipaddress -import json -import threading -from dataclasses import dataclass, field -from http.server import BaseHTTPRequestHandler, HTTPServer -from pathlib import Path -from typing import Any - - -@dataclass -class CloudReceiverConfig: - host: str = "127.0.0.1" - port: int = 9000 - token: str = "" - allow_cidr: str = "" - storage_dir: Path = field(default_factory=lambda: Path.home() / ".specsmith" / "cloud-runs") - - -class _Handler(BaseHTTPRequestHandler): - config: CloudReceiverConfig = CloudReceiverConfig() - - # noqa: N802 -- BaseHTTPRequestHandler API. 
- def do_POST(self) -> None: # noqa: N802 - if not self._authorize(): - self._respond(401, {"error": "unauthorized"}) - return - if self.path != "/spawn": - self._respond(404, {"error": f"unknown path {self.path}"}) - return - length = int(self.headers.get("Content-Length", "0") or "0") - body = self.rfile.read(length) if length else b"" - try: - payload = json.loads(body.decode("utf-8") or "{}") - except ValueError: - self._respond(400, {"error": "invalid json"}) - return - run_id = str(payload.get("run_id", "")).strip() or _new_run_id() - target = self.config.storage_dir / run_id - try: - target.mkdir(parents=True, exist_ok=True) - (target / "manifest.json").write_text( - json.dumps(payload, indent=2), - encoding="utf-8", - ) - except OSError as exc: - self._respond(500, {"error": f"storage failed: {exc}"}) - return - self._respond( - 202, - { - "run_id": run_id, - "status": "accepted", - "stream_url": f"/runs/{run_id}/events", - }, - ) - - def do_GET(self) -> None: # noqa: N802 - if not self._authorize(): - self._respond(401, {"error": "unauthorized"}) - return - if self.path == "/health": - self._respond(200, {"ok": True}) - return - self._respond(404, {"error": f"unknown path {self.path}"}) - - def log_message(self, format: str, *args: Any) -> None: # noqa: A002 - # Quiet by default — caller sees JSON responses. 
- return - - # ── helpers ─────────────────────────────────────────────────────────── - - def _authorize(self) -> bool: - if self.config.token: - header = self.headers.get("Authorization", "") - if header != f"Bearer {self.config.token}": - return False - if self.config.allow_cidr: - try: - net = ipaddress.ip_network(self.config.allow_cidr, strict=False) - client = ipaddress.ip_address(self.client_address[0]) - if client not in net: - return False - except (ValueError, TypeError): - return False - return True - - def _respond(self, status: int, payload: dict[str, Any]) -> None: - body = json.dumps(payload).encode("utf-8") - self.send_response(status) - self.send_header("Content-Type", "application/json") - self.send_header("Content-Length", str(len(body))) - self.end_headers() - self.wfile.write(body) - - -def _new_run_id() -> str: - import uuid - - return f"cloud_{uuid.uuid4().hex[:12]}" - - -def _validate_host(config: CloudReceiverConfig) -> None: - if config.host not in {"127.0.0.1", "::1", "localhost"} and not config.allow_cidr: - raise RuntimeError( - "specsmith cloud serve refuses to bind to a non-loopback address " - "unless --allow-cidr is also set. This is a security guardrail." 
- ) - - -def make_server(config: CloudReceiverConfig) -> HTTPServer: - _validate_host(config) - config.storage_dir.mkdir(parents=True, exist_ok=True) - - class _Bound(_Handler): - pass - - _Bound.config = config - return HTTPServer((config.host, config.port), _Bound) - - -def run_in_thread(config: CloudReceiverConfig) -> tuple[HTTPServer, threading.Thread]: - """Start the server in a background thread; useful for tests.""" - server = make_server(config) - thread = threading.Thread(target=server.serve_forever, daemon=True) - thread.start() - return server, thread - - -__all__ = [ - "CloudReceiverConfig", - "make_server", - "run_in_thread", -] diff --git a/src/specsmith/vcs_commands.py b/src/specsmith/vcs_commands.py index f5b7a7a..d01dd45 100644 --- a/src/specsmith/vcs_commands.py +++ b/src/specsmith/vcs_commands.py @@ -100,7 +100,7 @@ def run_commit( *, message: str = "", auto_push: bool = False, - co_author: str = "Oz ", + co_author: str = "", ) -> GitResult: """Stage all changes and commit with governance-aware message.""" if not message: diff --git a/tests/fixtures/api_surface.json b/tests/fixtures/api_surface.json index 449b77f..56b2d68 100644 --- a/tests/fixtures/api_surface.json +++ b/tests/fixtures/api_surface.json @@ -2,6 +2,7 @@ "cli_commands": [ "abort", "agent", + "agents", "api-surface", "apply", "architect", @@ -12,14 +13,13 @@ "chat", "chat-export-block", "clean", - "cloud", - "cloud-serve", "commit", "compress", "credits", "diff", "doctor", "drive", + "endpoints", "epistemic-audit", "exec", "export", @@ -32,6 +32,7 @@ "init", "integrate", "ledger", + "mcp", "migrate", "migrate-project", "notebook", @@ -47,6 +48,7 @@ "push", "release", "req", + "rules", "run", "scan", "self-update", @@ -89,4 +91,4 @@ "verify_retry": 2, "verify_stop": 3 } -} \ No newline at end of file +} diff --git a/tests/test_agent_profiles.py b/tests/test_agent_profiles.py new file mode 100644 index 0000000..2e80909 --- /dev/null +++ b/tests/test_agent_profiles.py @@ -0,0 +1,70 @@ 
+# SPDX-License-Identifier: MIT +"""Unit tests for the agent profile store + routing table (REQ-146).""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from specsmith.agent.profiles import ( + DEFAULT_PRESETS, + Profile, + ProfileError, + ProfileStore, + apply_preset, +) + + +def test_default_preset_round_trip(tmp_path: Path) -> None: + store_path = tmp_path / "agents.json" + apply_preset("default", path=store_path) + assert store_path.is_file() + raw = json.loads(store_path.read_text(encoding="utf-8")) + assert raw["default_profile_id"] == "coder" + assert any(p["id"] == "architect" for p in raw["profiles"]) + assert raw["routes"]["/plan"] == "architect" + assert raw["routes"]["/why"] == "reviewer" + + +def test_resolve_for_activity_routes_to_correct_profile(tmp_path: Path) -> None: + store_path = tmp_path / "agents.json" + apply_preset("default", path=store_path) + store = ProfileStore.load(store_path) + assert store.resolve_for_activity("/plan").id == "architect" + assert store.resolve_for_activity("/fix").id == "coder" + assert store.resolve_for_activity("/why").id == "reviewer" + # Unknown activity falls through to the default profile. 
+ assert store.resolve_for_activity("/unknown").id == "coder" + + +def test_add_remove_round_trip(tmp_path: Path) -> None: + store = ProfileStore(path=tmp_path / "agents.json") + profile = Profile( + id="custom", + role="coder", + provider="anthropic", + model="claude-sonnet-4-5", + fallback_chain=["ollama/qwen2.5-coder:7b"], + ) + store.add(profile) + store.save() + loaded = ProfileStore.load(store.path) + assert loaded.get("custom").model == "claude-sonnet-4-5" + assert loaded.default_profile_id == "custom" + assert loaded.remove("custom") is True + assert loaded.profiles == [] + + +def test_set_route_rejects_unknown_profile(tmp_path: Path) -> None: + store = ProfileStore(path=tmp_path / "agents.json") + with pytest.raises(ProfileError): + store.set_route("/plan", "ghost") + + +def test_known_presets_have_required_keys() -> None: + for name, blob in DEFAULT_PRESETS.items(): + assert "default_profile_id" in blob, f"{name} missing default_profile_id" + assert isinstance(blob.get("profiles"), list) + assert isinstance(blob.get("routes", {}), dict) diff --git a/tests/test_agent_runner_ready.py b/tests/test_agent_runner_ready.py new file mode 100644 index 0000000..e2eb872 --- /dev/null +++ b/tests/test_agent_runner_ready.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: MIT +"""Smoke tests for the bridge handshake (REQ-145). + +These tests are deliberately minimal — they verify the contract the VS +Code extension's :class:`SpecsmithBridge` depends on: + + * :meth:`AgentRunner._print_banner` emits a ``{"type": "ready", ...}`` + JSONL line on stdout when ``json_events=True``. + * :meth:`EventEmitter.ready` writes the expected schema. + +Slow integration tests (subprocess spawn, end-to-end stdin loop) live in +``tests/sandbox/`` so the unit suite stays fast. 
+""" + +from __future__ import annotations + +import io +import json + +from specsmith.agent.events import EventEmitter +from specsmith.agent.runner import AgentRunner + + +def test_event_emitter_ready_writes_expected_schema() -> None: + buf = io.StringIO() + emitter = EventEmitter(stream=buf) + emitter.ready( + agent="nexus", + version="9.9.9", + project_dir="/tmp/proj", + provider="ollama", + model="qwen2.5:7b", + capabilities=["chat", "endpoints"], + ) + line = buf.getvalue().strip() + payload = json.loads(line) + assert payload["type"] == "ready" + assert payload["agent"] == "nexus" + assert payload["version"] == "9.9.9" + assert payload["project_dir"] == "/tmp/proj" + assert payload["provider"] == "ollama" + assert payload["model"] == "qwen2.5:7b" + assert payload["capabilities"] == ["chat", "endpoints"] + assert "timestamp" in payload + + +def test_agent_runner_print_banner_emits_ready(tmp_path) -> None: + buf = io.StringIO() + emitter = EventEmitter(stream=buf) + runner = AgentRunner( + project_dir=str(tmp_path), + provider_name="ollama", + model="qwen2.5:7b", + json_events=True, + emitter=emitter, + ) + runner._print_banner() + line = buf.getvalue().strip() + payload = json.loads(line) + assert payload["type"] == "ready" + assert payload["provider"] == "ollama" + assert payload["model"] == "qwen2.5:7b" + assert "chat" in payload["capabilities"] + + +def test_agent_runner_handle_command_clear_resets_history(tmp_path) -> None: + buf = io.StringIO() + runner = AgentRunner( + project_dir=str(tmp_path), + json_events=True, + emitter=EventEmitter(stream=buf), + ) + runner._history.append({"role": "user", "text": "hi"}) + runner._handle_command("/clear") + assert runner._history == [] diff --git a/tests/test_chat_runner_openai_compat.py b/tests/test_chat_runner_openai_compat.py new file mode 100644 index 0000000..1364352 --- /dev/null +++ b/tests/test_chat_runner_openai_compat.py @@ -0,0 +1,198 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, 
LLC. All rights reserved. +"""End-to-end test for the BYOE openai-compat driver (REQ-142, PR-2). + +Runs an in-process fake ``/chat/completions`` SSE server, wires up an +:class:`Endpoint` pointing at it, and asserts that +:func:`chat_runner.run_chat` streams tokens through the new +``_run_openai_compat`` driver when ``endpoint_id`` is set. +""" + +from __future__ import annotations + +import http.server +import json +import socket +import threading +from pathlib import Path + +import pytest + +from specsmith.agent.chat_runner import _run_openai_compat, run_chat +from specsmith.agent.endpoints import Endpoint, EndpointAuth, EndpointStore +from specsmith.agent.events import EventEmitter + + +def _free_port() -> int: + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + return int(s.getsockname()[1]) + + +_REPLY = "Plan:\n- ok\nFiles changed:\n- a.py\nTest results:\nNext action:\n" + + +class _FakeChatHandler(http.server.BaseHTTPRequestHandler): + """Streams a canned SSE chat-completions response.""" + + expected_token: str | None = None + last_request_body: dict | None = None + + def log_message(self, *args: object, **kwargs: object) -> None: # noqa: D401 + return + + def do_POST(self) -> None: # noqa: N802 + if self.path != "/v1/chat/completions": + self.send_response(404) + self.end_headers() + return + if self.expected_token is not None: + got = self.headers.get("Authorization", "") + if got != f"Bearer {self.expected_token}": + self.send_response(401) + self.end_headers() + return + length = int(self.headers.get("Content-Length", "0") or 0) + body = self.rfile.read(length).decode("utf-8") + try: + _FakeChatHandler.last_request_body = json.loads(body) + except json.JSONDecodeError: + _FakeChatHandler.last_request_body = None + + self.send_response(200) + self.send_header("Content-Type", "text/event-stream") + self.end_headers() + + chunks = [_REPLY[i : i + 16] for i in range(0, len(_REPLY), 16)] + for chunk in chunks: + payload = json.dumps( + { + "id": 
"chatcmpl-fake", + "object": "chat.completion.chunk", + "choices": [{"delta": {"content": chunk}, "index": 0}], + } + ) + self.wfile.write(f"data: {payload}\n\n".encode()) + self.wfile.flush() + self.wfile.write(b"data: [DONE]\n\n") + self.wfile.flush() + + +@pytest.fixture +def fake_chat_server() -> object: + port = _free_port() + server = http.server.HTTPServer(("127.0.0.1", port), _FakeChatHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield port + finally: + server.shutdown() + server.server_close() + _FakeChatHandler.expected_token = None + _FakeChatHandler.last_request_body = None + + +# --------------------------------------------------------------------------- +# _run_openai_compat — direct +# --------------------------------------------------------------------------- + + +def test_openai_compat_streams_tokens(fake_chat_server: int) -> None: + port = fake_chat_server + emitter = EventEmitter() + endpoint = Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="fake-model", + ) + # C1: drivers now return ``(text, usage)`` so the runner can credit + # tokens against the AgentState. The legacy contract returned just + # ``text``; tests are unpacked here to match. 
+ text, _usage = _run_openai_compat( + [{"role": "user", "content": "hello"}], emitter, "block-1", endpoint=endpoint + ) + assert text is not None + assert "Files changed" in text + assert _FakeChatHandler.last_request_body is not None + assert _FakeChatHandler.last_request_body["model"] == "fake-model" + assert _FakeChatHandler.last_request_body["stream"] is True + + +def test_openai_compat_returns_none_without_default_model(fake_chat_server: int) -> None: + port = fake_chat_server + emitter = EventEmitter() + endpoint = Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="", + ) + text, _usage = _run_openai_compat( + [{"role": "user", "content": "hi"}], emitter, "block-1", endpoint=endpoint + ) + assert text is None + + +def test_openai_compat_returns_none_when_unauthorised(fake_chat_server: int) -> None: + port = fake_chat_server + _FakeChatHandler.expected_token = "right-token" + emitter = EventEmitter() + endpoint = Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="fake-model", + auth=EndpointAuth(kind="bearer-inline", token="wrong-token"), + ) + text, _usage = _run_openai_compat( + [{"role": "user", "content": "hi"}], emitter, "block-1", endpoint=endpoint + ) + assert text is None + + +# --------------------------------------------------------------------------- +# run_chat with endpoint_id (PR-2 entry point) +# --------------------------------------------------------------------------- + + +def test_run_chat_with_endpoint_id_routes_to_openai_compat( + fake_chat_server: int, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + port = fake_chat_server + monkeypatch.setenv("SPECSMITH_HOME", str(tmp_path)) + # No ANTHROPIC_API_KEY / OPENAI_API_KEY / GOOGLE_API_KEY → would otherwise + # fall back to the auto-detect chain (Ollama may or may not be running). 
+ monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("GOOGLE_API_KEY", raising=False) + + store = EndpointStore.load() + store.add( + Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="fake-model", + ) + ) + store.save() + + emitter = EventEmitter() + result = run_chat( + "do something", + project_dir=tmp_path, + profile="standard", + session_id="sess", + emitter=emitter, + msg_block="block-1", + endpoint_id="fake", + ) + assert result is not None + assert result.provider == "openai_compat" + assert "Files changed" in result.raw_text + assert _FakeChatHandler.last_request_body is not None + assert _FakeChatHandler.last_request_body["model"] == "fake-model" diff --git a/tests/test_endpoints_cli.py b/tests/test_endpoints_cli.py new file mode 100644 index 0000000..5013579 --- /dev/null +++ b/tests/test_endpoints_cli.py @@ -0,0 +1,244 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. 
+"""CLI integration tests for `specsmith endpoints` (REQ-142, PR-1).""" + +from __future__ import annotations + +import http.server +import json +import socket +import threading +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from specsmith.cli import main + + +@pytest.fixture(autouse=True) +def _no_auto_update(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("SPECSMITH_NO_AUTO_UPDATE", "1") + monkeypatch.setenv("SPECSMITH_PYPI_CHECKED", "1") + + +@pytest.fixture(autouse=True) +def _isolated_specsmith_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + """Redirect ``~/.specsmith`` so CLI invocations never touch the real one.""" + monkeypatch.setenv("SPECSMITH_HOME", str(tmp_path)) + + +def _runner_invoke(*args: str) -> object: + return CliRunner().invoke(main, list(args)) + + +def _free_port() -> int: + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + return int(s.getsockname()[1]) + + +class _FakeModelsHandler(http.server.BaseHTTPRequestHandler): + def log_message(self, *args: object, **kwargs: object) -> None: # noqa: D401 + return + + def do_GET(self) -> None: # noqa: N802 + if self.path == "/v1/models": + body = json.dumps( + {"object": "list", "data": [{"id": "fake-1"}, {"id": "fake-2"}]} + ).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + else: + self.send_response(404) + self.end_headers() + + +@pytest.fixture +def fake_endpoint_server() -> object: + port = _free_port() + server = http.server.HTTPServer(("127.0.0.1", port), _FakeModelsHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield port + finally: + server.shutdown() + server.server_close() + + +def test_endpoints_help_lists_subcommands() -> None: + res = _runner_invoke("endpoints", "--help") + assert res.exit_code == 0 + for sub 
in ("add", "list", "remove", "default", "test", "models"): + assert sub in res.output + + +def test_endpoints_add_and_list_round_trip(tmp_path: Path) -> None: + res = _runner_invoke( + "endpoints", + "add", + "--id", + "home-vllm", + "--name", + "Home vLLM", + "--base-url", + "http://10.0.0.4:8000/v1", + "--default-model", + "qwen-coder", + "--auth", + "none", + "--json", + ) + assert res.exit_code == 0, res.output + payload = json.loads(res.output) + assert payload["endpoint"]["id"] == "home-vllm" + assert payload["default"] == "home-vllm" + + list_res = _runner_invoke("endpoints", "list", "--json") + assert list_res.exit_code == 0 + listed = json.loads(list_res.output) + assert listed["default_endpoint_id"] == "home-vllm" + assert listed["endpoints"][0]["id"] == "home-vllm" + # token must never leak even when no token was provided + assert "token" not in json.dumps(listed) or listed["endpoints"][0]["auth"]["kind"] == "none" + + +def test_endpoints_add_inline_token_redacts_in_list_output() -> None: + add_res = _runner_invoke( + "endpoints", + "add", + "--id", + "secured", + "--name", + "Secured", + "--base-url", + "https://lan.example.com/v1", + "--auth", + "bearer-inline", + "--token", + "sk-supersecret", + "--json", + ) + assert add_res.exit_code == 0, add_res.output + + list_res = _runner_invoke("endpoints", "list", "--json") + assert list_res.exit_code == 0 + body = list_res.output + assert "sk-supersecret" not in body + parsed = json.loads(body) + assert parsed["endpoints"][0]["auth"]["token"] == "***" + + +def test_endpoints_add_duplicate_id_exits_2() -> None: + base_args = [ + "endpoints", + "add", + "--id", + "dup", + "--name", + "d", + "--base-url", + "http://e/v1", + ] + first = _runner_invoke(*base_args) + assert first.exit_code == 0 + second = _runner_invoke(*base_args) + assert second.exit_code == 2 + assert "already exists" in second.output + + +def test_endpoints_add_invalid_url_exits_2() -> None: + res = _runner_invoke( + "endpoints", + "add", + 
"--id", + "bad", + "--name", + "bad", + "--base-url", + "ftp://nope/v1", + ) + assert res.exit_code == 2 + assert "http://" in res.output + + +def test_endpoints_remove_unknown_exits_1() -> None: + res = _runner_invoke("endpoints", "remove", "ghost") + assert res.exit_code == 1 + assert "unknown endpoint" in res.output + + +def test_endpoints_default_unknown_exits_1() -> None: + res = _runner_invoke("endpoints", "default", "ghost") + assert res.exit_code == 1 + assert "unknown endpoint" in res.output + + +def test_endpoints_default_promotes_existing() -> None: + _runner_invoke("endpoints", "add", "--id", "a", "--name", "a", "--base-url", "http://e/v1") + _runner_invoke("endpoints", "add", "--id", "b", "--name", "b", "--base-url", "http://e/v1") + res = _runner_invoke("endpoints", "default", "b") + assert res.exit_code == 0 + listed = json.loads(_runner_invoke("endpoints", "list", "--json").output) + assert listed["default_endpoint_id"] == "b" + + +def test_endpoints_test_against_fake_server(fake_endpoint_server: int) -> None: + port = fake_endpoint_server + add = _runner_invoke( + "endpoints", + "add", + "--id", + "fake", + "--name", + "fake", + "--base-url", + f"http://127.0.0.1:{port}/v1", + ) + assert add.exit_code == 0, add.output + + res = _runner_invoke("endpoints", "test", "fake", "--json", "--timeout", "2") + assert res.exit_code == 0, res.output + payload = json.loads(res.output) + assert payload["ok"] is True + assert "fake-1" in payload["models"] + + +def test_endpoints_models_against_fake_server(fake_endpoint_server: int) -> None: + port = fake_endpoint_server + _runner_invoke( + "endpoints", + "add", + "--id", + "fake", + "--name", + "fake", + "--base-url", + f"http://127.0.0.1:{port}/v1", + ) + res = _runner_invoke("endpoints", "models", "fake", "--json") + assert res.exit_code == 0 + payload = json.loads(res.output) + assert payload["models"] == ["fake-1", "fake-2"] + + +def test_endpoints_test_unreachable_exits_1() -> None: + _runner_invoke( + 
"endpoints", + "add", + "--id", + "ghost", + "--name", + "ghost", + "--base-url", + "http://127.0.0.1:1/v1", + ) + res = _runner_invoke("endpoints", "test", "ghost", "--json", "--timeout", "0.5") + assert res.exit_code == 1 + payload = json.loads(res.output) + assert payload["ok"] is False + assert payload["error"] diff --git a/tests/test_endpoints_store.py b/tests/test_endpoints_store.py new file mode 100644 index 0000000..b9f9668 --- /dev/null +++ b/tests/test_endpoints_store.py @@ -0,0 +1,350 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Unit tests for ``specsmith.agent.endpoints`` (REQ-142, PR-1). + +Covers the pure data layer: validation, JSON persistence, redaction, token +resolution dispatch, and the ``/models`` health probe parser. The CLI +group is exercised in ``tests/test_endpoints_cli.py``. +""" + +from __future__ import annotations + +import http.server +import json +import socket +import threading +from pathlib import Path + +import pytest + +from specsmith.agent.endpoints import ( + SCHEMA_VERSION, + Endpoint, + EndpointAuth, + EndpointError, + EndpointHealth, + EndpointStore, + _extract_model_ids, + default_store_path, +) + +# --------------------------------------------------------------------------- +# Validation +# --------------------------------------------------------------------------- + + +def test_validate_rejects_empty_id() -> None: + e = Endpoint(id="", name="x", base_url="http://example.com/v1") + with pytest.raises(EndpointError, match="non-empty"): + e.validate() + + +def test_validate_rejects_whitespace_id() -> None: + e = Endpoint(id="my endpoint", name="x", base_url="http://example.com/v1") + with pytest.raises(EndpointError, match="whitespace"): + e.validate() + + +def test_validate_rejects_non_http_scheme() -> None: + e = Endpoint(id="x", name="x", base_url="ftp://example.com/v1") + with pytest.raises(EndpointError, match="http://"): + e.validate() + + +def 
test_validate_requires_token_env_for_bearer_env() -> None: + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-env", token_env=""), + ) + with pytest.raises(EndpointError, match="token_env"): + e.validate() + + +def test_validate_requires_keyring_user_for_bearer_keyring() -> None: + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-keyring", keyring_user=""), + ) + with pytest.raises(EndpointError, match="keyring_user"): + e.validate() + + +# --------------------------------------------------------------------------- +# Round-trip + redaction +# --------------------------------------------------------------------------- + + +def test_to_public_dict_redacts_inline_token() -> None: + e = Endpoint( + id="vllm", + name="vllm", + base_url="http://10.0.0.4:8000/v1", + auth=EndpointAuth(kind="bearer-inline", token="sk-supersecret"), + ) + public = e.to_public_dict() + assert public["auth"]["kind"] == "bearer-inline" + assert public["auth"]["token"] == "***" + assert "sk-supersecret" not in json.dumps(public) + + +def test_store_round_trip(tmp_path: Path) -> None: + path = tmp_path / "endpoints.json" + store = EndpointStore(path=path) + store.add( + Endpoint( + id="home-vllm", + name="Home vLLM", + base_url="http://10.0.0.4:8000/v1", + default_model="qwen-coder", + tags=["local", "coder"], + ) + ) + store.save() + + reloaded = EndpointStore.load(path) + assert reloaded.schema_version == SCHEMA_VERSION + assert reloaded.default_endpoint_id == "home-vllm" + assert len(reloaded.endpoints) == 1 + e = reloaded.endpoints[0] + assert e.base_url == "http://10.0.0.4:8000/v1" + assert e.default_model == "qwen-coder" + assert e.tags == ["local", "coder"] + assert e.created_at # auto-stamped + + +def test_store_load_returns_empty_when_missing(tmp_path: Path) -> None: + path = tmp_path / "absent.json" + store = EndpointStore.load(path) + assert store.endpoints == [] + assert 
store.default_endpoint_id == "" + + +def test_store_load_rejects_corrupt_json(tmp_path: Path) -> None: + path = tmp_path / "bad.json" + path.write_text("{not json", encoding="utf-8") + with pytest.raises(EndpointError, match="corrupted"): + EndpointStore.load(path) + + +def test_store_load_rejects_wrong_schema(tmp_path: Path) -> None: + path = tmp_path / "v999.json" + path.write_text(json.dumps({"schema_version": 999, "endpoints": []}), encoding="utf-8") + with pytest.raises(EndpointError, match="schema_version=999"): + EndpointStore.load(path) + + +def test_store_add_blocks_duplicates_without_replace(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="dup", name="d", base_url="http://e/v1")) + with pytest.raises(EndpointError, match="already exists"): + store.add(Endpoint(id="dup", name="d", base_url="http://e/v1")) + + +def test_store_add_replace_overwrites(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="dup", name="orig", base_url="http://e/v1")) + store.add( + Endpoint(id="dup", name="new", base_url="http://e/v1", default_model="m"), + replace=True, + ) + assert store.get("dup").name == "new" + assert store.get("dup").default_model == "m" + + +def test_remove_clears_default(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="a", name="a", base_url="http://e/v1")) + store.add(Endpoint(id="b", name="b", base_url="http://e/v1")) + assert store.default_endpoint_id == "a" + store.remove("a") + # Falls back to the next endpoint in the list, not empty. 
+ assert store.default_endpoint_id == "b" + store.remove("b") + assert store.default_endpoint_id == "" + + +def test_resolve_uses_default(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="a", name="a", base_url="http://e/v1")) + assert store.resolve(None).id == "a" + + +def test_resolve_raises_when_no_default(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + with pytest.raises(EndpointError, match="no endpoint specified"): + store.resolve(None) + + +def test_set_default_rejects_unknown(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + with pytest.raises(EndpointError, match="unknown endpoint"): + store.set_default("ghost") + + +def test_default_store_path_honours_specsmith_home( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setenv("SPECSMITH_HOME", str(tmp_path)) + assert default_store_path() == tmp_path / "endpoints.json" + + +# --------------------------------------------------------------------------- +# Token resolution +# --------------------------------------------------------------------------- + + +def test_resolve_token_none_returns_none() -> None: + e = Endpoint(id="x", name="x", base_url="http://e/v1") + assert e.resolve_token() is None + + +def test_resolve_token_bearer_inline_returns_value() -> None: + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-inline", token="sk-abc"), + ) + assert e.resolve_token() == "sk-abc" + + +def test_resolve_token_bearer_env_reads_environment( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("MY_VLLM_TOKEN", "lan-token") + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-env", token_env="MY_VLLM_TOKEN"), + ) + assert e.resolve_token() == "lan-token" + + +def test_resolve_token_bearer_env_raises_when_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + 
monkeypatch.delenv("ABSENT_TOKEN", raising=False) + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-env", token_env="ABSENT_TOKEN"), + ) + with pytest.raises(EndpointError, match="ABSENT_TOKEN"): + e.resolve_token() + + +# --------------------------------------------------------------------------- +# /models parser +# --------------------------------------------------------------------------- + + +def test_extract_model_ids_handles_openai_shape() -> None: + payload = {"object": "list", "data": [{"id": "m1"}, {"id": "m2"}]} + assert _extract_model_ids(payload) == ["m1", "m2"] + + +def test_extract_model_ids_handles_models_array_shape() -> None: + payload = {"models": ["a", "b"]} + assert _extract_model_ids(payload) == ["a", "b"] + + +def test_extract_model_ids_returns_empty_for_unrecognised_payload() -> None: + assert _extract_model_ids({"unexpected": True}) == [] + assert _extract_model_ids("not a dict") == [] + + +# --------------------------------------------------------------------------- +# health() against an in-process fake /v1/models server +# --------------------------------------------------------------------------- + + +def _free_port() -> int: + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + return int(s.getsockname()[1]) + + +class _FakeModelsHandler(http.server.BaseHTTPRequestHandler): + """Serves OpenAI-shape /v1/models payloads for health() tests.""" + + expected_token: str | None = None # set per-test via class attribute + + def log_message(self, *args: object, **kwargs: object) -> None: # noqa: D401 + # Quiet the test runner. 
+ return + + def do_GET(self) -> None: # noqa: N802 + if self.path != "/v1/models": + self.send_response(404) + self.end_headers() + return + if self.expected_token is not None: + got = self.headers.get("Authorization", "") + if got != f"Bearer {self.expected_token}": + self.send_response(401) + self.end_headers() + self.wfile.write(b'{"error": "unauthorized"}') + return + body = json.dumps( + {"object": "list", "data": [{"id": "fake-model-1"}, {"id": "fake-model-2"}]} + ).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + +@pytest.fixture +def fake_models_server() -> object: + port = _free_port() + server = http.server.HTTPServer(("127.0.0.1", port), _FakeModelsHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield port + finally: + server.shutdown() + server.server_close() + + +def test_health_against_fake_server_lists_models(fake_models_server: int) -> None: + port = fake_models_server + e = Endpoint(id="fake", name="fake", base_url=f"http://127.0.0.1:{port}/v1") + health = e.health(timeout=2.0) + assert isinstance(health, EndpointHealth) + assert health.ok + assert health.status_code == 200 + assert "fake-model-1" in health.models + + +def test_health_against_unauthenticated_request_returns_401( + fake_models_server: int, +) -> None: + port = fake_models_server + _FakeModelsHandler.expected_token = "right-token" + try: + e = Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + auth=EndpointAuth(kind="bearer-inline", token="wrong-token"), + ) + health = e.health(timeout=2.0) + assert not health.ok + assert health.status_code == 401 + finally: + _FakeModelsHandler.expected_token = None + + +def test_health_against_unreachable_endpoint_returns_error() -> None: + e = Endpoint(id="ghost", name="ghost", base_url="http://127.0.0.1:1/v1") 
+ health = e.health(timeout=0.5) + assert not health.ok + assert health.error diff --git a/tests/test_fallback_chain.py b/tests/test_fallback_chain.py new file mode 100644 index 0000000..7afed5d --- /dev/null +++ b/tests/test_fallback_chain.py @@ -0,0 +1,343 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Fallback-chain executor tests (REQ-146, F5). + +Exercises ``specsmith.agent.fallback.run_with_fallback`` end-to-end with +synthetic ``invoke`` callables that raise the same shapes the real +provider drivers raise: ``urllib.error.HTTPError`` for HTTP responses, +``TimeoutError`` / ``socket.timeout`` / ``ConnectionError`` for network +failures, and arbitrary ``RuntimeError`` for programmer bugs. + +These tests are pure-Python and hermetic — no real HTTP, no providers. +""" + +from __future__ import annotations + +import io +from urllib.error import HTTPError, URLError + +import pytest + +from specsmith.agent.fallback import ( + FallbackAttempt, + FallbackResult, + parse_target, + run_with_fallback, +) + +# --------------------------------------------------------------------------- +# parse_target +# --------------------------------------------------------------------------- + + +def test_parse_target_provider_with_model() -> None: + assert parse_target("anthropic/claude-haiku-4-5") == ( + "provider", + "anthropic", + "claude-haiku-4-5", + ) + + +def test_parse_target_ollama_model_with_colon() -> None: + # Ollama model tags contain a colon; the partition on '/' must keep the + # whole right-hand side as the model name. + assert parse_target("ollama/qwen2.5:7b") == ("provider", "ollama", "qwen2.5:7b") + + +def test_parse_target_endpoint_prefix() -> None: + assert parse_target("endpoint:home-vllm") == ("endpoint", "home-vllm", "") + + +def test_parse_target_provider_only() -> None: + # No '/' — no model component. 
+ assert parse_target("anthropic") == ("provider", "anthropic", "") + + +def test_parse_target_empty_or_blank() -> None: + assert parse_target("") == ("provider", "", "") + assert parse_target(" ") == ("provider", "", "") + + +def test_parse_target_strips_whitespace() -> None: + assert parse_target(" anthropic / claude-haiku-4-5 ") == ( + "provider", + "anthropic", + "claude-haiku-4-5", + ) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _http_error(code: int) -> HTTPError: + """Construct an HTTPError matching what urllib raises in production.""" + return HTTPError( + url="http://example/v1/chat", + code=code, + msg=f"HTTP {code}", + hdrs=None, # type: ignore[arg-type] + fp=io.BytesIO(b""), + ) + + +def _make_invoke(behaviors: dict[str, object]): + """Return an ``invoke`` callable whose behavior keys on the ident. + + Each value is either an exception instance to raise or a sentinel + string to return. 
+ """ + + def invoke(kind: str, ident: str, model: str) -> object: # noqa: ARG001 + b = behaviors.get(ident) + if isinstance(b, BaseException): + raise b + return b + + return invoke + + +# --------------------------------------------------------------------------- +# run_with_fallback — primary success path +# --------------------------------------------------------------------------- + + +def test_primary_success_short_circuits_chain() -> None: + invoke = _make_invoke({"anthropic": "primary-result", "ollama": "fallback-result"}) + out = run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + ) + assert isinstance(out, FallbackResult) + assert out.value == "primary-result" + assert out.used == "anthropic/claude-haiku-4-5" + assert len(out.attempts) == 1 + assert out.attempts[0].ok is True + + +# --------------------------------------------------------------------------- +# Transient failures — chain walks +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "code", + [408, 429, 500, 502, 503, 504, 599], +) +def test_http_5xx_and_throttling_falls_through(code: int) -> None: + """5xx + 408 + 429 are treated as transient; chain advances.""" + invoke = _make_invoke({"anthropic": _http_error(code), "ollama": "fallback-result"}) + out = run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + ) + assert out.value == "fallback-result" + assert out.used == "ollama/qwen2.5:7b" + assert len(out.attempts) == 2 + assert out.attempts[0].ok is False + assert str(code) in out.attempts[0].error + assert out.attempts[1].ok is True + + +@pytest.mark.parametrize( + "exc", + [ + TimeoutError("timed out"), + URLError("connection refused"), + ConnectionError("connection reset"), + OSError("network is unreachable"), + ], +) +def test_network_errors_fall_through(exc: BaseException) -> None: + 
invoke = _make_invoke({"anthropic": exc, "ollama": "fallback-result"}) + out = run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + ) + assert out.value == "fallback-result" + assert out.used == "ollama/qwen2.5:7b" + assert out.attempts[0].ok is False + assert out.attempts[1].ok is True + + +def test_chain_walks_all_targets_in_order() -> None: + """All targets failing transiently exhausts the chain (value=None).""" + invoke = _make_invoke( + { + "anthropic": _http_error(503), + "ollama": _http_error(429), + "home-vllm": _http_error(500), + }, + ) + out = run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b", "endpoint:home-vllm"], + invoke=invoke, + ) + assert out.value is None + assert out.used == "" + assert [a.target for a in out.attempts] == [ + "anthropic/claude-haiku-4-5", + "ollama/qwen2.5:7b", + "endpoint:home-vllm", + ] + assert all(a.ok is False for a in out.attempts) + + +# --------------------------------------------------------------------------- +# Non-transient failures must surface immediately +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("code", [400, 401, 403, 404, 422]) +def test_http_4xx_non_throttle_is_not_retried(code: int) -> None: + """4xx (except 408/429) must bubble up. + + Masking auth errors as outages is the failure mode this guards against. 
+ """ + invoke = _make_invoke({"anthropic": _http_error(code), "ollama": "fallback"}) + with pytest.raises(HTTPError) as exc_info: + run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + ) + assert exc_info.value.code == code + + +def test_runtime_error_is_not_retried() -> None: + invoke = _make_invoke({"anthropic": RuntimeError("invariant violated")}) + with pytest.raises(RuntimeError, match="invariant violated"): + run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + ) + + +def test_value_error_is_not_retried() -> None: + invoke = _make_invoke({"anthropic": ValueError("bad input")}) + with pytest.raises(ValueError, match="bad input"): + run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + ) + + +# --------------------------------------------------------------------------- +# Chain edge cases +# --------------------------------------------------------------------------- + + +def test_blank_targets_are_skipped() -> None: + """Empty-string entries in the chain are filtered out, not retried as ''. + + Otherwise an empty entry would consume an attempt slot. + """ + invoke = _make_invoke( + {"anthropic": _http_error(503), "ollama": "fallback-result"}, + ) + out = run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["", " ", "ollama/qwen2.5:7b"], + invoke=invoke, + ) + assert out.used == "ollama/qwen2.5:7b" + # Only the primary + the real ollama target should be in attempts. 
+ assert [a.target for a in out.attempts] == [ + "anthropic/claude-haiku-4-5", + "ollama/qwen2.5:7b", + ] + + +def test_empty_primary_target_is_skipped_and_chain_runs() -> None: + invoke = _make_invoke({"ollama": "fallback-result"}) + out = run_with_fallback( + primary_target="", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + ) + assert out.used == "ollama/qwen2.5:7b" + assert out.value == "fallback-result" + + +def test_endpoint_target_invokes_with_endpoint_kind() -> None: + seen: list[tuple[str, str, str]] = [] + + def invoke(kind: str, ident: str, model: str) -> object: + seen.append((kind, ident, model)) + return "ok" + + out = run_with_fallback( + primary_target="endpoint:home-vllm", + fallback_chain=[], + invoke=invoke, + ) + assert out.value == "ok" + assert seen == [("endpoint", "home-vllm", "")] + + +# --------------------------------------------------------------------------- +# on_attempt callback +# --------------------------------------------------------------------------- + + +def test_on_attempt_called_per_attempt() -> None: + invoke = _make_invoke({"anthropic": _http_error(503), "ollama": "fallback"}) + seen: list[FallbackAttempt] = [] + out = run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + on_attempt=seen.append, + ) + assert out.value == "fallback" + assert len(seen) == 2 + assert seen[0].ok is False + assert seen[1].ok is True + assert seen[1].target == "ollama/qwen2.5:7b" + + +def test_on_attempt_callback_errors_are_swallowed() -> None: + """A buggy on_attempt callback must not break the fallback chain.""" + + def boom(_: FallbackAttempt) -> None: + raise RuntimeError("callback exploded") + + invoke = _make_invoke({"anthropic": _http_error(503), "ollama": "fallback"}) + out = run_with_fallback( + primary_target="anthropic/claude-haiku-4-5", + fallback_chain=["ollama/qwen2.5:7b"], + invoke=invoke, + on_attempt=boom, + ) + assert out.value == "fallback" + 
assert out.used == "ollama/qwen2.5:7b" + + +# --------------------------------------------------------------------------- +# FallbackAttempt / FallbackResult dataclasses +# --------------------------------------------------------------------------- + + +def test_fallback_attempt_to_dict_round_trip() -> None: + attempt = FallbackAttempt(target="anthropic/claude", ok=False, error="boom") + assert attempt.to_dict() == { + "target": "anthropic/claude", + "ok": False, + "error": "boom", + } + + +def test_fallback_result_default_attempts_is_empty_list() -> None: + """FallbackResult.__post_init__ must replace None with [] so callers can + safely append without checking for None first.""" + result = FallbackResult(value="x") + assert result.attempts == [] + result.attempts.append(FallbackAttempt(target="a", ok=True)) + assert len(result.attempts) == 1 diff --git a/tests/test_phase34_completion.py b/tests/test_phase34_completion.py index d4a1f6e..bf80da0 100644 --- a/tests/test_phase34_completion.py +++ b/tests/test_phase34_completion.py @@ -1,10 +1,10 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2026 BitConcepts, LLC. All rights reserved. -"""Phase 3-4 completion tests: mcp loader, notebook + cloud commands. +"""Phase 3-4 completion tests: mcp loader and notebook commands. -Covers REQ-121 (MCP), REQ-123 (notebook), REQ-126 (cloud spawn), REQ-124 -(perf smoke). The CLI commands are exercised through Click's CliRunner so -the tests stay fully hermetic — no real subprocess, no real PyPI hits. +Covers REQ-121 (MCP), REQ-123 (notebook), and REQ-124 (perf smoke). The +CLI commands are exercised through Click's CliRunner so the tests stay +fully hermetic — no real subprocess, no real PyPI hits. 
""" from __future__ import annotations @@ -169,38 +169,6 @@ def test_notebook_replay_missing_slug_exits_non_zero(tmp_path: Path) -> None: assert "No notebook" in result.output -# ── Cloud spawn (REQ-136) ────────────────────────────────────── -# -# REQ-136 supersedes the original REQ-126 stub: ``cloud spawn`` now takes a -# manifest YAML or JSON file and POSTs it to ``/spawn`` with -# optional bearer auth. The dry-run mode prints the would-be POST as JSON -# instead of building a workspace tarball. - - -def test_cloud_spawn_dry_run_emits_payload_json(tmp_path: Path) -> None: - manifest = tmp_path / "manifest.yml" - manifest.write_text("task: add hello world\nrun_id: r1\n", encoding="utf-8") - runner = CliRunner() - result = runner.invoke( - main, - ["cloud", "spawn", str(manifest), "--dry-run"], - env={"SPECSMITH_NO_AUTO_UPDATE": "1", "SPECSMITH_PYPI_CHECKED": "1"}, - ) - assert result.exit_code == 0, result.output - payload = json.loads(result.output) - assert payload["manifest"] == {"task": "add hello world", "run_id": "r1"} - assert payload["endpoint"].startswith("http://") - - -def test_cloud_spawn_help_documents_endpoint(tmp_path: Path) -> None: - runner = CliRunner() - result = runner.invoke(main, ["cloud", "spawn", "--help"]) - assert result.exit_code == 0 - assert "--endpoint" in result.output - assert "--dry-run" in result.output - assert "--token" in result.output - - # ── Perf smoke (REQ-124 / TEST-124) ────────────────────────────────────────── diff --git a/tests/test_warp_parity.py b/tests/test_warp_parity.py index cc1204f..c73c7ec 100644 --- a/tests/test_warp_parity.py +++ b/tests/test_warp_parity.py @@ -1,19 +1,14 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2026 BitConcepts, LLC. All rights reserved. -"""Warp parity bundle tests (REQ-133..REQ-136, REQ-140). +"""Warp parity bundle tests (REQ-133..REQ-135, REQ-140). -Exercises the four new modules introduced in MEGA-PR-CLI plus the API -stability surface and the CLI wiring that exposes them. 
+Exercises the modules introduced in MEGA-PR-CLI plus the API stability +surface and the CLI wiring that exposes them. """ from __future__ import annotations import json -import socket -import threading -import time -import urllib.error -import urllib.request from pathlib import Path import pytest @@ -21,7 +16,6 @@ from specsmith.block_export import export_block, slice_block from specsmith.cli import main -from specsmith.cloud_serve import CloudReceiverConfig, make_server from specsmith.drive import default_drive_dir, listing, pull, push from specsmith.history_search import HistoryHit, search @@ -230,111 +224,7 @@ def test_history_search_semantic_falls_back_to_keyword(tmp_path: Path) -> None: # --------------------------------------------------------------------------- -# cloud_serve.py — REQ-136 -# --------------------------------------------------------------------------- - - -def _free_port() -> int: - with socket.socket() as s: - s.bind(("127.0.0.1", 0)) - return int(s.getsockname()[1]) - - -@pytest.fixture -def cloud_server(tmp_path: Path) -> tuple[CloudReceiverConfig, threading.Thread, int]: - port = _free_port() - config = CloudReceiverConfig( - host="127.0.0.1", - port=port, - token="secret", - storage_dir=tmp_path / "cloud-runs", - ) - server = make_server(config) - thread = threading.Thread(target=server.serve_forever, daemon=True) - thread.start() - # Tiny settle delay; the server is in-process so this is cheap and avoids - # racing the first connection on slow CI machines. 
- time.sleep(0.05) - try: - yield config, thread, port - finally: - server.shutdown() - server.server_close() - - -def _post_json( - port: int, path: str, payload: dict, *, token: str | None = None -) -> tuple[int, dict]: - body = json.dumps(payload).encode("utf-8") - req = urllib.request.Request( # noqa: S310 - localhost - f"http://127.0.0.1:{port}{path}", - data=body, - method="POST", - headers={"Content-Type": "application/json"}, - ) - if token: - req.add_header("Authorization", f"Bearer {token}") - try: - with urllib.request.urlopen(req, timeout=5) as resp: # noqa: S310 - return resp.status, json.loads(resp.read().decode("utf-8")) - except urllib.error.HTTPError as exc: - body_text = exc.read().decode("utf-8") or "{}" - return exc.code, json.loads(body_text) - - -def test_cloud_serve_rejects_missing_token(cloud_server) -> None: # type: ignore[no-untyped-def] - _, _, port = cloud_server - status, payload = _post_json(port, "/spawn", {"task": "hi"}) - assert status == 401 - assert payload == {"error": "unauthorized"} - - -def test_cloud_serve_rejects_wrong_token(cloud_server) -> None: # type: ignore[no-untyped-def] - _, _, port = cloud_server - status, _ = _post_json(port, "/spawn", {"task": "hi"}, token="wrong") - assert status == 401 - - -def test_cloud_serve_accepts_valid_token_and_persists_manifest(cloud_server) -> None: # type: ignore[no-untyped-def] - config, _, port = cloud_server - status, payload = _post_json( - port, - "/spawn", - {"task": "demo", "run_id": "fixed_run"}, - token="secret", - ) - assert status == 202 - assert payload["run_id"] == "fixed_run" - assert payload["status"] == "accepted" - manifest = config.storage_dir / "fixed_run" / "manifest.json" - assert manifest.is_file() - body = json.loads(manifest.read_text(encoding="utf-8")) - assert body == {"task": "demo", "run_id": "fixed_run"} - - -def test_cloud_serve_health_requires_token(cloud_server) -> None: # type: ignore[no-untyped-def] - _, _, port = cloud_server - req = 
urllib.request.Request( # noqa: S310 - localhost - f"http://127.0.0.1:{port}/health", - headers={"Authorization": "Bearer secret"}, - ) - with urllib.request.urlopen(req, timeout=5) as resp: # noqa: S310 - data = json.loads(resp.read().decode("utf-8")) - assert data == {"ok": True} - - -def test_cloud_serve_refuses_non_loopback_without_cidr(tmp_path: Path) -> None: - config = CloudReceiverConfig( - host="0.0.0.0", # noqa: S104 - intentional, we expect a guardrail - port=_free_port(), - storage_dir=tmp_path / "cloud-runs", - ) - with pytest.raises(RuntimeError): - make_server(config) - - -# --------------------------------------------------------------------------- -# CLI wiring — chat export-block / cloud-serve / api-surface (REQ-140) +# CLI wiring — chat export-block / api-surface (REQ-140) # --------------------------------------------------------------------------- @@ -414,7 +304,6 @@ def test_cli_api_surface_emits_stable_keys(tmp_path: Path) -> None: "history", "chat", "chat-export-block", - "cloud-serve", "api-surface", "suggest-command", } diff --git a/tests/test_warp_parity_followup.py b/tests/test_warp_parity_followup.py index efb2cb9..94c4e80 100644 --- a/tests/test_warp_parity_followup.py +++ b/tests/test_warp_parity_followup.py @@ -4,7 +4,6 @@ Covers: * serve --auth-token (REQ-137) -* cloud spawn client (REQ-136) * voice transcription wrapper (REQ-141) * api-surface stability snapshot (REQ-140) """ @@ -29,8 +28,6 @@ transcribe, ) from specsmith.cli import main -from specsmith.cloud_serve import CloudReceiverConfig -from specsmith.cloud_serve import make_server as make_cloud_server @pytest.fixture(autouse=True) @@ -129,96 +126,6 @@ def test_serve_cli_help_documents_auth_token() -> None: assert "REQ-137" in res.output -# --------------------------------------------------------------------------- -# REQ-136: cloud spawn client -# --------------------------------------------------------------------------- - - -@pytest.fixture -def cloud_endpoint(tmp_path: 
Path): - port = _free_port() - config = CloudReceiverConfig( - host="127.0.0.1", - port=port, - token="cloud-secret", - storage_dir=tmp_path / "cloud-runs", - ) - server = make_cloud_server(config) - thread = threading.Thread(target=server.serve_forever, daemon=True) - thread.start() - time.sleep(0.05) - try: - yield port, config - finally: - server.shutdown() - server.server_close() - - -def test_cloud_spawn_dry_run(tmp_path: Path) -> None: - manifest = tmp_path / "manifest.yml" - manifest.write_text("task: hello\nrun_id: r1\n", encoding="utf-8") - runner = CliRunner() - res = runner.invoke(main, ["cloud", "spawn", str(manifest), "--dry-run"]) - assert res.exit_code == 0 - payload = json.loads(res.output) - assert payload["manifest"] == {"task": "hello", "run_id": "r1"} - assert payload["endpoint"].startswith("http://") - - -def test_cloud_spawn_non_mapping_payload_exits_2(tmp_path: Path) -> None: - manifest = tmp_path / "bad.json" - # JSON parses fine but the payload is a list, not a mapping → exit 2. 
- manifest.write_text(json.dumps([1, 2, 3]), encoding="utf-8") - runner = CliRunner() - res = runner.invoke(main, ["cloud", "spawn", str(manifest), "--dry-run"]) - assert res.exit_code == 2 - assert "mapping" in res.output.lower() or "object" in res.output.lower() - - -def test_cloud_spawn_missing_token_returns_401( # type: ignore[no-untyped-def] - tmp_path: Path, cloud_endpoint -) -> None: - port, _ = cloud_endpoint - manifest = tmp_path / "m.json" - manifest.write_text(json.dumps({"task": "x"}), encoding="utf-8") - runner = CliRunner() - res = runner.invoke( - main, - ["cloud", "spawn", str(manifest), "--endpoint", f"http://127.0.0.1:{port}"], - ) - assert res.exit_code != 0 - assert "401" in res.output or "unauthorized" in res.output.lower() - - -def test_cloud_spawn_with_token_persists_manifest( # type: ignore[no-untyped-def] - tmp_path: Path, cloud_endpoint -) -> None: - port, config = cloud_endpoint - manifest = tmp_path / "m.json" - manifest.write_text( - json.dumps({"task": "demo", "run_id": "spawn_test"}), - encoding="utf-8", - ) - runner = CliRunner() - res = runner.invoke( - main, - [ - "cloud", - "spawn", - str(manifest), - "--endpoint", - f"http://127.0.0.1:{port}", - "--token", - "cloud-secret", - ], - ) - assert res.exit_code == 0, res.output - response = json.loads(res.output) - assert response["run_id"] == "spawn_test" - persisted = config.storage_dir / "spawn_test" / "manifest.json" - assert persisted.is_file() - - # --------------------------------------------------------------------------- # REQ-141: voice transcription wrapper # --------------------------------------------------------------------------- @@ -327,8 +234,6 @@ def test_api_surface_contains_required_1_0_commands() -> None: "history", "chat", "chat-export-block", - "cloud", - "cloud-serve", "voice", "api-surface", "suggest-command",