diff --git a/.claude/commands/wtnew.md b/.claude/commands/wtnew.md index 59d9746cd..9fef56647 100644 --- a/.claude/commands/wtnew.md +++ b/.claude/commands/wtnew.md @@ -1,6 +1,6 @@ # 创建 Worktree -基于最新 `origin/main` 创建隔离的 worktree 开发环境。 +基于最新 `origin/dev` 创建隔离的 worktree 开发环境,并自动拉起一个并行工作的 Kitty + Codex 开发位。 ## 参数 @@ -21,7 +21,7 @@ PROJECT_NAME=$(basename "$MAIN_REPO") git fetch origin ``` -确保基于最新的 `origin/main` 创建,避免从过时的 base 分叉。 +确保基于最新的 `origin/dev` 创建,避免从过时的 base 分叉。 ## Step 2:启用 worktreeConfig @@ -38,7 +38,7 @@ git config extensions.worktreeConfig true 路径规则:`~/worktrees/<项目名>--<目录名>`(如 `~/worktrees/leon--feat-eval`) ```bash -git worktree add "$HOME/worktrees/$PROJECT_NAME--<目录名>" -b $ARGUMENTS origin/main +git worktree add "$HOME/worktrees/$PROJECT_NAME--<目录名>" -b $ARGUMENTS origin/dev ``` - worktree 存放在 `~/worktrees/`,与主仓库完全隔离 @@ -163,16 +163,44 @@ ln -s "$MAIN_REPO/CLAUDE.local.md" CLAUDE.local.md 2>/dev/null 输出: - worktree 路径 - 分支名 +- base 分支(必须明确是 `origin/dev`) - 分配的端口(backend / frontend) - 自动生成的描述 - `CLAUDE.local.md` 符号链接状态 -询问用户:是否在新 worktree 中打开新的 Claude 会话? +## Step 9:自动拉起 Kitty + Codex 并行工作位 -如果是,用 osascript 打开新终端并启动 claude(**必须将路径替换为实际计算出的完整绝对路径,不得使用变量或占位符**): +不要再询问“是否打开新的 Claude 会话”。默认直接拉起一个新的 Kitty tab,并在里面启动 Codex。 + +要求: +- tab title 固定为 `dev-feature` +- Codex 必须在新建好的 worktree 路径里启动 +- 必须用实际计算出的完整绝对路径,不得保留变量或占位符 +- 如果当前 shell 没有 `KITTY_LISTEN_ON`,要明确报错并停下,不要静默跳过 + +执行命令(**必须将路径替换为实际计算出的完整绝对路径,不得使用变量或占位符**): ```bash -osascript -e 'tell app "Terminal" to do script "cd \"/Users/apple/worktrees/<项目名>--<目录名>\" && claude"' +if [ -z "$KITTY_LISTEN_ON" ]; then + echo "❌ 错误:未设置 KITTY_LISTEN_ON,无法自动创建 dev-feature kitty tab" + exit 1 +fi + +kitty @ --to "$KITTY_LISTEN_ON" launch \ + --type tab \ + --tab-title "dev-feature" \ + --title "dev-feature" \ + zsh -lc 'cd "/Users/apple/worktrees/<项目名>--<目录名>" && codex --cd "/Users/apple/worktrees/<项目名>--<目录名>"' ``` -关键:`cd` 和 `claude` 必须写在 osascript 的 `do script` 字符串内部,不是写在外层 Bash 命令里。 +关键: +- `cd` 和 `codex --cd ...` 必须写在新 tab 的命令字符串内部 +- `codex --cd` 和前面的 `cd` 都必须指向同一个实际 worktree 绝对路径 +- 不要退回 Terminal / osascript;这里的标准交互面就是 Kitty tab + +## Step 10:最终输出 + +除了原有输出,再追加: +- `Codex tab: dev-feature` +- `Codex cwd: ` +- 如果启动成功,明确说明“并行开发位已就绪” diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b384072f3..4a11dc769 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,8 +2,9 @@ name: CI on: push: - branches: [main] + branches: [main, dev] pull_request: + branches: [main, dev] jobs: lint: @@ -41,6 +42,10 @@ jobs: - name: Run tests # --maxfail=5: surface up to 5 failures per platform before stopping # e2e tests self-skip via skipif when provider secrets are absent + env: + SUPABASE_PUBLIC_URL: ${{ secrets.SUPABASE_PUBLIC_URL }} + LEON_SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_KEY }} + SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }} run: uv run pytest tests/ --ignore=tests/test_e2e_providers.py --ignore=tests/test_sandbox_e2e.py --ignore=tests/test_daytona_e2e.py --ignore=tests/test_e2e_backend_api.py --ignore=tests/test_e2e_summary_persistence.py --ignore=tests/test_p3_e2e.py --maxfail=5 --timeout=60 -q frontend: diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml index ee18d0d38..54eee564c 100644 --- a/.github/workflows/deploy-staging.yml +++ b/.github/workflows/deploy-staging.yml @@ -7,6 +7,9 @@ name: Deploy Staging # Both update the staging apps to the target branch, then deploy. 
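The header comment above summarizes the staging flow this workflow implements: PATCH the application's `git_branch` to the target ref, then trigger a deploy and poll it. A minimal Python sketch of that two-call sequence, assuming the Coolify endpoints and response shape used later in this file; `requests`, the function name, and the `coolify_url` / `token` / `app_uuid` parameters are illustrative placeholders, not part of this repo:

```python
import requests


def point_and_deploy(coolify_url: str, token: str, app_uuid: str, ref: str) -> str:
    """Update a Coolify application's tracked branch, then trigger a deploy.

    Returns the deployment_uuid that the workflow later polls for status.
    """
    headers = {"Authorization": f"Bearer {token}"}

    # Step 1: point the application at the target ref.
    resp = requests.patch(
        f"{coolify_url}/api/v1/applications/{app_uuid}",
        headers={**headers, "Content-Type": "application/json"},
        json={"git_branch": ref},
        timeout=30,
    )
    resp.raise_for_status()

    # Step 2: trigger a non-forced deploy of the updated application.
    resp = requests.get(
        f"{coolify_url}/api/v1/deploy",
        params={"uuid": app_uuid, "force": "false"},
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()
    # The deploy endpoint returns deployments[0].deployment_uuid,
    # which the "Wait for staging deployment" step polls below.
    return resp.json()["deployments"][0]["deployment_uuid"]
```
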
on: + push: + branches: + - pr188-agent-optimize pull_request: types: [labeled] workflow_dispatch: @@ -23,9 +26,12 @@ jobs: deploy-staging: # For label trigger: only run when the label is exactly "deploy-staging" if: > + github.event_name == 'push' || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && github.event.label.name == 'deploy-staging') runs-on: ubuntu-latest + env: + STAGING_STACK_UUID: fasbsube26s75ag6qus5bpi2 steps: - name: Resolve target ref @@ -33,33 +39,99 @@ jobs: run: | if [ "${{ github.event_name }}" = "pull_request" ]; then echo "ref=${{ github.head_ref }}" >> "$GITHUB_OUTPUT" + elif [ "${{ github.event_name }}" = "push" ]; then + echo "ref=${{ github.ref_name }}" >> "$GITHUB_OUTPUT" else echo "ref=${{ inputs.ref }}" >> "$GITHUB_OUTPUT" fi - - name: Update staging backend branch + - name: Check out target ref + uses: actions/checkout@v4 + with: + ref: ${{ steps.ref.outputs.ref }} + + - name: Resolve target commit + id: target run: | - curl -s -X PATCH "${{ secrets.COOLIFY_URL }}/api/v1/applications/${{ secrets.COOLIFY_BACKEND_STAGING_UUID }}" \ - -H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}" \ - -H "Content-Type: application/json" \ - -d '{"git_branch": "${{ steps.ref.outputs.ref }}"}' + set -euo pipefail + echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + + - name: Assert repo staging compose contract + run: | + set -euo pipefail + grep -F "leon-home:/root/.leon" docker-compose.yml >/dev/null + grep -F "volumes:" docker-compose.yml >/dev/null - - name: Update staging frontend branch + - name: Update staging stack branch run: | - curl -s -X PATCH "${{ secrets.COOLIFY_URL }}/api/v1/applications/${{ secrets.COOLIFY_FRONTEND_STAGING_UUID }}" \ + set -euo pipefail + body="$(curl -sS --fail-with-body -X PATCH "${{ secrets.COOLIFY_URL }}/api/v1/applications/${STAGING_STACK_UUID}" \ -H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}" \ -H "Content-Type: application/json" \ - -d '{"git_branch": "${{ steps.ref.outputs.ref }}"}' + -d "{\"git_branch\": \"${{ steps.ref.outputs.ref }}\"}")" + echo "$body" + printf '%s' "$body" | jq -e --arg uuid "$STAGING_STACK_UUID" '.uuid == $uuid' >/dev/null + + - name: Deploy staging stack + id: deploy + run: | + set -euo pipefail + body="$(curl -sS --fail-with-body "${{ secrets.COOLIFY_URL }}/api/v1/deploy?uuid=${STAGING_STACK_UUID}&force=false" \ + -H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}")" + echo "$body" + printf '%s' "$body" | jq -e --arg uuid "$STAGING_STACK_UUID" '.deployments[0].resource_uuid == $uuid' >/dev/null + echo "deployment_uuid=$(printf '%s' "$body" | jq -r '.deployments[0].deployment_uuid')" >> "$GITHUB_OUTPUT" + + - name: Wait for staging deployment + run: | + set -euo pipefail + deployment_uuid="${{ steps.deploy.outputs.deployment_uuid }}" + for _ in $(seq 1 60); do + body="$(curl -sS --fail-with-body "${{ secrets.COOLIFY_URL }}/api/v1/deployments/${deployment_uuid}" \ + -H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}")" + status="$(printf '%s' "$body" | jq -r '.status')" + echo "deployment status: $status" + if [ "$status" = "finished" ]; then + exit 0 + fi + if [ "$status" != "queued" ] && [ "$status" != "in_progress" ]; then + echo "$body" + exit 1 + fi + sleep 10 + done + echo "Timed out waiting for staging deployment ${deployment_uuid}" + exit 1 - - name: Deploy backend to staging + - name: Verify Coolify staging contract run: | - curl -sX GET "${{ secrets.COOLIFY_URL }}/api/v1/deploy?uuid=${{ secrets.COOLIFY_BACKEND_STAGING_UUID }}&force=false" \ - 
-H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}" + set -euo pipefail + body="$(curl -sS --fail-with-body "${{ secrets.COOLIFY_URL }}/api/v1/applications/${STAGING_STACK_UUID}" \ + -H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}")" + echo "$body" | jq '{uuid,git_branch,docker_compose_location}' + printf '%s' "$body" | jq -e --arg ref "${{ steps.ref.outputs.ref }}" '.git_branch == $ref' >/dev/null + printf '%s' "$body" | jq -e '.docker_compose_raw | contains("leon-home:/root/.leon")' >/dev/null + printf '%s' "$body" | jq -e --arg volume "${STAGING_STACK_UUID}_leon-home:/root/.leon" '.docker_compose | contains($volume)' >/dev/null + printf '%s' "$body" | jq -e --arg sha "${{ steps.target.outputs.sha }}" '.docker_compose | contains($sha)' >/dev/null - - name: Deploy frontend to staging + - name: Verify staging health contract run: | - curl -sX GET "${{ secrets.COOLIFY_URL }}/api/v1/deploy?uuid=${{ secrets.COOLIFY_FRONTEND_STAGING_UUID }}&force=false" \ - -H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}" + set -euo pipefail + for attempt in $(seq 1 18); do + status="$(curl -sS -o /tmp/staging-health.json -w '%{http_code}' "https://app.staging.mycel.nextmind.space/api/monitor/health")" + echo "health attempt ${attempt}: status=${status}" + if [ "$status" = "200" ]; then + body="$(cat /tmp/staging-health.json)" + echo "$body" + printf '%s' "$body" | jq -e '.db.path == "/root/.leon/sandbox.db"' >/dev/null + printf '%s' "$body" | jq -e '.db.exists == true' >/dev/null + exit 0 + fi + cat /tmp/staging-health.json || true + sleep 10 + done + echo "Staging health contract did not become ready in time" + exit 1 - name: Comment on PR with staging URL if: github.event_name == 'pull_request' @@ -70,5 +142,5 @@ jobs: issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: `🚀 **预发部署已触发**\n\n- 前端: https://app.staging.mycel.nextmind.space\n- 后端: https://api.staging.mycel.nextmind.space\n\n分支: \`${{ steps.ref.outputs.ref }}\`` + body: `🚀 **预发部署已触发**\n\n- 共享 Staging: https://app.staging.mycel.nextmind.space\n- API(同域反代): https://app.staging.mycel.nextmind.space/api\n\n分支: \`${{ steps.ref.outputs.ref }}\`` }) diff --git a/.gitignore b/.gitignore index be4d3c775..e24215ae8 100644 --- a/.gitignore +++ b/.gitignore @@ -102,6 +102,8 @@ worktrees/ # Development artifacts — never commit docs/lessons/ docs/plans/ +docs/superpowers/plans/ +docs/superpowers/specs/ frontend/.vite/ .playwright-cli/ ops diff --git a/Dockerfile b/Dockerfile index e875ed19f..36bb7bf5a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,11 +7,13 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv # Install dependencies (cached layer before source copy) COPY pyproject.toml uv.lock ./ -RUN uv sync --frozen --no-dev --no-install-project +# @@@sandbox-sdk-image-parity - shared staging/provider inventory should reflect runtime truth, +# not "SDK missing from image" accidents while config files are present. +RUN uv sync --frozen --no-dev --extra sandbox --extra e2b --extra daytona --no-install-project # Copy source and install project COPY . . -RUN uv sync --frozen --no-dev +RUN uv sync --frozen --no-dev --extra sandbox --extra e2b --extra daytona ENV PATH="/app/.venv/bin:$PATH" diff --git a/README.md b/README.md index a7fdc9af7..f75571e6f 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ Full-featured web platform for managing and interacting with agents: ### Multi-Agent Communication -Agents are first-class social entities. 
They can discover each other, send messages, and collaborate autonomously: +Agents are first-class social entities. They can list chats, read messages, send messages, and collaborate autonomously: ``` Member (template) @@ -103,8 +103,10 @@ Member (template) └→ Thread (agent brain / conversation) ``` -- **`chat_send`**: Agent A messages Agent B; B responds autonomously -- **`directory`**: Agents browse and discover other entities +- **`list_chats`**: List active conversations with unread counts and participants +- **`read_messages`**: Read message history before responding +- **`send_message`**: Agent A messages Agent B; B responds autonomously +- **`search_messages`**: Search message history across chats - **Real-time delivery**: SSE-based chat with typing indicators and read receipts Humans also have entities — agents can initiate conversations with humans, not just the other way around. diff --git a/README.zh.md b/README.zh.md index 12bb8981a..1b3d31c87 100644 --- a/README.zh.md +++ b/README.zh.md @@ -95,7 +95,7 @@ cd frontend/app && npm run dev ### 多 Agent 通讯 -Agent 是一等公民的社交实体,可以互相发现、发送消息、自主协作: +Agent 是一等公民的社交实体,可以列出对话、读取消息、发送消息、自主协作: ``` Member(模板) @@ -103,8 +103,10 @@ Member(模板) └→ Thread(Agent 大脑 / 对话) ``` -- **`chat_send`**:Agent A 给 Agent B 发消息,B 自主回复 -- **`directory`**:Agent 浏览和发现其他实体 +- **`list_chats`**:列出活跃对话、未读数和参与者 +- **`read_messages`**:先读取消息历史,再决定如何回复 +- **`send_message`**:Agent A 给 Agent B 发消息,B 自主回复 +- **`search_messages`**:跨对话搜索消息历史 - **实时投递**:基于 SSE 的聊天,支持输入提示和已读回执 人类也有 Entity——Agent 可以主动找人类对话,而不只是被动响应。 diff --git a/backend/taskboard/_service_loader.py b/backend/taskboard/_service_loader.py new file mode 100644 index 000000000..c59e44605 --- /dev/null +++ b/backend/taskboard/_service_loader.py @@ -0,0 +1,25 @@ +"""Typed task_service loader for taskboard surfaces.""" + +from __future__ import annotations + +from typing import Any, Protocol, cast + + +class TaskServiceProtocol(Protocol): + def list_tasks(self) -> list[dict[str, Any]]: ... + def get_task(self, task_id: str) -> dict[str, Any] | None: ... + def get_highest_priority_pending_task(self) -> dict[str, Any] | None: ... + def create_task(self, **fields: Any) -> dict[str, Any]: ... + def update_task(self, task_id: str, **fields: Any) -> dict[str, Any] | None: ... 
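`TaskServiceProtocol` above only declares the surface the taskboard middleware and tools actually call; `require_task_service()` (defined next in this file) resolves the real module at call time and raises instead of leaving callers to trip over a dead `None` module. A short usage sketch, assuming the backend web service is importable and that board tasks expose an `id` field; the `status` / `thread_id` fields mirror the claim handlers later in this diff:

```python
from backend.taskboard._service_loader import require_task_service


def claim_highest_priority(thread_id: str) -> dict | None:
    # Raises RuntimeError when backend.web.services.task_service is
    # unavailable (e.g. a CLI-only install), rather than failing later
    # with an AttributeError on a None module.
    svc = require_task_service()
    task = svc.get_highest_priority_pending_task()
    if task is None:
        return None
    # Mirror _handle_claim / _claim_task: mark running and record the owner.
    return svc.update_task(task["id"], status="running", thread_id=thread_id)
```
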
+ + +try: + from backend.web.services import task_service as _task_service +except ImportError: + _task_service = None + + +def require_task_service() -> TaskServiceProtocol: + if _task_service is None: + raise RuntimeError("backend.web.services.task_service is unavailable") + return cast(TaskServiceProtocol, _task_service) diff --git a/backend/taskboard/middleware.py b/backend/taskboard/middleware.py index 69a274624..6f9f3f83f 100644 --- a/backend/taskboard/middleware.py +++ b/backend/taskboard/middleware.py @@ -16,7 +16,7 @@ import json import logging import time -from collections.abc import Awaitable, Callable +from collections.abc import Awaitable, Callable, Mapping from typing import Any from langchain.agents.middleware.types import ( @@ -26,12 +26,9 @@ ToolCallRequest, ) from langchain_core.messages import ToolMessage +from langchain_core.messages.tool import ToolCall -# Lazy import: backend is only available when running as web service -try: - from backend.web.services import task_service -except ImportError: - task_service = None # type: ignore[assignment] +from backend.taskboard._service_loader import require_task_service logger = logging.getLogger(__name__) @@ -76,7 +73,7 @@ def __init__( # Tool schemas # ------------------------------------------------------------------ - def _get_tool_schemas(self) -> list[dict]: + def _get_tool_schemas(self) -> list[dict[str, Any]]: """Return OpenAI-format function schemas, filtered by blocked_tools.""" schemas = [ { @@ -263,7 +260,7 @@ async def awrap_tool_call( # Dispatch # ------------------------------------------------------------------ - def _handle_tool_call(self, tool_call: dict) -> ToolMessage: + def _handle_tool_call(self, tool_call: Mapping[str, Any] | ToolCall) -> ToolMessage: tool_name = tool_call.get("name") tool_id = tool_call.get("id", "") args = tool_call.get("args", {}) @@ -292,6 +289,7 @@ def _handle_tool_call(self, tool_call: dict) -> ToolMessage: def _handle_list(self, args: dict) -> dict: """List board tasks with optional status/priority filter.""" + task_service = require_task_service() try: tasks = task_service.list_tasks() except Exception as e: @@ -310,6 +308,7 @@ def _handle_list(self, args: dict) -> dict: def _handle_claim(self, args: dict) -> dict: """Claim a task: set running + thread_id + started_at.""" + task_service = require_task_service() task_id = args.get("TaskId", "") now_ms = int(time.time() * 1000) updated = task_service.update_task( @@ -324,6 +323,7 @@ def _handle_claim(self, args: dict) -> dict: def _handle_progress(self, args: dict) -> dict: """Update task progress and optionally append a note.""" + task_service = require_task_service() task_id = args.get("TaskId", "") progress = args.get("Progress", 0) @@ -346,6 +346,7 @@ def _handle_progress(self, args: dict) -> dict: def _handle_complete(self, args: dict) -> dict: """Complete a task with result.""" + task_service = require_task_service() task_id = args.get("TaskId", "") result_text = args.get("Result", "") now_ms = int(time.time() * 1000) @@ -362,6 +363,7 @@ def _handle_complete(self, args: dict) -> dict: def _handle_fail(self, args: dict) -> dict: """Fail a task with reason.""" + task_service = require_task_service() task_id = args.get("TaskId", "") reason = args.get("Reason", "") now_ms = int(time.time() * 1000) @@ -381,6 +383,7 @@ def _handle_fail(self, args: dict) -> dict: async def on_idle(self) -> dict[str, Any] | None: """Called when agent enters IDLE state. 
Returns highest-priority pending task, or None.""" + task_service = require_task_service() return await asyncio.to_thread(task_service.get_highest_priority_pending_task) # ------------------------------------------------------------------ @@ -389,6 +392,7 @@ async def on_idle(self) -> dict[str, Any] | None: def _handle_create(self, args: dict) -> dict: """Create a board task with source='agent'.""" + task_service = require_task_service() try: task = task_service.create_task( title=args.get("Title", "New task"), diff --git a/backend/taskboard/service.py b/backend/taskboard/service.py index e1c99b568..e00a32b65 100644 --- a/backend/taskboard/service.py +++ b/backend/taskboard/service.py @@ -17,14 +17,9 @@ import time from typing import Any +from backend.taskboard._service_loader import require_task_service from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry -# Lazy import: backend is only available when running as web service -try: - from backend.web.services import task_service -except ImportError: - task_service = None # type: ignore[assignment] - logger = logging.getLogger(__name__) @@ -218,6 +213,7 @@ def _get_thread_id(self) -> str: # ------------------------------------------------------------------ async def _list_tasks(self, Status: str = "", Priority: str = "") -> str: + task_service = require_task_service() try: tasks = await asyncio.to_thread(task_service.list_tasks) except Exception as e: @@ -232,6 +228,7 @@ async def _list_tasks(self, Status: str = "", Priority: str = "") -> str: return json.dumps({"tasks": tasks, "total": len(tasks)}, ensure_ascii=False) async def _claim_task(self, TaskId: str) -> str: + task_service = require_task_service() thread_id = self._get_thread_id() now_ms = int(time.time() * 1000) try: @@ -250,6 +247,7 @@ async def _claim_task(self, TaskId: str) -> str: return json.dumps({"task": updated}, ensure_ascii=False) async def _update_progress(self, TaskId: str, Progress: int, Note: str = "") -> str: + task_service = require_task_service() update_kwargs: dict[str, Any] = {"progress": Progress} if Note: @@ -273,6 +271,7 @@ async def _update_progress(self, TaskId: str, Progress: int, Note: str = "") -> return json.dumps({"task": updated}, ensure_ascii=False) async def _complete_task(self, TaskId: str, Result: str) -> str: + task_service = require_task_service() now_ms = int(time.time() * 1000) try: updated = await asyncio.to_thread( @@ -291,6 +290,7 @@ async def _complete_task(self, TaskId: str, Result: str) -> str: return json.dumps({"task": updated}, ensure_ascii=False) async def _fail_task(self, TaskId: str, Reason: str) -> str: + task_service = require_task_service() now_ms = int(time.time() * 1000) try: updated = await asyncio.to_thread( @@ -308,6 +308,7 @@ async def _fail_task(self, TaskId: str, Reason: str) -> str: return json.dumps({"task": updated}, ensure_ascii=False) async def _create_task(self, Title: str, Description: str = "", Priority: str = "medium") -> str: + task_service = require_task_service() try: task = await asyncio.to_thread( task_service.create_task, @@ -327,4 +328,5 @@ async def _create_task(self, Title: str, Description: str = "", Priority: str = async def on_idle(self) -> dict[str, Any] | None: """Called when agent enters IDLE state. 
Returns highest-priority pending task, or None.""" + task_service = require_task_service() return await asyncio.to_thread(task_service.get_highest_priority_pending_task) diff --git a/backend/web/core/config.py b/backend/web/core/config.py index 23da41471..ab9d87372 100644 --- a/backend/web/core/config.py +++ b/backend/web/core/config.py @@ -4,10 +4,9 @@ from pathlib import Path from config.user_paths import user_home_path -from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path -# Database paths -DB_PATH = resolve_role_db_path(SQLiteDBRole.MAIN) +# Legacy DB_PATH — used only by SQLite sandbox repos as default path +DB_PATH = user_home_path("leon.db") SANDBOXES_DIR = user_home_path("sandboxes") SANDBOX_VOLUME_ROOT = Path(os.environ.get("LEON_SANDBOX_VOLUME_ROOT", str(user_home_path("volumes")))).expanduser().resolve() diff --git a/backend/web/core/dependencies.py b/backend/web/core/dependencies.py index 52bc277a0..85ece805b 100644 --- a/backend/web/core/dependencies.py +++ b/backend/web/core/dependencies.py @@ -1,7 +1,6 @@ """FastAPI dependency injection functions.""" import asyncio -import os from typing import Annotated, Any from fastapi import Depends, FastAPI, HTTPException, Request @@ -9,18 +8,6 @@ from backend.web.services.agent_pool import get_or_create_agent, resolve_thread_sandbox from sandbox.thread_context import set_current_thread_id -# Dev bypass: set LEON_DEV_SKIP_AUTH=1 to skip JWT verification and inject a mock identity. -# WARNING: this bypasses ALL auth — never set in production. -_DEV_SKIP_AUTH = os.environ.get("LEON_DEV_SKIP_AUTH", "").lower() in ("1", "true", "yes") -_DEV_PAYLOAD = {"user_id": "dev-user"} - -if _DEV_SKIP_AUTH: - import logging as _logging - - _logging.getLogger(__name__).warning( - "LEON_DEV_SKIP_AUTH is active — JWT auth is BYPASSED for all requests. This must never be enabled in production." - ) - async def get_app(request: Request) -> FastAPI: """Get FastAPI app instance from request.""" @@ -37,8 +24,6 @@ def _get_auth_service(app: FastAPI): def _extract_jwt_payload(request: Request) -> dict: """Extract and verify JWT payload from Bearer token. Returns {user_id}.""" - if _DEV_SKIP_AUTH: - return _DEV_PAYLOAD auth_header = request.headers.get("Authorization", "") if not auth_header.startswith("Bearer "): raise HTTPException(401, "Missing or invalid Authorization header") @@ -52,8 +37,6 @@ def _extract_jwt_payload(request: Request) -> dict: async def get_current_user_id(request: Request) -> str: """Extract user_id from JWT and verify user exists. Returns 401 if user was deleted (e.g. 
DB reset).""" user_id = _extract_jwt_payload(request)["user_id"] - if _DEV_SKIP_AUTH: - return user_id member_repo = getattr(request.app.state, "member_repo", None) if member_repo and member_repo.get_by_id(user_id) is None: raise HTTPException(401, "User no longer exists — please re-login") diff --git a/backend/web/core/lifespan.py b/backend/web/core/lifespan.py index 13a76a4b2..b985254ec 100644 --- a/backend/web/core/lifespan.py +++ b/backend/web/core/lifespan.py @@ -3,193 +3,73 @@ import asyncio import os from contextlib import asynccontextmanager -from typing import Any +from typing import Any, cast from fastapi import FastAPI +from psycopg import AsyncConnection from backend.web.services.event_buffer import RunEventBuffer, ThreadEventBuffer from backend.web.services.idle_reaper import idle_reaper_loop -from backend.web.services.resource_cache import resource_overview_refresh_loop -from config.env_manager import ConfigManager +from backend.web.services.resource_cache import monitor_resource_overview_refresh_loop from core.runtime.middleware.queue import MessageQueueManager -def _seed_dev_user(app: FastAPI) -> None: - """Create dev-user human member + initial agents if not yet seeded. +def _require_web_runtime_contract() -> None: + # @@@web-checkpointer-contract - web routes can create LeonAgent on first + # message, so missing Postgres checkpointer config is a startup contract + # violation, not a late per-request error. + if not os.getenv("LEON_POSTGRES_URL"): + raise RuntimeError("LEON_POSTGRES_URL is required for backend web runtime") - Mirrors AuthService.register() but uses the fixed 'dev-user' ID that - matches _DEV_PAYLOAD, so list_members('dev-user') returns results. - """ - import logging - import time - from pathlib import Path - from backend.web.services.member_service import MEMBERS_DIR, _write_agent_md, _write_json - from storage.contracts import MemberRow, MemberType - from storage.providers.sqlite.member_repo import generate_member_id +async def _validate_web_checkpointer_contract() -> None: + pg_url = os.getenv("LEON_POSTGRES_URL") + if not pg_url: + raise RuntimeError("LEON_POSTGRES_URL is required for backend web runtime") - log = logging.getLogger(__name__) - member_repo = app.state.member_repo - - dev_user_id = "dev-user" - - if member_repo.get_by_id(dev_user_id) is not None: - return # already seeded - - log.info("DEV: seeding dev-user member + initial agents") - now = time.time() - - # Human member row - member_repo.create( - MemberRow( - id=dev_user_id, - name="Dev", - type=MemberType.HUMAN, - created_at=now, - ) - ) - - # Initial agents (same as register()) - initial_agents = [ - {"name": "Toad", "description": "Curious and energetic assistant", "avatar": "toad.jpeg"}, - {"name": "Morel", "description": "Thoughtful senior analyst", "avatar": "morel.jpeg"}, - ] - assets_dir = Path(__file__).resolve().parents[3] / "assets" - - for agent_def in initial_agents: - agent_id = generate_member_id() - agent_dir = MEMBERS_DIR / agent_id - agent_dir.mkdir(parents=True, exist_ok=True) - _write_agent_md(agent_dir / "agent.md", name=agent_def["name"], description=agent_def["description"]) - _write_json( - agent_dir / "meta.json", - { - "status": "active", - "version": "1.0.0", - "created_at": int(now * 1000), - "updated_at": int(now * 1000), - }, - ) - member_repo.create( - MemberRow( - id=agent_id, - name=agent_def["name"], - type=MemberType.MYCEL_AGENT, - description=agent_def["description"], - config_dir=str(agent_dir), - owner_user_id=dev_user_id, - created_at=now, - ) - ) 
- src_avatar = assets_dir / agent_def["avatar"] - if src_avatar.exists(): - try: - from backend.web.routers.entities import process_and_save_avatar - - avatar_path = process_and_save_avatar(src_avatar, agent_id) - member_repo.update(agent_id, avatar=avatar_path, updated_at=now) - except Exception as e: - log.warning("DEV: avatar copy failed for %s: %s", agent_def["name"], e) + conn = await AsyncConnection.connect(pg_url) + try: + async with conn.cursor() as cursor: + await cursor.execute("SELECT 1") + await cursor.fetchone() + finally: + await conn.close() @asynccontextmanager async def lifespan(app: FastAPI): """FastAPI lifespan context manager for startup and shutdown.""" - # Load configuration - config_manager = ConfigManager() - config_manager.load_to_env() - - # Ensure event store table exists (lazy init, not at module import) - from backend.web.services.event_store import init_event_store - - init_event_store() - - from backend.web.services.library_service import ensure_library_dir - from backend.web.services.member_service import ensure_members_dir - - ensure_members_dir() - ensure_library_dir() - - # ---- Entity-Chat repos + services ---- - _storage_strategy = os.getenv("LEON_STORAGE_STRATEGY", "sqlite") - - if _storage_strategy == "supabase": - from backend.web.core.supabase_factory import create_supabase_client - from storage.container import StorageContainer - from storage.providers.supabase import ( - SupabaseAccountRepo, - SupabaseChatEntityRepo, - SupabaseChatMessageRepo, - SupabaseChatRepo, - SupabaseContactRepo, - SupabaseEntityRepo, - SupabaseInviteCodeRepo, - SupabaseMemberRepo, - SupabaseRecipeRepo, - SupabaseThreadLaunchPrefRepo, - SupabaseThreadRepo, - SupabaseUserSettingsRepo, - ) - - _supabase_client = create_supabase_client() - app.state.member_repo = SupabaseMemberRepo(_supabase_client) - app.state.account_repo = SupabaseAccountRepo(_supabase_client) - app.state.entity_repo = SupabaseEntityRepo(_supabase_client) - app.state.thread_repo = SupabaseThreadRepo(_supabase_client) - app.state.thread_launch_pref_repo = SupabaseThreadLaunchPrefRepo(_supabase_client) - app.state.recipe_repo = SupabaseRecipeRepo(_supabase_client) - app.state.chat_repo = SupabaseChatRepo(_supabase_client) - app.state.chat_entity_repo = SupabaseChatEntityRepo(_supabase_client) - app.state.chat_message_repo = SupabaseChatMessageRepo(_supabase_client) - app.state.invite_code_repo = SupabaseInviteCodeRepo(_supabase_client) - app.state.user_settings_repo = SupabaseUserSettingsRepo(_supabase_client) - app.state._supabase_client = _supabase_client - app.state._storage_container = StorageContainer(strategy="supabase", supabase_client=_supabase_client) - else: - from storage.providers.sqlite.chat_repo import SQLiteChatEntityRepo, SQLiteChatMessageRepo, SQLiteChatRepo - from storage.providers.sqlite.entity_repo import SQLiteEntityRepo - from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path - from storage.providers.sqlite.member_repo import SQLiteAccountRepo, SQLiteMemberRepo - from storage.providers.sqlite.recipe_repo import SQLiteRecipeRepo - from storage.providers.sqlite.thread_launch_pref_repo import SQLiteThreadLaunchPrefRepo - from storage.providers.sqlite.thread_repo import SQLiteThreadRepo - - db = resolve_role_db_path(SQLiteDBRole.MAIN) - chat_db = resolve_role_db_path(SQLiteDBRole.CHAT) - - app.state.member_repo = SQLiteMemberRepo(db) - app.state.account_repo = SQLiteAccountRepo(db) - app.state.entity_repo = SQLiteEntityRepo(db) - app.state.thread_repo = 
SQLiteThreadRepo(db) - app.state.thread_launch_pref_repo = SQLiteThreadLaunchPrefRepo(db) - app.state.recipe_repo = SQLiteRecipeRepo(db) - app.state.chat_repo = SQLiteChatRepo(chat_db) - app.state.chat_entity_repo = SQLiteChatEntityRepo(chat_db) - app.state.chat_message_repo = SQLiteChatMessageRepo(chat_db) + _require_web_runtime_contract() + await _validate_web_checkpointer_contract() + + # ---- Member-Chat repos + services ---- + from backend.web.core.supabase_factory import create_supabase_auth_client, create_supabase_client + from storage.container import StorageContainer + + _supabase_client = create_supabase_client() + storage_container = StorageContainer(supabase_client=_supabase_client) + app.state.member_repo = storage_container.member_repo() + app.state.thread_repo = storage_container.thread_repo() + app.state.thread_launch_pref_repo = storage_container.thread_launch_pref_repo() + app.state.recipe_repo = storage_container.recipe_repo() + app.state.chat_repo = storage_container.chat_repo() + app.state.invite_code_repo = storage_container.invite_code_repo() + app.state.user_settings_repo = storage_container.user_settings_repo() + app.state.agent_config_repo = storage_container.agent_config_repo() + app.state.panel_task_repo = storage_container.panel_task_repo() + app.state.cron_job_repo = storage_container.cron_job_repo() + app.state._supabase_client = _supabase_client + app.state._supabase_auth_client_factory = create_supabase_auth_client + app.state._storage_container = storage_container from backend.web.services.auth_service import AuthService - if _storage_strategy == "supabase": - app.state.auth_service = AuthService( - members=app.state.member_repo, - accounts=app.state.account_repo, - entities=app.state.entity_repo, - supabase_client=_supabase_client, - invite_codes=app.state.invite_code_repo, - ) - else: - app.state.auth_service = AuthService( - members=app.state.member_repo, - accounts=app.state.account_repo, - entities=app.state.entity_repo, - supabase_client=None, - ) - - # Dev bypass: seed dev-user + initial agents on first startup - from backend.web.core.dependencies import _DEV_SKIP_AUTH - - if _DEV_SKIP_AUTH: - _seed_dev_user(app) + app.state.auth_service = AuthService( + members=app.state.member_repo, + supabase_client=_supabase_client, + supabase_auth_client_factory=create_supabase_auth_client, + invite_codes=app.state.invite_code_repo, + ) from backend.web.services.chat_events import ChatEventBus from backend.web.services.typing_tracker import TypingTracker @@ -197,92 +77,91 @@ async def lifespan(app: FastAPI): app.state.chat_event_bus = ChatEventBus() app.state.typing_tracker = TypingTracker(app.state.chat_event_bus) - from backend.web.services.delivery_resolver import DefaultDeliveryResolver + app.state.contact_repo = storage_container.contact_repo() - if _storage_strategy == "supabase": - app.state.contact_repo = SupabaseContactRepo(_supabase_client) - else: - from storage.providers.sqlite.contact_repo import SQLiteContactRepo + # Wire chat delivery after event loop is available + # ---- Messaging system (Supabase-backed, required) ---- + from backend.web.core.supabase_factory import create_messaging_supabase_client + from core.agents.communication.delivery import make_chat_delivery_fn + from messaging.delivery.resolver import HireVisitDeliveryResolver + from messaging.relationships.service import RelationshipService + from messaging.service import MessagingService + from storage.providers.supabase.messaging_repo import ( + SupabaseChatMemberRepo, + 
SupabaseMessageReadRepo, + SupabaseMessagesRepo, + SupabaseRelationshipRepo, + ) - app.state.contact_repo = SQLiteContactRepo(chat_db) + _msg_supabase = create_messaging_supabase_client() + _chat_member_repo = SupabaseChatMemberRepo(_msg_supabase) + _messages_repo = SupabaseMessagesRepo(_msg_supabase) + _message_read_repo = SupabaseMessageReadRepo(_msg_supabase) + app.state.relationship_repo = SupabaseRelationshipRepo(_msg_supabase) + app.state.chat_member_repo = _chat_member_repo + app.state.messages_repo = _messages_repo - delivery_resolver = DefaultDeliveryResolver(app.state.contact_repo, app.state.chat_entity_repo) + app.state.relationship_service = RelationshipService( + app.state.relationship_repo, + member_repo=app.state.member_repo, + thread_repo=app.state.thread_repo, + ) - from backend.web.services.chat_service import ChatService + _msg_delivery_resolver = HireVisitDeliveryResolver( + contact_repo=app.state.contact_repo, + chat_member_repo=_chat_member_repo, + relationship_repo=app.state.relationship_repo, + ) - app.state.chat_service = ChatService( + app.state.messaging_service = MessagingService( chat_repo=app.state.chat_repo, - chat_entity_repo=app.state.chat_entity_repo, - chat_message_repo=app.state.chat_message_repo, - entity_repo=app.state.entity_repo, + chat_member_repo=_chat_member_repo, + messages_repo=_messages_repo, + message_read_repo=_message_read_repo, member_repo=app.state.member_repo, + thread_repo=app.state.thread_repo, event_bus=app.state.chat_event_bus, - delivery_resolver=delivery_resolver, + delivery_resolver=_msg_delivery_resolver, ) - - # Wire chat delivery after event loop is available - from core.agents.communication.delivery import make_chat_delivery_fn - - app.state.chat_service.set_delivery_fn(make_chat_delivery_fn(app)) + app.state.messaging_service.set_delivery_fn(make_chat_delivery_fn(app)) # ---- Existing state ---- app.state.queue_manager = MessageQueueManager() - app.state.agent_pool: dict[str, Any] = {} - app.state.thread_sandbox: dict[str, str] = {} - app.state.thread_cwd: dict[str, str] = {} - app.state.thread_locks: dict[str, asyncio.Lock] = {} + app.state.agent_pool = cast(dict[str, Any], {}) + app.state.thread_sandbox = cast(dict[str, str], {}) + app.state.thread_cwd = cast(dict[str, str], {}) + app.state.thread_locks = cast(dict[str, asyncio.Lock], {}) app.state.thread_locks_guard = asyncio.Lock() - app.state.thread_tasks: dict[str, asyncio.Task] = {} - app.state.thread_event_buffers: dict[str, ThreadEventBuffer] = {} - app.state.subagent_buffers: dict[str, RunEventBuffer] = {} + app.state.thread_tasks = cast(dict[str, asyncio.Task[Any]], {}) + app.state.thread_event_buffers = cast(dict[str, ThreadEventBuffer], {}) + app.state.subagent_buffers = cast(dict[str, RunEventBuffer], {}) from backend.web.services.display_builder import DisplayBuilder app.state.display_builder = DisplayBuilder() - app.state.thread_last_active: dict[str, float] = {} # thread_id → epoch timestamp - app.state.idle_reaper_task: asyncio.Task | None = None + app.state.thread_last_active = cast(dict[str, float], {}) # thread_id → epoch timestamp + app.state.idle_reaper_task = cast(asyncio.Task[Any] | None, None) app.state.cron_service = None app.state._event_loop = asyncio.get_running_loop() - app.state.monitor_resources_task: asyncio.Task | None = None + app.state.monitor_resources_task = cast(asyncio.Task[Any] | None, None) try: # Start idle reaper background task app.state.idle_reaper_task = asyncio.create_task(idle_reaper_loop(app)) # Start resource overview refresh 
loop - app.state.monitor_resources_task = asyncio.create_task(resource_overview_refresh_loop()) + app.state.monitor_resources_task = asyncio.create_task(monitor_resource_overview_refresh_loop()) # Start cron scheduler from backend.web.services.cron_service import CronService - cron_svc = CronService() + cron_svc = CronService( + cron_job_repo=app.state.cron_job_repo, + task_repo=app.state.panel_task_repo, + ) await cron_svc.start() app.state.cron_service = cron_svc - # @@@wechat-registry — create registry with delivery callback, auto-start all - from backend.web.services.wechat_service import WeChatConnectionRegistry, migrate_entity_id_dirs - from core.runtime.middleware.queue.formatters import format_wechat_message - - migrate_entity_id_dirs() - - async def _wechat_deliver(conn, msg): - """Delivery callback — routes WeChat messages to configured thread/chat.""" - routing = conn.routing - if not routing.type or not routing.id: - return - sender_name = msg.from_user_id.split("@")[0] or msg.from_user_id - if routing.type == "thread": - from backend.web.services.message_routing import route_message_to_brain - - content = format_wechat_message(sender_name, msg.from_user_id, msg.text) - await route_message_to_brain(app, routing.id, content, source="owner", sender_name=sender_name) - elif routing.type == "chat": - content = format_wechat_message(sender_name, msg.from_user_id, msg.text) - app.state.chat_service.send_message(routing.id, conn.user_id, content) - - app.state.wechat_registry = WeChatConnectionRegistry(delivery_fn=_wechat_deliver) - app.state.wechat_registry.auto_start_all() - yield finally: # @@@background-task-shutdown-order - cancel monitor/reaper before provider cleanup. @@ -295,10 +174,6 @@ async def _wechat_deliver(conn, msg): except asyncio.CancelledError: pass - # Cleanup: stop WeChat connections - if hasattr(app.state, "wechat_registry"): - await app.state.wechat_registry.shutdown() - # Cleanup: stop cron scheduler if app.state.cron_service: await app.state.cron_service.stop() @@ -312,3 +187,8 @@ async def _wechat_deliver(conn, msg): agent.close() except Exception as e: print(f"[web] Agent cleanup error: {e}") + + # Cleanup: stop LSP language servers + from core.tools.lsp.service import lsp_pool + + await lsp_pool.close_all() diff --git a/backend/web/core/storage_factory.py b/backend/web/core/storage_factory.py index 8e189dd9d..8f63d3333 100644 --- a/backend/web/core/storage_factory.py +++ b/backend/web/core/storage_factory.py @@ -6,15 +6,10 @@ from __future__ import annotations -import os from functools import lru_cache from typing import Any -def _strategy() -> str: - return os.getenv("LEON_STORAGE_STRATEGY", "sqlite") - - @lru_cache(maxsize=1) def _supabase_client() -> Any: from backend.web.core.supabase_factory import create_supabase_client @@ -23,90 +18,24 @@ def _supabase_client() -> Any: def make_panel_task_repo() -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.panel_task_repo import SupabasePanelTaskRepo - - return SupabasePanelTaskRepo(_supabase_client()) - from backend.web.core.config import DB_PATH - from storage.providers.sqlite.panel_task_repo import SQLitePanelTaskRepo + from storage.providers.supabase.panel_task_repo import SupabasePanelTaskRepo - return SQLitePanelTaskRepo(db_path=DB_PATH) + return SupabasePanelTaskRepo(_supabase_client()) def make_cron_job_repo() -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.cron_job_repo import SupabaseCronJobRepo + from storage.providers.supabase.cron_job_repo import 
SupabaseCronJobRepo - return SupabaseCronJobRepo(_supabase_client()) - from backend.web.core.config import DB_PATH - from storage.providers.sqlite.cron_job_repo import SQLiteCronJobRepo - - return SQLiteCronJobRepo(db_path=DB_PATH) + return SupabaseCronJobRepo(_supabase_client()) def make_sandbox_monitor_repo() -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.sandbox_monitor_repo import SupabaseSandboxMonitorRepo - - return SupabaseSandboxMonitorRepo(_supabase_client()) from storage.providers.sqlite.sandbox_monitor_repo import SQLiteSandboxMonitorRepo return SQLiteSandboxMonitorRepo() -def make_agent_registry_repo() -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.agent_registry_repo import SupabaseAgentRegistryRepo - - return SupabaseAgentRegistryRepo(_supabase_client()) - from storage.providers.sqlite.agent_registry_repo import SQLiteAgentRegistryRepo - - return SQLiteAgentRegistryRepo() - - -def make_tool_task_repo(db_path: Any = None) -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.tool_task_repo import SupabaseToolTaskRepo - - return SupabaseToolTaskRepo(_supabase_client()) - from storage.providers.sqlite.tool_task_repo import SQLiteToolTaskRepo - - if db_path is None: - from core.tools.task.service import DEFAULT_DB_PATH - - db_path = DEFAULT_DB_PATH - return SQLiteToolTaskRepo(db_path=db_path) - - -def make_sync_file_repo() -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.sync_file_repo import SupabaseSyncFileRepo - - return SupabaseSyncFileRepo(_supabase_client()) - from storage.providers.sqlite.sync_file_repo import SQLiteSyncFileRepo - - return SQLiteSyncFileRepo() - - -def upsert_resource_snapshot(**kwargs: Any) -> None: - """Strategy-aware resource snapshot upsert.""" - if _strategy() == "supabase": - from storage.providers.supabase.resource_snapshot_repo import upsert_lease_resource_snapshot - - upsert_lease_resource_snapshot(**kwargs, client=_supabase_client()) - else: - from storage.providers.sqlite.resource_snapshot_repo import upsert_lease_resource_snapshot - - kwargs.pop("client", None) - upsert_lease_resource_snapshot(**kwargs) - - def list_resource_snapshots(lease_ids: list[str]) -> dict[str, Any]: - """Strategy-aware resource snapshot list.""" - if _strategy() == "supabase": - from storage.providers.supabase.resource_snapshot_repo import list_snapshots_by_lease_ids - - return list_snapshots_by_lease_ids(lease_ids, client=_supabase_client()) - from storage.providers.sqlite.resource_snapshot_repo import list_snapshots_by_lease_ids + from storage.providers.supabase.resource_snapshot_repo import list_snapshots_by_lease_ids - return list_snapshots_by_lease_ids(lease_ids) + return list_snapshots_by_lease_ids(lease_ids, client=_supabase_client()) diff --git a/backend/web/core/supabase_factory.py b/backend/web/core/supabase_factory.py index c8dc9abd1..2e3cfca26 100644 --- a/backend/web/core/supabase_factory.py +++ b/backend/web/core/supabase_factory.py @@ -1,4 +1,4 @@ -"""Runtime Supabase client factory for storage wiring.""" +"""Runtime Supabase client factories for storage and auth wiring.""" from __future__ import annotations @@ -6,6 +6,19 @@ import httpx from supabase import ClientOptions, create_client +from supabase_auth._sync.gotrue_client import SyncGoTrueClient + + +def _resolve_supabase_url() -> str: + url = os.getenv("SUPABASE_INTERNAL_URL") or os.getenv("SUPABASE_PUBLIC_URL") + if not url: + raise RuntimeError("SUPABASE_INTERNAL_URL or SUPABASE_PUBLIC_URL is 
required.") + return url + + +def _resolve_supabase_auth_url() -> str: + url = os.getenv("SUPABASE_AUTH_URL") or _resolve_supabase_url() + return url def create_supabase_client(): @@ -16,13 +29,46 @@ def create_supabase_client(): httpx client never routes through any system/VPN proxy. """ # Prefer internal URL (same-host direct connection) over public tunnel URL. - url = os.getenv("SUPABASE_INTERNAL_URL") or os.getenv("SUPABASE_PUBLIC_URL") + url = _resolve_supabase_url() key = os.getenv("LEON_SUPABASE_SERVICE_ROLE_KEY") - if not url: - raise RuntimeError("SUPABASE_INTERNAL_URL or SUPABASE_PUBLIC_URL is required.") if not key: raise RuntimeError("LEON_SUPABASE_SERVICE_ROLE_KEY is required for Supabase storage runtime.") schema = os.getenv("LEON_DB_SCHEMA", "public") timeout = httpx.Timeout(30.0, connect=10.0) http_client = httpx.Client(timeout=timeout, trust_env=False) return create_client(url, key, options=ClientOptions(httpx_client=http_client, schema=schema)) + + +def create_supabase_auth_client(): + """Build a supabase-py auth client for end-user auth flows. + + Uses the anon key rather than service-role credentials so auth endpoints + behave like real caller traffic instead of admin/server traffic. + """ + url = _resolve_supabase_auth_url() + key = os.getenv("SUPABASE_ANON_KEY") + if not key: + raise RuntimeError("SUPABASE_ANON_KEY is required for Supabase auth runtime.") + timeout = httpx.Timeout(30.0, connect=10.0) + http_client = httpx.Client(timeout=timeout, trust_env=False) + auth_url = os.getenv("SUPABASE_AUTH_URL") + if auth_url: + # @@@direct-gotrue - local auth may bypass Kong and hit GoTrue directly at /token. + return SyncGoTrueClient(url=auth_url, headers={"apikey": key}, http_client=http_client) + return create_client(url, key, options=ClientOptions(httpx_client=http_client)) + + +def create_messaging_supabase_client(): + """Build a server-side Supabase client for messaging repos. + + @@@messaging-public-schema - messaging tables still live in public while + main product storage moved to LEON_DB_SCHEMA, so this client must stay on + public and use server credentials. + """ + url = _resolve_supabase_url() + key = os.getenv("LEON_SUPABASE_SERVICE_ROLE_KEY") + if not key: + raise RuntimeError("LEON_SUPABASE_SERVICE_ROLE_KEY is required for messaging.") + timeout = httpx.Timeout(30.0, connect=10.0) + http_client = httpx.Client(timeout=timeout, trust_env=False) + return create_client(url, key, options=ClientOptions(httpx_client=http_client, schema="public")) diff --git a/backend/web/main.py b/backend/web/main.py index 64f60e0a5..8f6252bbe 100644 --- a/backend/web/main.py +++ b/backend/web/main.py @@ -1,10 +1,7 @@ """Leon Web Backend - FastAPI Application.""" import os -import sqlite3 import subprocess -import sys -from pathlib import Path # Load .env file if ENV_FILE is specified (e.g. 
ENV_FILE=.env for local dev) _env_file = os.getenv("ENV_FILE") @@ -17,85 +14,25 @@ from fastapi import FastAPI # noqa: E402 from fastapi.middleware.cors import CORSMiddleware # noqa: E402 - -def _ensure_windows_db_env_defaults() -> None: - """On Windows, default Leon DBs to a LOCALAPPDATA-backed path.""" - if sys.platform != "win32": - return - - root = _resolve_windows_db_root() - root.mkdir(parents=True, exist_ok=True) - defaults = { - "LEON_DB_PATH": root / "leon.db", - "LEON_RUN_EVENT_DB_PATH": root / "events.db", - "LEON_QUEUE_DB_PATH": root / "queue.db", - "LEON_CHAT_DB_PATH": root / "chat.db", - "LEON_SANDBOX_DB_PATH": root / "sandbox.db", - "LEON_SUBAGENT_DB_PATH": root / "subagent.db", - "LEON_EVAL_DB_PATH": root / "eval.db", - } - for key, value in defaults.items(): - os.environ.setdefault(key, str(value)) - - -def _resolve_windows_db_root() -> Path: - local_appdata = Path(os.getenv("LOCALAPPDATA") or (Path.home() / "AppData" / "Local")) - candidates = [ - local_appdata / "Leon", - Path.home() / ".codex" / "memories" / "mycel-run", - Path.home() / ".leon-win", - ] - seen: set[Path] = set() - for root in candidates: - if root in seen: - continue - seen.add(root) - if _sqlite_root_supports_wal(root): - return root - return candidates[0] - - -def _sqlite_root_supports_wal(root: Path) -> bool: - probe = root / ".leon-probe.db" - conn: sqlite3.Connection | None = None - try: - root.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(str(probe), timeout=1.0) - mode = conn.execute("PRAGMA journal_mode=WAL").fetchone() - conn.execute("CREATE TABLE IF NOT EXISTS _probe(x INTEGER)") - conn.commit() - return bool(mode and str(mode[0]).lower() == "wal") - except Exception: - return False - finally: - if conn is not None: - conn.close() - for suffix in ("", "-wal", "-shm"): - try: - (root / f".leon-probe.db{suffix}").unlink(missing_ok=True) - except OSError: - pass - - -_ensure_windows_db_env_defaults() - from backend.web.core.lifespan import lifespan # noqa: E402 from backend.web.routers import ( # noqa: E402 auth, - chats, - connections, - debug, + contacts, + conversations, # noqa: E402 entities, invite_codes, marketplace, monitor, panel, + resources, sandbox, settings, thread_files, threads, webhooks, ) +from backend.web.routers import messaging as messaging_router # noqa: E402 +from messaging.relationships.router import router as relationships_router # noqa: E402 # Create FastAPI app app = FastAPI(title="Leon Web Backend", lifespan=lifespan) @@ -113,19 +50,23 @@ def _sqlite_root_supports_wal(root: Path) -> bool: app.include_router(auth.router) app.include_router(invite_codes.router) app.include_router(threads.router) -app.include_router(chats.router) + +app.include_router(messaging_router.router) + +app.include_router(contacts.router) +app.include_router(relationships_router) app.include_router(entities.router) app.include_router(entities.members_router) app.include_router(sandbox.router) app.include_router(webhooks.router) -app.include_router(connections.router) app.include_router(thread_files.router) app.include_router(thread_files._public) app.include_router(settings.router) -app.include_router(debug.router) app.include_router(panel.router) app.include_router(monitor.router) +app.include_router(resources.router) app.include_router(marketplace.router) +app.include_router(conversations.router) def _resolve_port() -> int: @@ -158,5 +99,5 @@ def _resolve_port() -> int: host="0.0.0.0", port=port, reload=True, - reload_dirs=["backend", "core", "config", "storage", "sandbox"], + 
reload_dirs=["backend", "core", "config", "storage", "sandbox", "messaging"], ) diff --git a/backend/web/models/requests.py b/backend/web/models/requests.py index 05a108bf0..582ec7f4c 100644 --- a/backend/web/models/requests.py +++ b/backend/web/models/requests.py @@ -1,8 +1,8 @@ """Pydantic request models for Leon web API.""" -from typing import Literal +from typing import Any, Literal -from pydantic import BaseModel, Field +from pydantic import AliasChoices, BaseModel, Field from sandbox.config import MountSpec @@ -20,7 +20,7 @@ class RecipeSnapshotRequest(BaseModel): class CreateThreadRequest(BaseModel): member_id: str # which agent template to create thread from - sandbox: str = "local" + sandbox: str = Field(default="local", validation_alias=AliasChoices("sandbox", "sandbox_type")) recipe: RecipeSnapshotRequest | None = None lease_id: str | None = None cwd: str | None = None @@ -53,3 +53,22 @@ class RunRequest(BaseModel): class SendMessageRequest(BaseModel): message: str attachments: list[str] = Field(default_factory=list) + + +class AskUserAnswerRequest(BaseModel): + header: str | None = None + question: str | None = None + selected_options: list[str] = Field(default_factory=list) + free_text: str | None = None + + +class ResolvePermissionRequest(BaseModel): + decision: Literal["allow", "deny"] + message: str | None = None + answers: list[AskUserAnswerRequest] | None = None + annotations: dict[str, Any] | None = None + + +class ThreadPermissionRuleRequest(BaseModel): + behavior: Literal["allow", "deny", "ask"] + tool_name: str diff --git a/backend/web/routers/auth.py b/backend/web/routers/auth.py index 5c5f87b5b..582a642fa 100644 --- a/backend/web/routers/auth.py +++ b/backend/web/routers/auth.py @@ -11,6 +11,15 @@ router = APIRouter(prefix="/api/auth", tags=["auth"]) +async def _call_auth_service(app: Any, status_code: int, method_name: str, *args: Any) -> Any: + try: + service = _get_auth_service(app) + method = getattr(service, method_name) + return await asyncio.to_thread(method, *args) + except ValueError as e: + raise HTTPException(status_code, str(e)) + + # ── Registration step 1: send OTP ────────────────────────────────────────── @@ -22,11 +31,8 @@ class SendOtpRequest(BaseModel): @router.post("/send-otp") async def send_otp(payload: SendOtpRequest, app: Annotated[Any, Depends(get_app)]) -> dict: - try: - await asyncio.to_thread(_get_auth_service(app).send_otp, payload.email, payload.password, payload.invite_code) - return {"ok": True} - except ValueError as e: - raise HTTPException(400, str(e)) + await _call_auth_service(app, 400, "send_otp", payload.email, payload.password, payload.invite_code) + return {"ok": True} # ── Registration step 2: verify OTP ──────────────────────────────────────── @@ -39,10 +45,7 @@ class VerifyOtpRequest(BaseModel): @router.post("/verify-otp") async def verify_otp(payload: VerifyOtpRequest, app: Annotated[Any, Depends(get_app)]) -> dict: - try: - return await asyncio.to_thread(_get_auth_service(app).verify_register_otp, payload.email, payload.token) - except ValueError as e: - raise HTTPException(400, str(e)) + return await _call_auth_service(app, 400, "verify_register_otp", payload.email, payload.token) # ── Registration step 3: set password + invite code ──────────────────────── @@ -55,10 +58,7 @@ class CompleteRegisterRequest(BaseModel): @router.post("/complete-register") async def complete_register(payload: CompleteRegisterRequest, app: Annotated[Any, Depends(get_app)]) -> dict: - try: - return await 
asyncio.to_thread(_get_auth_service(app).complete_register, payload.temp_token, payload.invite_code) - except ValueError as e: - raise HTTPException(400, str(e)) + return await _call_auth_service(app, 400, "complete_register", payload.temp_token, payload.invite_code) # ── Login ─────────────────────────────────────────────────────────────────── @@ -71,7 +71,4 @@ class LoginRequest(BaseModel): @router.post("/login") async def login(payload: LoginRequest, app: Annotated[Any, Depends(get_app)]) -> dict: - try: - return await asyncio.to_thread(_get_auth_service(app).login, payload.identifier, payload.password) - except ValueError as e: - raise HTTPException(401, str(e)) + return await _call_auth_service(app, 401, "login", payload.identifier, payload.password) diff --git a/backend/web/routers/chats.py b/backend/web/routers/chats.py deleted file mode 100644 index 5e7e3ff9e..000000000 --- a/backend/web/routers/chats.py +++ /dev/null @@ -1,316 +0,0 @@ -"""Chat API router — entity-to-entity communication.""" - -import asyncio -import json -import logging -from typing import Annotated, Any, Literal - -from fastapi import APIRouter, Depends, HTTPException, Query -from fastapi.responses import StreamingResponse -from pydantic import BaseModel - -from backend.web.core.dependencies import get_app, get_current_user_id -from backend.web.utils.serializers import avatar_url - -logger = logging.getLogger(__name__) - -router = APIRouter(prefix="/api/chats", tags=["chats"]) - - -class CreateChatBody(BaseModel): - user_ids: list[str] - title: str | None = None - - -class SendMessageBody(BaseModel): - content: str - sender_id: str - mentioned_ids: list[str] | None = None - - -@router.get("") -async def list_chats( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """List all chats for the current user (social identity from JWT).""" - return app.state.chat_service.list_chats_for_user(user_id) - - -@router.post("") -async def create_chat( - body: CreateChatBody, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """Create a chat between users. 
2 users = 1:1 chat, 3+ = group chat.""" - chat_service = app.state.chat_service - try: - if len(body.user_ids) >= 3: - chat = chat_service.create_group_chat(body.user_ids, body.title) - else: - chat = chat_service.find_or_create_chat(body.user_ids, body.title) - return {"id": chat.id, "title": chat.title, "status": chat.status, "created_at": chat.created_at} - except ValueError as e: - raise HTTPException(400, str(e)) - - -@router.get("/{chat_id}") -async def get_chat( - chat_id: str, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """Get chat details with member list.""" - chat = app.state.chat_repo.get_by_id(chat_id) - if not chat: - raise HTTPException(404, "Chat not found") - participants = app.state.chat_entity_repo.list_participants(chat_id) - entity_repo = app.state.entity_repo - member_repo = app.state.member_repo - entities_info = [] - for p in participants: - e = entity_repo.get_by_id(p.user_id) - if e: - m = member_repo.get_by_id(e.member_id) - entities_info.append( - { - "id": p.user_id, - "name": e.name, - "type": e.type, - "avatar_url": avatar_url(e.member_id, bool(m.avatar if m else None)), - } - ) - else: - m = member_repo.get_by_id(p.user_id) - if m: - entities_info.append( - { - "id": p.user_id, - "name": m.name, - "type": "human", - "avatar_url": avatar_url(m.id, bool(m.avatar)), - } - ) - return { - "id": chat.id, - "title": chat.title, - "status": chat.status, - "created_at": chat.created_at, - "entities": entities_info, - } - - -@router.get("/{chat_id}/messages") -async def list_messages( - chat_id: str, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], - limit: int = Query(50, ge=1, le=200), - before: float | None = Query(None), -): - """List messages in a chat.""" - msgs = app.state.chat_message_repo.list_by_chat(chat_id, limit=limit, before=before) - entity_repo = app.state.entity_repo - member_repo = app.state.member_repo - sender_ids = {m.sender_id for m in msgs} - sender_names: dict[str, str] = {} - for sid in sender_ids: - e = entity_repo.get_by_id(sid) - if e: - sender_names[sid] = e.name - else: - m = member_repo.get_by_id(sid) - sender_names[sid] = m.name if m else "unknown" - return [ - { - "id": m.id, - "chat_id": m.chat_id, - "sender_id": m.sender_id, - "sender_name": sender_names.get(m.sender_id, "unknown"), - "content": m.content, - "mentioned_ids": m.mentioned_ids, - "created_at": m.created_at, - } - for m in msgs - ] - - -@router.post("/{chat_id}/read") -async def mark_read( - chat_id: str, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """Mark all messages in this chat as read for the current user.""" - import time - - app.state.chat_entity_repo.update_last_read(chat_id, user_id, time.time()) - return {"status": "ok"} - - -@router.post("/{chat_id}/messages") -async def send_message( - chat_id: str, - body: SendMessageBody, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """Send a message in a chat.""" - if not body.content.strip(): - raise HTTPException(400, "Content cannot be empty") - # Verify sender_id belongs to the authenticated user - _verify_participant_ownership(app, body.sender_id, user_id) - chat_service = app.state.chat_service - msg = chat_service.send_message(chat_id, body.sender_id, body.content, body.mentioned_ids) - return { - "id": msg.id, - "chat_id": msg.chat_id, - "sender_id": msg.sender_id, - "content": msg.content, 
- "mentioned_ids": msg.mentioned_ids, - "created_at": msg.created_at, - } - - -@router.get("/{chat_id}/events") -async def stream_chat_events( - chat_id: str, - token: str | None = None, - app: Annotated[Any, Depends(get_app)] = None, -): - """SSE stream for chat events. Uses ?token= for auth.""" - from backend.web.core.dependencies import _DEV_SKIP_AUTH - - if not _DEV_SKIP_AUTH: - if not token: - raise HTTPException(401, "Missing token") - try: - app.state.auth_service.verify_token(token) - except ValueError as e: - raise HTTPException(401, str(e)) - - event_bus = app.state.chat_event_bus - queue = event_bus.subscribe(chat_id) - - async def event_generator(): - try: - yield "retry: 5000\n\n" - while True: - try: - event = await asyncio.wait_for(queue.get(), timeout=30) - event_type = event.get("event", "message") - data = event.get("data", {}) - yield f"event: {event_type}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n" - except TimeoutError: - yield ": keepalive\n\n" - finally: - event_bus.unsubscribe(chat_id, queue) - - return StreamingResponse(event_generator(), media_type="text/event-stream") - - -# --------------------------------------------------------------------------- -# Contact management (block/mute) -# --------------------------------------------------------------------------- - - -class SetContactBody(BaseModel): - owner_id: str - target_id: str - relation: Literal["normal", "blocked", "muted"] - - -def _verify_participant_ownership(app: Any, participant_id: str, user_id: str) -> None: - """Raise 403 if participant_id does not belong to the authenticated user. - - For humans: participant_id == user_id (direct match). - For agents: participant_id == member_id, and agent_member.owner_user_id == user_id. - """ - if participant_id == user_id: - return - # Check if it's an agent member owned by this user - agent_member = app.state.member_repo.get_by_id(participant_id) - if agent_member and agent_member.owner_user_id == user_id: - return - raise HTTPException(403, "Participant does not belong to you") - - -@router.post("/contacts") -async def set_contact( - body: SetContactBody, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """Set a directional contact relationship (block/mute/normal).""" - _verify_participant_ownership(app, body.owner_id, user_id) - import time - - from storage.contracts import ContactRow - - contact_repo = app.state.contact_repo - contact_repo.upsert( - ContactRow( - owner_id=body.owner_id, - target_id=body.target_id, - relation=body.relation, - created_at=time.time(), - updated_at=time.time(), - ) - ) - return {"status": "ok", "relation": body.relation} - - -@router.delete("/contacts/{owner_id}/{target_id}") -async def delete_contact( - owner_id: str, - target_id: str, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """Delete a contact relationship.""" - _verify_participant_ownership(app, owner_id, user_id) - contact_repo = app.state.contact_repo - contact_repo.delete(owner_id, target_id) - return {"status": "deleted"} - - -# --------------------------------------------------------------------------- -# Chat mute -# --------------------------------------------------------------------------- - - -class MuteChatBody(BaseModel): - user_id: str - muted: bool - mute_until: float | None = None - - -@router.post("/{chat_id}/mute") -async def mute_chat( - chat_id: str, - body: MuteChatBody, - user_id: Annotated[str, Depends(get_current_user_id)], - app: 
Annotated[Any, Depends(get_app)], -): - """Mute/unmute a chat for the current user.""" - _verify_participant_ownership(app, body.user_id, user_id) - chat_entity_repo = app.state.chat_entity_repo - chat_entity_repo.update_mute(chat_id, body.user_id, body.muted, body.mute_until) - return {"status": "ok", "muted": body.muted} - - -@router.delete("/{chat_id}") -async def delete_chat( - chat_id: str, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -): - """Delete a chat. Caller must be a participant.""" - chat = app.state.chat_repo.get_by_id(chat_id) - if not chat: - raise HTTPException(404, "Chat not found") - if not app.state.chat_entity_repo.is_participant_in_chat(chat_id, user_id): - raise HTTPException(403, "Not a participant of this chat") - app.state.chat_repo.delete(chat_id) - return {"status": "deleted"} diff --git a/backend/web/routers/connections.py b/backend/web/routers/connections.py deleted file mode 100644 index c5fa0adc2..000000000 --- a/backend/web/routers/connections.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Connection endpoints — manage external platform connections (WeChat, etc.). - -@@@per-user — all endpoints scoped by user_id (the user's social identity). -""" - -from typing import Annotated, Any - -from fastapi import APIRouter, Depends, HTTPException - -from backend.web.core.dependencies import get_app, get_current_user_id -from backend.web.services.wechat_service import ( - QrPollRequest, - RoutingConfig, - RoutingSetRequest, - WeChatConnectionRegistry, -) - -router = APIRouter(prefix="/api/connections", tags=["connections"]) - - -def _get_registry(app: Any) -> WeChatConnectionRegistry: - return app.state.wechat_registry - - -# --- WeChat --- - - -@router.get("/wechat/state") -async def wechat_state( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - return _get_registry(app).get(user_id).get_state() - - -@router.post("/wechat/qrcode") -async def wechat_qrcode( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - conn = _get_registry(app).get(user_id) - if conn.connected: - raise HTTPException(400, "Already connected. 
Disconnect first.") - return await conn.get_qr_code() - - -@router.post("/wechat/qrcode/poll") -async def wechat_qrcode_poll( - body: QrPollRequest, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - registry = _get_registry(app) - conn = registry.get(user_id) - result = await conn.poll_qr_status(body.qrcode) - # Evict duplicates after successful connection - if result.get("status") == "confirmed" and conn._credentials: - registry.evict_duplicates(conn._credentials.account_id, user_id) - return result - - -@router.post("/wechat/disconnect") -async def wechat_disconnect( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - _get_registry(app).get(user_id).disconnect() - return {"ok": True} - - -@router.post("/wechat/polling/start") -async def wechat_start_polling( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - conn = _get_registry(app).get(user_id) - if not conn.connected: - raise HTTPException(400, "Not connected") - conn.start_polling() - return {"ok": True, "polling": True} - - -@router.post("/wechat/polling/stop") -async def wechat_stop_polling( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - _get_registry(app).get(user_id).stop_polling() - return {"ok": True, "polling": False} - - -# --- Routing config --- - - -@router.get("/wechat/routing") -async def wechat_get_routing( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - return _get_registry(app).get(user_id).routing.model_dump() - - -@router.post("/wechat/routing") -async def wechat_set_routing( - body: RoutingSetRequest, - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - _get_registry(app).get(user_id).set_routing(RoutingConfig(type=body.type, id=body.id, label=body.label)) - return {"ok": True} - - -@router.delete("/wechat/routing") -async def wechat_clear_routing( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - _get_registry(app).get(user_id).set_routing(RoutingConfig()) - return {"ok": True} - - -# --- List targets for routing picker --- - - -@router.get("/wechat/routing/targets") -async def wechat_routing_targets( - user_id: Annotated[str, Depends(get_current_user_id)], - app: Annotated[Any, Depends(get_app)], -) -> dict: - """List available threads and chats for the routing picker.""" - from backend.web.utils.serializers import avatar_url - - raw_threads = app.state.thread_repo.list_by_owner_user_id(user_id) - threads = [ - { - "id": t["id"], - "label": t.get("entity_name") or t.get("member_name") or t["id"][:12], - "avatar_url": avatar_url(t.get("member_id"), bool(t.get("member_avatar"))), - } - for t in raw_threads - ] - - raw_chats = app.state.chat_service.list_chats_for_user(user_id) - chats = [] - for c in raw_chats: - others = [e for e in c.get("entities", []) if e["id"] != user_id] - name = ", ".join(e["name"] for e in others) or "Unknown" - chats.append({"id": c["id"], "label": name}) - - return {"threads": threads, "chats": chats} diff --git a/backend/web/routers/contacts.py b/backend/web/routers/contacts.py new file mode 100644 index 000000000..689ff0f8b --- /dev/null +++ b/backend/web/routers/contacts.py @@ -0,0 +1,68 @@ +"""Contacts API router — /api/contacts endpoints.""" + +from 
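+# NOTE: contact rows are directional (owner_user_id → target_user_id): blocking
+# or muting A→B creates no B→A row. Same semantics as the "directional contact
+# relationship" endpoints in the old chats.py that this router replaces.
+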
+from __future__ import annotations
+
+import time
+from typing import Annotated, Any, Literal
+
+from fastapi import APIRouter, Depends
+from pydantic import BaseModel
+
+from backend.web.core.dependencies import get_app, get_current_user_id
+from storage.contracts import ContactRow
+
+router = APIRouter(prefix="/api/contacts", tags=["contacts"])
+
+
+class SetContactBody(BaseModel):
+    target_id: str
+    relation: Literal["normal", "blocked", "muted"]
+
+
+@router.get("")
+async def list_contacts(
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    """List contacts (blocked/muted) for the current user."""
+    rows = app.state.contact_repo.list_for_user(user_id)
+    return [
+        {
+            "owner_user_id": row.owner_id,
+            "target_user_id": row.target_id,
+            "relation": row.relation,
+            "created_at": row.created_at,
+            "updated_at": row.updated_at,
+        }
+        for row in rows
+    ]
+
+
+@router.post("")
+async def set_contact(
+    body: SetContactBody,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    """Upsert contact (block/mute/normal)."""
+    app.state.contact_repo.upsert(
+        ContactRow(
+            owner_id=user_id,
+            target_id=body.target_id,
+            relation=body.relation,
+            created_at=time.time(),
+            updated_at=time.time(),
+        )
+    )
+    return {"status": "ok", "relation": body.relation}
+
+
+@router.delete("/{target_id}")
+async def delete_contact(
+    target_id: str,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    """Remove contact entry."""
+    app.state.contact_repo.delete(user_id, target_id)
+    return {"status": "deleted"}
diff --git a/backend/web/routers/conversations.py b/backend/web/routers/conversations.py
new file mode 100644
index 000000000..57cd48256
--- /dev/null
+++ b/backend/web/routers/conversations.py
@@ -0,0 +1,164 @@
+"""Unified conversation list API — merges threads (hire) and chats (visit).
+
+GET /api/conversations returns a single sorted list so the frontend
+ConversationList can render a unified sidebar.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from typing import Annotated, Any
+
+from fastapi import APIRouter, Depends
+
+from backend.web.core.dependencies import get_app, get_current_user_id
+from backend.web.utils.serializers import avatar_url
+from core.runtime.middleware.monitor import AgentState
+
+router = APIRouter(prefix="/api/conversations", tags=["conversations"])
+
+
+def _is_internal_child_thread(thread_id: str) -> bool:
+    return thread_id.startswith("subagent-")
+
+
+def _resolve_display_member(app: Any, social_user_id: str) -> Any | None:
+    member = app.state.member_repo.get_by_id(social_user_id)
+    if member is not None:
+        return member
+    thread = app.state.thread_repo.get_by_user_id(social_user_id)
+    if thread is None:
+        return None
+    member_id = thread.get("member_id")
+    if not member_id:
+        return None
+    return app.state.member_repo.get_by_id(member_id)
+
+
+def _conversation_updated_at_key(item: dict[str, Any]) -> float:
+    raw = item.get("updated_at")
+    if raw is None:
+        return float("-inf")
+    if isinstance(raw, (int, float)):
+        return float(raw)
+    if isinstance(raw, str):
+        # @@@mixed-updated-at-sort - hire rows currently carry ISO strings while
+        # visit chats can still surface numeric timestamps from older chat storage.
+        # Normalize both before sorting so /api/conversations stays honest.
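+        # e.g. "2025-01-01T00:00:00+00:00" parses to 1735689600.0, the same
+        # epoch-seconds scale as raw float rows; "Z" is rewritten first because
+        # datetime.fromisoformat() only accepts it natively on Python 3.11+.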
+        try:
+            return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
+        except ValueError:
+            return float("-inf")
+    return float("-inf")
+
+
+@router.get("")
+async def list_conversations(
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)] = None,
+) -> list[dict[str, Any]]:
+    """Return hire threads + visit chats merged by updated_at desc."""
+    items: list[dict[str, Any]] = []
+
+    # ── Hire threads ──
+    raw_threads = app.state.thread_repo.list_by_owner_user_id(user_id)
+    pool = app.state.agent_pool
+    for t in raw_threads:
+        tid = t["id"]
+        if _is_internal_child_thread(tid):
+            continue
+        sandbox_type = t.get("sandbox_type", "local")
+        running = False
+        agent = pool.get(f"{tid}:{sandbox_type}")
+        if agent and hasattr(agent, "runtime"):
+            running = agent.runtime.current_state == AgentState.ACTIVE
+        last_active = app.state.thread_last_active.get(tid)
+        updated_at = datetime.fromtimestamp(last_active, tz=UTC).isoformat() if last_active else None
+        items.append(
+            {
+                "id": tid,
+                "type": "hire",
+                "title": t.get("member_name") or "Agent",
+                "member_id": t.get("member_id"),
+                "avatar_url": avatar_url(t.get("member_id"), bool(t.get("member_avatar"))),
+                "updated_at": updated_at,
+                "unread_count": 0,
+                "running": running,
+            }
+        )
+
+    # ── Visit chats ──
+    messaging = getattr(app.state, "messaging_service", None)
+    if messaging:
+        chats = messaging.list_chats_for_user(user_id)
+        messages_repo = getattr(app.state, "messages_repo", None)
+
+        # Pre-fetch all member data to avoid N+1 per-member lookups
+        all_member_ids: set[str] = set()
+        chat_members_cache: dict[str, list[dict[str, Any]]] = {}
+        chat_obj_cache: dict[str, Any] = {}
+
+        chat_ids = [c["id"] if isinstance(c, dict) else c for c in chats]
+        for chat_id in chat_ids:
+            chat_obj = app.state.chat_repo.get_by_id(chat_id) if hasattr(app.state, "chat_repo") else None
+            if not chat_obj:
+                continue
+            chat_obj_cache[chat_id] = chat_obj
+            members_list = messaging.list_chat_members(chat_id)
+            chat_members_cache[chat_id] = members_list
+            for m in members_list:
+                uid = m.get("user_id")
+                if uid and uid != user_id:
+                    all_member_ids.add(uid)
+
+        # Batch resolve members
+        member_cache: dict[str, Any] = {}
+        for uid in all_member_ids:
+            mem = _resolve_display_member(app, uid)
+            if mem:
+                member_cache[uid] = mem
+
+        for chat_id in chat_ids:
+            chat_obj = chat_obj_cache.get(chat_id)
+            if not chat_obj:
+                continue
+            members_list = chat_members_cache[chat_id]
+
+            # Determine display name + avatar in single pass
+            title = getattr(chat_obj, "title", None) or ""
+            chat_avatar = None
+            other_names: list[str] = []
+            for m in members_list:
+                uid = m.get("user_id")
+                if not uid or uid == user_id:
+                    continue
+                mem = member_cache.get(uid)
+                if not mem:
+                    continue
+                other_names.append(mem.name)
+                if chat_avatar is None:
+                    chat_avatar = avatar_url(mem.id, bool(mem.avatar))
+            if not title:
+                title = ", ".join(other_names) or "Chat"
+
+            # Unread count
+            unread = 0
+            if messages_repo:
+                unread = messages_repo.count_unread(chat_id, user_id)
+
+            items.append(
+                {
+                    "id": chat_id,
+                    "type": "visit",
+                    "title": title,
+                    "member_id": None,
+                    "avatar_url": chat_avatar,
+                    "updated_at": getattr(chat_obj, "updated_at", None) or getattr(chat_obj, "created_at", None),
+                    "unread_count": unread,
+                    "running": False,
+                }
+            )
+
+    # Sort by updated_at descending (None goes last)
+    items.sort(key=_conversation_updated_at_key, reverse=True)
+    return items
diff --git a/backend/web/routers/debug.py b/backend/web/routers/debug.py
deleted file mode 100644
index 57299f219..000000000
--- a/backend/web/routers/debug.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""Debug logging endpoints."""
-
-from fastapi import APIRouter
-from pydantic import BaseModel
-
-router = APIRouter(prefix="/api/debug", tags=["debug"])
-
-
-class LogMessage(BaseModel):
-    message: str
-    timestamp: str
-
-
-@router.post("/log")
-async def log_frontend_message(payload: LogMessage) -> dict:
-    """Receive frontend console logs and write to file."""
-    with open("/tmp/leon-frontend-console.log", "a") as f:
-        f.write(f"[{payload.timestamp}] {payload.message}\n")
-    return {"status": "ok"}
diff --git a/backend/web/routers/entities.py b/backend/web/routers/entities.py
index 96f636955..f1686eb51 100644
--- a/backend/web/routers/entities.py
+++ b/backend/web/routers/entities.py
@@ -1,4 +1,4 @@
-"""Entity & Member endpoints — new entity-chat system."""
+"""Member endpoints — social identity discovery and agent thread lookup."""
 
 import io
 import logging
@@ -12,6 +12,7 @@
 from backend.web.core.dependencies import get_app, get_current_user_id
 from backend.web.core.paths import avatars_dir
 from backend.web.utils.serializers import avatar_url
+from storage.contracts import MemberType
 
 logger = logging.getLogger(__name__)
 
@@ -40,7 +41,7 @@ def process_and_save_avatar(source: Path | bytes, member_id: str) -> str:
     img = ImageOps.exif_transpose(img)
     if img.mode not in ("RGB", "RGBA"):
         img = img.convert("RGB")
-    img = ImageOps.fit(img, (AVATAR_SIZE, AVATAR_SIZE), method=Image.LANCZOS)
+    img = ImageOps.fit(img, (AVATAR_SIZE, AVATAR_SIZE), method=Image.Resampling.LANCZOS)
     AVATARS_DIR.mkdir(parents=True, exist_ok=True)
     img.save(AVATARS_DIR / f"{member_id}.png", format="PNG", optimize=True)
     return f"avatars/{member_id}.png"
@@ -89,6 +90,15 @@ def _avatar_path(member_id: str) -> Path:
     return AVATARS_DIR / f"{safe_id}.png"
 
 
+def _get_owned_avatar_member_or_404(member_id: str, current_user_id: str, member_repo: Any) -> Any:
+    member = member_repo.get_by_id(member_id)
+    if not member:
+        raise HTTPException(404, "Member not found")
+    if member_id == current_user_id or member.owner_user_id == current_user_id:
+        return member
+    raise HTTPException(403, "Not authorized")
+
+
 @members_router.put("/{member_id}/avatar")
 async def upload_avatar(
     member_id: str,
@@ -98,11 +108,7 @@
 ) -> dict[str, str]:
     """Upload/replace avatar image. Resizes to 256x256 PNG."""
     repo = app.state.member_repo
-    member = repo.get_by_id(member_id)
-    if not member:
-        raise HTTPException(404, "Member not found")
-    if member_id != current_user_id and member.owner_user_id != current_user_id:
-        raise HTTPException(403, "Not authorized")
+    _get_owned_avatar_member_or_404(member_id, current_user_id, repo)
     ct = file.content_type or ""
     if ct not in ALLOWED_CONTENT_TYPES:
         raise HTTPException(400, f"Unsupported image type: {ct}")
@@ -137,11 +143,7 @@
 ) -> dict[str, str]:
     """Delete avatar."""
     repo = app.state.member_repo
-    member = repo.get_by_id(member_id)
-    if not member:
-        raise HTTPException(404, "Member not found")
-    if member_id != current_user_id and member.owner_user_id != current_user_id:
-        raise HTTPException(403, "Not authorized")
+    _get_owned_avatar_member_or_404(member_id, current_user_id, repo)
     path = _avatar_path(member_id)
     if path.exists():
         path.unlink()
@@ -160,66 +162,85 @@ async def list_entities(
     app: Annotated[Any, Depends(get_app)],
 ):
     """List chattable entities for discovery (New Chat picker).
-    Humans are represented by their user_id; agents by their member_id.
-    Excludes the current user (you don't chat with yourself)."""
-    entity_repo = app.state.entity_repo
+    Humans are keyed by user_id; agent templates are keyed by member_id plus
+    their default representative thread. Excludes the current user."""
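+    # Illustrative item shapes (keys mirror the dicts built below):
+    #   human → {"user_id": m.id, "type": "human", "default_thread_id": None, ...}
+    #   agent → {"member_id": m.id, "type": e.g. "agent", "default_thread_id": <id or None>, ...}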
     member_repo = app.state.member_repo
-
     members = member_repo.list_all()
     member_map = {m.id: m for m in members}
     items = []
-    # Human participants: all human members except self
     for m in members:
-        if m.type != "human" or m.id == user_id:
+        if m.id == user_id:
             continue
-        items.append(
-            {
-                "id": m.id,  # user_id IS the social identity for humans
-                "name": m.name,
-                "type": "human",
-                "avatar_url": avatar_url(m.id, bool(m.avatar)),
-                "owner_name": None,
-                "member_name": m.name,
-                "thread_id": None,
-                "is_main": None,
-                "branch_index": None,
-            }
-        )
-
-    # Agent participants: from entity_repo (agent entities have id = member_id)
-    all_entities = entity_repo.list_by_type("agent")
-    for entity in all_entities:
-        member = member_map.get(entity.member_id)
-        owner = member_map.get(member.owner_user_id) if member and member.owner_user_id else None
-        thread = app.state.thread_repo.get_by_id(entity.thread_id) if entity.thread_id else None
-        items.append(
-            {
-                "id": entity.id,  # entity.id = member_id = social identity for agents
-                "name": entity.name,
-                "type": entity.type,
-                "avatar_url": avatar_url(entity.member_id, bool(member.avatar if member else None)),
-                "owner_name": owner.name if owner else None,
-                "member_name": member.name if member else None,
-                "thread_id": entity.thread_id,
-                "is_main": thread["is_main"] if thread else None,
-                "branch_index": thread["branch_index"] if thread else None,
-            }
-        )
+        if m.type == MemberType.HUMAN:
+            items.append(
+                {
+                    "user_id": m.id,
+                    "name": m.name,
+                    "type": "human",
+                    "avatar_url": avatar_url(m.id, bool(m.avatar)),
+                    "owner_name": None,
+                    "member_name": m.name,
+                    "default_thread_id": None,
+                    "is_default_thread": None,
+                    "branch_index": None,
+                }
+            )
+        else:
+            owner = member_map.get(m.owner_user_id) if m.owner_user_id else None
+            default_thread = app.state.thread_repo.get_default_thread(m.id)
+            items.append(
+                {
+                    "member_id": m.id,
+                    "name": m.name,
+                    "type": m.type.value if hasattr(m.type, "value") else str(m.type),
+                    "avatar_url": avatar_url(m.id, bool(m.avatar)),
+                    "owner_name": owner.name if owner else None,
+                    "member_name": m.name,
+                    "default_thread_id": default_thread["id"] if default_thread else None,
+                    "is_default_thread": default_thread["is_main"] if default_thread else None,
+                    "branch_index": default_thread["branch_index"] if default_thread else None,
+                }
+            )
 
     return items
 
 
-@router.get("/{user_id}/agent-thread")
+@router.get("/{member_id}/profile")
+async def get_entity_profile(
+    member_id: str,
+    app: Annotated[Any, Depends(get_app)],
+):
+    """Public agent profile. No auth required (frontend uses plain fetch)."""
+    member = _get_member_or_404(app, member_id)
+    member_type = member.type.value if hasattr(member.type, "value") else str(member.type)
+    if "agent" not in member_type:
+        raise HTTPException(404, "Profile not available for this member type")
+    return {
+        "id": member.id,
+        "name": member.name,
+        "type": member_type,
+        "avatar_url": avatar_url(member.id, bool(member.avatar)),
+        "description": member.description,
+    }
+
+
+@router.get("/{member_id}/agent-thread")
 async def get_agent_thread(
-    user_id: str,
+    member_id: str,
     current_user_id: Annotated[str, Depends(get_current_user_id)],
     app: Annotated[Any, Depends(get_app)],
 ):
-    """Get the thread_id for an agent's main thread. user_id here is the agent's member_id."""
-    entity = app.state.entity_repo.get_by_id(user_id)
-    if not entity:
-        raise HTTPException(404, "Entity not found")
-    if entity.type == "agent" and entity.thread_id:
-        return {"user_id": user_id, "thread_id": entity.thread_id}
+    """Get the default representative thread for an agent template."""
+    member = _get_member_or_404(app, member_id)
+    default_thread = app.state.thread_repo.get_default_thread(member_id)
+    if member.type != MemberType.HUMAN and default_thread is not None:
+        return {"member_id": member_id, "default_thread_id": default_thread["id"]}
     raise HTTPException(404, "No agent thread found")
+
+
+def _get_member_or_404(app: Any, member_id: str) -> Any:
+    member = app.state.member_repo.get_by_id(member_id)
+    if not member:
+        raise HTTPException(404, "Member not found")
+    return member
diff --git a/backend/web/routers/invite_codes.py b/backend/web/routers/invite_codes.py
index 53a17efeb..290b43631 100644
--- a/backend/web/routers/invite_codes.py
+++ b/backend/web/routers/invite_codes.py
@@ -11,15 +11,26 @@
 router = APIRouter(prefix="/api/invite-codes", tags=["invite-codes"])
 
 
-def _get_invite_code_repo(app: Any):
-    """Get SupabaseInviteCodeRepo from app state, or raise 503 if unavailable."""
-    sb_client = getattr(app.state, "_supabase_client", None)
+async def _call_invite_code_repo(
+    request: Request,
+    error_prefix: str,
+    method_name: str,
+    *args: Any,
+    **kwargs: Any,
+) -> Any:
+    sb_client = getattr(request.app.state, "_supabase_client", None)
     if sb_client is None:
         raise HTTPException(503, "邀请码服务不可用(当前为 SQLite 模式)")
-    repo = getattr(app.state, "invite_code_repo", None)
+    repo = getattr(request.app.state, "invite_code_repo", None)
     if repo is None:
         raise HTTPException(503, "邀请码仓库未初始化")
-    return repo
+    try:
+        method = getattr(repo, method_name)
+        return await asyncio.to_thread(method, *args, **kwargs)
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(500, f"{error_prefix}{e}") from e
 
 
 # ── List all invite codes ────────────────────────────────────────────────────
@@ -30,14 +41,8 @@ async def list_invite_codes(
     request: Request,
     user_id: Annotated[str, Depends(get_current_user_id)],
 ) -> dict:
-    repo = _get_invite_code_repo(request.app)
-    try:
-        codes = await asyncio.to_thread(repo.list_all)
-        return {"codes": codes}
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(500, f"获取邀请码列表失败:{e}") from e
+    codes = await _call_invite_code_repo(request, "获取邀请码列表失败:", "list_all")
+    return {"codes": codes}
 
 
 # ── Generate a new invite code ───────────────────────────────────────────────
@@ -53,18 +58,13 @@ async def generate_invite_code(
     request: Request,
     user_id: Annotated[str, Depends(get_current_user_id)],
 ) -> dict:
-    repo = _get_invite_code_repo(request.app)
-    try:
-        code = await asyncio.to_thread(
-            repo.generate,
-            created_by=user_id,
-            expires_days=payload.expires_days,
-        )
-        return code
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(500, f"生成邀请码失败:{e}") from e
+    return await _call_invite_code_repo(
+        request,
+        "生成邀请码失败:",
+        "generate",
+        created_by=user_id,
+        expires_days=payload.expires_days,
+    )
 
 
 # ── Revoke (delete) an invite code ──────────────────────────────────────────
@@ -76,16 +76,10 @@ async def revoke_invite_code(
     request: Request,
     user_id: Annotated[str, Depends(get_current_user_id)],
 ) -> dict:
-    repo = _get_invite_code_repo(request.app)
-    try:
-        ok = await asyncio.to_thread(repo.revoke, code)
-        if not ok:
-            raise HTTPException(404, "邀请码不存在")
-        return {"ok": True}
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(500, f"吊销邀请码失败:{e}") from e
+    ok = await _call_invite_code_repo(request, "吊销邀请码失败:", "revoke", code)
+    if not ok:
+        raise HTTPException(404, "邀请码不存在")
+    return {"ok": True}
 
 
 # ── Validate an invite code (no auth required) ───────────────────────────────
@@ -93,11 +87,5 @@
 @router.get("/validate/{code}")
 async def validate_invite_code(code: str, request: Request) -> dict:
-    repo = _get_invite_code_repo(request.app)
-    try:
-        valid = await asyncio.to_thread(repo.is_valid, code)
-        return {"valid": valid}
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(500, f"校验邀请码失败:{e}") from e
+    valid = await _call_invite_code_repo(request, "校验邀请码失败:", "is_valid", code)
+    return {"valid": valid}
diff --git a/backend/web/routers/messaging.py b/backend/web/routers/messaging.py
new file mode 100644
index 000000000..ce2b2579a
--- /dev/null
+++ b/backend/web/routers/messaging.py
@@ -0,0 +1,329 @@
+"""Messaging API router — replaces chats.py.
+
+All operations go through MessagingService (Supabase-backed).
+No legacy fallback.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from datetime import UTC, datetime
+from typing import Annotated, Any
+
+from fastapi import APIRouter, Depends, HTTPException, Query
+from pydantic import BaseModel
+
+from backend.web.core.dependencies import get_app, get_current_user_id
+from backend.web.utils.serializers import avatar_url
+
+router = APIRouter(prefix="/api/chats", tags=["chats"])
+
+
+# ---------------------------------------------------------------------------
+# Request models
+# ---------------------------------------------------------------------------
+
+
+class CreateChatBody(BaseModel):
+    user_ids: list[str]
+    title: str | None = None
+
+
+class SendMessageBody(BaseModel):
+    content: str
+    sender_id: str
+    mentioned_ids: list[str] | None = None
+    message_type: str = "human"
+    signal: str | None = None
+
+
+class MuteChatBody(BaseModel):
+    user_id: str
+    muted: bool
+    mute_until: float | None = None
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _messaging(app: Any):
+    svc = getattr(app.state, "messaging_service", None)
+    if svc is None:
+        raise HTTPException(503, "MessagingService not initialized")
+    return svc
+
+
+def _verify_member_ownership(app: Any, member_id: str, user_id: str) -> None:
+    # @@@thread-social-owner-check - sender_id can be a thread-owned social user_id, so
+    # ownership must resolve through the thread back to the template member before checking owner.
+    member = _resolve_display_member(app, member_id)
+    if not member:
+        raise HTTPException(403, "Member not found")
+    if member.id == user_id:
+        return  # human member sending as themselves
+    if member.owner_user_id == user_id:
+        return  # agent owned by current user
+    raise HTTPException(403, "Member does not belong to you")
+
+
+def _get_accessible_chat_or_404(app: Any, chat_id: str, user_id: str) -> Any:
+    chat = app.state.chat_repo.get_by_id(chat_id)
+    if not chat:
+        raise HTTPException(404, "Chat not found")
+    if not _messaging(app).is_chat_member(chat_id, user_id):
+        raise HTTPException(403, "Not a participant of this chat")
+    return chat
+
+
+def _resolve_display_member(app: Any, social_user_id: str) -> Any | None:
+    member = app.state.member_repo.get_by_id(social_user_id)
+    if member is not None:
+        return member
+    thread_repo = getattr(app.state, "thread_repo", None)
+    if thread_repo is None:
+        return None
+    thread = thread_repo.get_by_user_id(social_user_id)
+    if thread is None:
+        return None
+    member_id = thread.get("member_id")
+    if not member_id:
+        return None
+    return app.state.member_repo.get_by_id(member_id)
+
+
+def _msg_response(m: dict[str, Any], app: Any) -> dict[str, Any]:
+    sender = _resolve_display_member(app, m.get("sender_id", ""))
+    return {
+        "id": m["id"],
+        "chat_id": m["chat_id"],
+        "sender_id": m.get("sender_id"),
+        "sender_name": sender.name if sender else "unknown",
+        "content": m["content"],
+        "message_type": m.get("message_type", "human"),
+        "mentioned_ids": m.get("mentioned_ids") or m.get("mentions") or [],
+        "signal": m.get("signal"),
+        "retracted_at": m.get("retracted_at"),
+        "created_at": m.get("created_at"),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Chat list / create
+# ---------------------------------------------------------------------------
+
+
+@router.get("")
+async def list_chats(
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    return _messaging(app).list_chats_for_user(user_id)
+
+
+@router.post("")
+async def create_chat(
+    body: CreateChatBody,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    try:
+        if len(body.user_ids) >= 3:
+            chat = _messaging(app).create_group_chat(body.user_ids, body.title)
+        else:
+            chat = _messaging(app).find_or_create_chat(body.user_ids, body.title)
+        return {
+            "id": chat["id"],
+            "title": chat.get("title"),
+            "status": chat.get("status"),
+            "created_at": chat.get("created_at"),
+        }
+    except ValueError as e:
+        raise HTTPException(400, str(e))
+
+
+# ---------------------------------------------------------------------------
+# Chat detail
+# ---------------------------------------------------------------------------
+
+
+@router.get("/{chat_id}")
+async def get_chat(
+    chat_id: str,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    chat = _get_accessible_chat_or_404(app, chat_id, user_id)
+    members_list = _messaging(app).list_chat_members(chat_id)
+    members_info = []
+    for m in members_list:
+        uid = m.get("user_id")
+        if not uid:
+            continue
+        mem = _resolve_display_member(app, uid)
+        if mem:
+            members_info.append(
+                {
+                    "id": uid,
+                    "name": mem.name,
+                    "type": mem.type.value if hasattr(mem.type, "value") else str(mem.type),
+                    "avatar_url": avatar_url(mem.id, bool(mem.avatar)),
+                }
+            )
+    return {
+        "id": chat.id,
+        "title": chat.title,
+        "status": chat.status,
+        "created_at": chat.created_at,
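+        # Members are serialized under the legacy "entities" key — the same field
+        # name the old chats.py returned — so existing consumers keep working.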
        "entities": members_info,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Messages
+# ---------------------------------------------------------------------------
+
+
+@router.get("/{chat_id}/messages")
+async def list_messages(
+    chat_id: str,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+    limit: int = Query(50, ge=1, le=200),
+    before: str | None = Query(None),
+):
+    if not _messaging(app).is_chat_member(chat_id, user_id):
+        raise HTTPException(403, "Not a participant of this chat")
+    msgs = _messaging(app).list_messages(chat_id, limit=limit, before=before, viewer_id=user_id)
+    return [_msg_response(m, app) for m in msgs]
+
+
+@router.post("/{chat_id}/messages")
+async def send_message(
+    chat_id: str,
+    body: SendMessageBody,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    if not body.content.strip():
+        raise HTTPException(400, "Content cannot be empty")
+    _verify_member_ownership(app, body.sender_id, user_id)
+    msg = _messaging(app).send(
+        chat_id,
+        body.sender_id,
+        body.content,
+        mentions=body.mentioned_ids,
+        signal=body.signal,
+        message_type=body.message_type,
+    )
+    return _msg_response(msg, app)
+
+
+@router.post("/{chat_id}/messages/{message_id}/retract")
+async def retract_message(
+    chat_id: str,
+    message_id: str,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    ok = _messaging(app).retract(message_id, user_id)
+    if not ok:
+        raise HTTPException(400, "Cannot retract: not sender, already retracted, or 2-min window expired")
+    return {"status": "retracted"}
+
+
+@router.delete("/{chat_id}/messages/{message_id}")
+async def delete_message_for_self(
+    chat_id: str,
+    message_id: str,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    _messaging(app).delete_for(message_id, user_id)
+    return {"status": "deleted"}
+
+
+@router.post("/{chat_id}/read")
+async def mark_read(
+    chat_id: str,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    _messaging(app).mark_read(chat_id, user_id)
+    return {"status": "ok"}
+
+
+# ---------------------------------------------------------------------------
+# Delete chat
+# ---------------------------------------------------------------------------
+
+
+@router.delete("/{chat_id}")
+async def delete_chat(
+    chat_id: str,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    _get_accessible_chat_or_404(app, chat_id, user_id)
+    app.state.chat_repo.delete(chat_id)
+    return {"status": "deleted"}
+
+
+# ---------------------------------------------------------------------------
+# SSE stream (typing indicators fallback, messages come via Supabase Realtime)
+# ---------------------------------------------------------------------------
+
+
+@router.get("/{chat_id}/events")
+async def stream_chat_events(
+    chat_id: str,
+    token: str | None = None,
+    app: Annotated[Any, Depends(get_app)] = None,
+):
+    if not token:
+        raise HTTPException(401, "Missing token")
+    try:
+        app.state.auth_service.verify_token(token)
+    except ValueError as e:
+        raise HTTPException(401, str(e))
+
+    from fastapi.responses import StreamingResponse
+
+    event_bus = app.state.chat_event_bus
+    queue = event_bus.subscribe(chat_id)
+
+    async def event_generator():
+        try:
+            yield "retry: 5000\n\n"
+            while True:
+                try:
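+                    # Wake at least every 30s: TimeoutError below emits an SSE
+                    # comment line as a keepalive so idle connections stay open.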
+                    event = await asyncio.wait_for(queue.get(), timeout=30)
+                    event_type = event.get("event", "message")
+                    data = event.get("data", {})
+                    yield f"event: {event_type}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
+                except TimeoutError:
+                    yield ": keepalive\n\n"
+        finally:
+            event_bus.unsubscribe(chat_id, queue)
+
+    return StreamingResponse(event_generator(), media_type="text/event-stream")
+
+
+# ---------------------------------------------------------------------------
+# Chat mute
+# ---------------------------------------------------------------------------
+
+
+@router.post("/{chat_id}/mute")
+async def mute_chat(
+    chat_id: str,
+    body: MuteChatBody,
+    user_id: Annotated[str, Depends(get_current_user_id)],
+    app: Annotated[Any, Depends(get_app)],
+):
+    _verify_member_ownership(app, body.user_id, user_id)
+    mute_until_iso = datetime.fromtimestamp(body.mute_until, tz=UTC).isoformat() if body.mute_until else None
+    _messaging(app).update_mute(chat_id, body.user_id, body.muted, mute_until_iso)
+    return {"status": "ok", "muted": body.muted}
diff --git a/backend/web/routers/monitor.py b/backend/web/routers/monitor.py
index 8b389c308..eb1781db6 100644
--- a/backend/web/routers/monitor.py
+++ b/backend/web/routers/monitor.py
@@ -1,57 +1,26 @@
-"""Sandbox Monitor API - thin router over monitor core."""
+"""Monitor router compatibility layer.
+
+Expose the richer monitor implementation from ``backend.web.monitor`` while
+preserving the newer resource/health helper endpoints added on main.
+"""
 
 import asyncio
 
-from fastapi import APIRouter, HTTPException, Query
+from fastapi import HTTPException, Query
+from pydantic import BaseModel, Field
 
+from backend.web.monitor import list_leases, router
 from backend.web.services import monitor_service
 from backend.web.services.resource_cache import (
-    get_resource_overview_snapshot,
-    refresh_resource_overview_sync,
+    get_monitor_resource_overview_snapshot,
+    refresh_monitor_resource_overview_sync,
 )
 
-router = APIRouter(prefix="/api/monitor")
-
-
-@router.get("/threads")
-def list_threads():
-    return monitor_service.list_threads()
-
-
-@router.get("/thread/{thread_id}")
-def get_thread(thread_id: str):
-    return monitor_service.get_thread(thread_id)
-
-
-@router.get("/leases")
-def list_leases():
-    return monitor_service.list_leases()
-
-
-@router.get("/lease/{lease_id}")
-def get_lease(lease_id: str):
-    try:
-        return monitor_service.get_lease(lease_id)
-    except KeyError as e:
-        raise HTTPException(status_code=404, detail=str(e)) from e
-
-
-@router.get("/diverged")
-def list_diverged():
-    return monitor_service.list_diverged()
-
-
-@router.get("/events")
-def list_events(limit: int = 100):
-    return monitor_service.list_events(limit=limit)
-
-
-@router.get("/event/{event_id}")
-def get_event(event_id: str):
-    try:
-        return monitor_service.get_event(event_id)
-    except KeyError as e:
-        raise HTTPException(status_code=404, detail=str(e)) from e
+class ResourceCleanupRequest(BaseModel):
+    action: str = Field(default="cleanup_residue")
+    lease_ids: list[str]
+    expected_category: str
 
 
@@ -59,15 +28,60 @@
 def health_snapshot():
     return monitor_service.runtime_health_snapshot()
 
 
+@router.get("/dashboard")
+def dashboard_snapshot():
+    health = monitor_service.runtime_health_snapshot()
+    resources = get_monitor_resource_overview_snapshot()
+    leases = list_leases()
+
+    resource_summary = resources.get("summary") or {}
+    lease_summary = leases.get("summary") or {}
+
+    return {
+        "snapshot_at": health.get("snapshot_at"),
+        "resources_summary": resource_summary,
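+        # Defensive int(... or 0) coercion throughout: summary dicts may be empty
+        # or carry None values (e.g. before the first refresh), so every counter
+        # degrades to 0 instead of raising.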
"infra": { + "providers_active": int(resource_summary.get("active_providers") or 0), + "providers_unavailable": int(resource_summary.get("unavailable_providers") or 0), + "leases_total": int(lease_summary.get("total") or leases.get("count") or 0), + "leases_diverged": int(lease_summary.get("diverged") or 0) + int(lease_summary.get("orphan_diverged") or 0), + "leases_orphan": int(lease_summary.get("orphan") or 0) + int(lease_summary.get("orphan_diverged") or 0), + "leases_healthy": int(lease_summary.get("healthy") or 0), + }, + "workload": { + "db_sessions_total": int(((health.get("db") or {}).get("counts") or {}).get("chat_sessions") or 0), + "provider_sessions_total": int(((health.get("sessions") or {}).get("total")) or 0), + "running_sessions": int(resource_summary.get("running_sessions") or 0), + "evaluations_running": 0, + }, + "latest_evaluation": None, + } + + @router.get("/resources") def resources_overview(): - return get_resource_overview_snapshot() + return get_monitor_resource_overview_snapshot() @router.post("/resources/refresh") async def resources_refresh(): # @@@refresh-off-main-loop - provider I/O stays off event loop to avoid request head-of-line blocking. - return await asyncio.to_thread(refresh_resource_overview_sync) + return await asyncio.to_thread(refresh_monitor_resource_overview_sync) + + +@router.post("/resources/cleanup") +async def resources_cleanup(payload: ResourceCleanupRequest): + from backend.web.services import monitor_service + + try: + return await asyncio.to_thread( + monitor_service.cleanup_resource_leases, + action=payload.action, + lease_ids=payload.lease_ids, + expected_category=payload.expected_category, + ) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc @router.get("/sandbox/{lease_id}/browse") diff --git a/backend/web/routers/panel.py b/backend/web/routers/panel.py index 3fe2f481b..0b5a8bd45 100644 --- a/backend/web/routers/panel.py +++ b/backend/web/routers/panel.py @@ -27,6 +27,15 @@ router = APIRouter(prefix="/api/panel", tags=["panel"]) +def _get_owned_member_or_404(member_id: str, user_id: str) -> dict[str, Any]: + item = member_service.get_member(member_id) + if not item: + raise HTTPException(404, "Member not found") + if item.get("owner_user_id") != user_id: + raise HTTPException(403, "Forbidden") + return item + + # ── Members ── @@ -41,11 +50,11 @@ async def list_members( @router.get("/members/{member_id}") -async def get_member(member_id: str) -> dict[str, Any]: - item = await asyncio.to_thread(member_service.get_member, member_id) - if not item: - raise HTTPException(404, "Member not found") - return item +async def get_member( + member_id: str, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + return await asyncio.to_thread(_get_owned_member_or_404, member_id, user_id) @router.post("/members") @@ -55,20 +64,30 @@ async def create_member( request: Request, ) -> dict[str, Any]: member_repo = getattr(request.app.state, "member_repo", None) - return await asyncio.to_thread(member_service.create_member, req.name, req.description, owner_user_id=user_id, member_repo=member_repo) + agent_config_repo = getattr(request.app.state, "agent_config_repo", None) + return await asyncio.to_thread( + member_service.create_member, + req.name, + req.description, + owner_user_id=user_id, + member_repo=member_repo, + agent_config_repo=agent_config_repo, + ) @router.put("/members/{member_id}") -async def update_member(member_id: str, req: UpdateMemberRequest, request: Request) -> 
dict[str, Any]: +async def update_member( + member_id: str, + req: UpdateMemberRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: member_repo = getattr(request.app.state, "member_repo", None) - entity_repo = getattr(request.app.state, "entity_repo", None) - thread_repo = getattr(request.app.state, "thread_repo", None) + await asyncio.to_thread(_get_owned_member_or_404, member_id, user_id) item = await asyncio.to_thread( member_service.update_member, member_id, member_repo=member_repo, - entity_repo=entity_repo, - thread_repo=thread_repo, **req.model_dump(), ) if not item: @@ -77,29 +96,64 @@ async def update_member(member_id: str, req: UpdateMemberRequest, request: Reque @router.put("/members/{member_id}/config") -async def update_member_config(member_id: str, req: MemberConfigPayload) -> dict[str, Any]: - item = await asyncio.to_thread(member_service.update_member_config, member_id, req.model_dump()) +async def update_member_config( + member_id: str, + req: MemberConfigPayload, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + await asyncio.to_thread(_get_owned_member_or_404, member_id, user_id) + agent_config_repo = getattr(request.app.state, "agent_config_repo", None) + item = await asyncio.to_thread( + member_service.update_member_config, + member_id, + req.model_dump(), + agent_config_repo=agent_config_repo, + ) if not item: raise HTTPException(404, "Member not found") return item @router.put("/members/{member_id}/publish") -async def publish_member(member_id: str, req: PublishMemberRequest) -> dict[str, Any]: +async def publish_member( + member_id: str, + req: PublishMemberRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: if member_id == "__leon__": raise HTTPException(403, "Cannot publish builtin member") - item = await asyncio.to_thread(member_service.publish_member, member_id, req.bump_type) + await asyncio.to_thread(_get_owned_member_or_404, member_id, user_id) + agent_config_repo = getattr(request.app.state, "agent_config_repo", None) + item = await asyncio.to_thread( + member_service.publish_member, + member_id, + req.bump_type, + agent_config_repo=agent_config_repo, + ) if not item: raise HTTPException(404, "Member not found") return item @router.delete("/members/{member_id}") -async def delete_member(member_id: str, request: Request) -> dict[str, Any]: +async def delete_member( + member_id: str, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: if member_id == "__leon__": raise HTTPException(403, "Cannot delete builtin member") + await asyncio.to_thread(_get_owned_member_or_404, member_id, user_id) member_repo = getattr(request.app.state, "member_repo", None) - ok = await asyncio.to_thread(member_service.delete_member, member_id, member_repo=member_repo) + agent_config_repo = getattr(request.app.state, "agent_config_repo", None) + ok = await asyncio.to_thread( + member_service.delete_member, + member_id, + member_repo=member_repo, + agent_config_repo=agent_config_repo, + ) if not ok: raise HTTPException(404, "Member not found") return {"success": True} @@ -109,39 +163,95 @@ async def delete_member(member_id: str, request: Request) -> dict[str, Any]: @router.get("/tasks") -async def list_tasks() -> dict[str, Any]: - items = await asyncio.to_thread(task_service.list_tasks) +async def list_tasks( + request: Request, + user_id: Annotated[str, 
Depends(get_current_user_id)], +) -> dict[str, Any]: + items = await asyncio.to_thread( + task_service.list_tasks, + owner_user_id=user_id, + repo=request.app.state.panel_task_repo, + thread_repo=request.app.state.thread_repo, + ) return {"items": items} @router.post("/tasks") -async def create_task(req: CreateTaskRequest) -> dict[str, Any]: - return await asyncio.to_thread(task_service.create_task, **req.model_dump()) +async def create_task( + req: CreateTaskRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + return await asyncio.to_thread( + task_service.create_task, + owner_user_id=user_id, + repo=request.app.state.panel_task_repo, + **req.model_dump(), + ) @router.put("/tasks/bulk-status") -async def bulk_update_status(req: BulkTaskStatusRequest) -> dict[str, Any]: - count = await asyncio.to_thread(task_service.bulk_update_task_status, req.ids, req.status) +async def bulk_update_status( + req: BulkTaskStatusRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + count = await asyncio.to_thread( + task_service.bulk_update_task_status, + req.ids, + req.status, + owner_user_id=user_id, + repo=request.app.state.panel_task_repo, + ) return {"updated": count} @router.post("/tasks/bulk-delete") -async def bulk_delete_tasks(req: BulkDeleteTasksRequest) -> dict[str, Any]: - count = await asyncio.to_thread(task_service.bulk_delete_tasks, req.ids) +async def bulk_delete_tasks( + req: BulkDeleteTasksRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + count = await asyncio.to_thread( + task_service.bulk_delete_tasks, + req.ids, + owner_user_id=user_id, + repo=request.app.state.panel_task_repo, + ) return {"deleted": count} @router.put("/tasks/{task_id}") -async def update_task(task_id: str, req: UpdateTaskRequest) -> dict[str, Any]: - item = await asyncio.to_thread(task_service.update_task, task_id, **req.model_dump()) +async def update_task( + task_id: str, + req: UpdateTaskRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + item = await asyncio.to_thread( + task_service.update_task, + task_id, + owner_user_id=user_id, + repo=request.app.state.panel_task_repo, + **req.model_dump(), + ) if not item: raise HTTPException(404, "Task not found") return item @router.delete("/tasks/{task_id}") -async def delete_task(task_id: str) -> dict[str, Any]: - ok = await asyncio.to_thread(task_service.delete_task, task_id) +async def delete_task( + task_id: str, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + ok = await asyncio.to_thread( + task_service.delete_task, + task_id, + owner_user_id=user_id, + repo=request.app.state.panel_task_repo, + ) if not ok: raise HTTPException(404, "Task not found") return {"success": True} @@ -151,49 +261,86 @@ async def delete_task(task_id: str) -> dict[str, Any]: @router.get("/cron-jobs") -async def list_cron_jobs() -> dict[str, Any]: - items = await asyncio.to_thread(cron_job_service.list_cron_jobs) +async def list_cron_jobs( + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + items = await asyncio.to_thread( + cron_job_service.list_cron_jobs, + owner_user_id=user_id, + repo=request.app.state.cron_job_repo, + ) return {"items": items} @router.post("/cron-jobs") -async def create_cron_job(req: CreateCronJobRequest) -> dict[str, Any]: +async def 
create_cron_job( + req: CreateCronJobRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: job = await asyncio.to_thread( cron_job_service.create_cron_job, name=req.name, cron_expression=req.cron_expression, + repo=request.app.state.cron_job_repo, description=req.description, task_template=req.task_template, enabled=int(req.enabled), + owner_user_id=user_id, ) return {"item": job} @router.put("/cron-jobs/{job_id}") -async def update_cron_job(job_id: str, req: UpdateCronJobRequest) -> dict[str, Any]: +async def update_cron_job( + job_id: str, + req: UpdateCronJobRequest, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: fields = req.model_dump(exclude_none=True) if "enabled" in fields: fields["enabled"] = int(fields["enabled"]) - job = await asyncio.to_thread(cron_job_service.update_cron_job, job_id, **fields) + job = await asyncio.to_thread( + cron_job_service.update_cron_job, + job_id, + owner_user_id=user_id, + repo=request.app.state.cron_job_repo, + **fields, + ) if not job: raise HTTPException(404, "Cron job not found") return {"item": job} @router.delete("/cron-jobs/{job_id}") -async def delete_cron_job(job_id: str) -> dict[str, Any]: - ok = await asyncio.to_thread(cron_job_service.delete_cron_job, job_id) +async def delete_cron_job( + job_id: str, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + ok = await asyncio.to_thread( + cron_job_service.delete_cron_job, + job_id, + owner_user_id=user_id, + repo=request.app.state.cron_job_repo, + ) if not ok: raise HTTPException(404, "Cron job not found") return {"ok": True} @router.post("/cron-jobs/{job_id}/run") -async def trigger_cron_job(job_id: str, request: Request) -> dict[str, Any]: +async def trigger_cron_job( + job_id: str, + request: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: cron_service = getattr(request.app.state, "cron_service", None) if not cron_service: raise HTTPException(503, "Cron service not available") - task = await cron_service.trigger_job(job_id) + task = await cron_service.trigger_job(job_id, owner_user_id=user_id) if not task: raise HTTPException(404, "Cron job not found or disabled") return {"item": task} @@ -315,10 +462,17 @@ async def update_resource_content(resource_type: str, resource_id: str, req: Upd @router.get("/profile") -async def get_profile() -> dict[str, Any]: - return await asyncio.to_thread(profile_service.get_profile) +async def get_profile( + user_id: Annotated[str, Depends(get_current_user_id)], + request: Request, +) -> dict[str, Any]: + member = request.app.state.member_repo.get_by_id(user_id) + return await asyncio.to_thread(profile_service.get_profile, member) @router.put("/profile") -async def update_profile(req: UpdateProfileRequest) -> dict[str, Any]: +async def update_profile( + req: UpdateProfileRequest, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: return await asyncio.to_thread(profile_service.update_profile, **req.model_dump()) diff --git a/backend/web/routers/resources.py b/backend/web/routers/resources.py new file mode 100644 index 000000000..4fc56e7a5 --- /dev/null +++ b/backend/web/routers/resources.py @@ -0,0 +1,28 @@ +"""User-scoped resource endpoints.""" + +from __future__ import annotations + +import asyncio +from typing import Annotated, Any + +from fastapi import APIRouter, Depends, HTTPException, Request + +from backend.web.core.dependencies import 
get_current_user_id +from backend.web.services import resource_projection_service + +router = APIRouter(prefix="/api/resources", tags=["resources"]) + + +@router.get("/overview") +async def resources_overview( + user_id: Annotated[str, Depends(get_current_user_id)], + request: Request, +) -> dict[str, Any]: + try: + return await asyncio.to_thread( + resource_projection_service.list_user_resource_providers, + request.app, + user_id, + ) + except RuntimeError as exc: + raise HTTPException(500, str(exc)) from exc diff --git a/backend/web/routers/settings.py b/backend/web/routers/settings.py index f765c0962..daf049255 100644 --- a/backend/web/routers/settings.py +++ b/backend/web/routers/settings.py @@ -6,11 +6,12 @@ import json from pathlib import Path -from typing import Any +from typing import Annotated, Any -from fastapi import APIRouter, HTTPException, Query, Request +from fastapi import APIRouter, Depends, HTTPException, Query, Request from pydantic import BaseModel +from backend.web.core.dependencies import get_current_user_id from config.models_loader import ModelsLoader from config.models_schema import ModelsConfig from config.user_paths import user_home_path, user_home_read_candidates @@ -42,6 +43,27 @@ class DirectoryItem(BaseModel): is_dir: bool +def _resolve_workspace_path_or_400( + workspace: str, + *, + missing_detail: str, + not_dir_detail: str, +) -> str: + workspace_path = Path(workspace).expanduser().resolve() + if not workspace_path.exists(): + raise HTTPException(status_code=400, detail=missing_detail) + if not workspace_path.is_dir(): + raise HTTPException(status_code=400, detail=not_dir_detail) + return str(workspace_path) + + +def _remember_recent_workspace(settings: "WorkspaceSettings", workspace_str: str) -> None: + if workspace_str in settings.recent_workspaces: + settings.recent_workspaces.remove(workspace_str) + settings.recent_workspaces.insert(0, workspace_str) + settings.recent_workspaces = settings.recent_workspaces[:5] + + def load_settings() -> WorkspaceSettings: try: data = _load_user_json("preferences.json") @@ -71,6 +93,25 @@ def _try_get_user_id(request: Request) -> str | None: return None +def _load_models_for_user(repo, user_id: str | None) -> dict[str, Any]: + """Load models config: Supabase first, filesystem fallback.""" + if repo and user_id: + data = repo.get_models_config(user_id) + if data is not None: + return data + return _load_user_json("models.json") + + +def _save_models_for_user(repo, user_id: str | None, data: dict[str, Any]) -> None: + """Save models config: Supabase if available, else filesystem.""" + if repo and user_id: + repo.set_models_config(user_id, data) + else: + MODELS_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(MODELS_FILE, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + + # ============================================================================ # Models config (models.json) # ============================================================================ @@ -81,13 +122,6 @@ def load_models() -> dict[str, Any]: return _load_user_json("models.json") -def save_models(data: dict[str, Any]) -> None: - """Save models.json to disk (user-level).""" - MODELS_FILE.parent.mkdir(parents=True, exist_ok=True) - with open(MODELS_FILE, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2, ensure_ascii=False) - - def load_merged_models() -> ModelsConfig: """Load fully merged ModelsConfig (system + user).""" return ModelsLoader().load() @@ -149,7 +183,7 @@ async def get_settings(request: 
Request) -> UserSettings: # Build compat view mapping = {k: v.model for k, v in models.mapping.items()} providers = {k: ProviderConfig(api_key=v.api_key, base_url=v.base_url) for k, v in models.providers.items()} - raw = load_models() + raw = _load_models_for_user(repo, user_id) custom_config = raw.get("pool", {}).get("custom_config", {}) return UserSettings( @@ -214,51 +248,49 @@ async def read_local_file(path: str = Query(...)) -> dict[str, Any]: @router.post("/workspace") -async def set_default_workspace(request: WorkspaceRequest, req: Request) -> dict[str, Any]: +async def set_default_workspace( + request: WorkspaceRequest, + req: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: """Set default workspace path.""" - workspace_path = Path(request.workspace).expanduser().resolve() - if not workspace_path.exists(): - raise HTTPException(status_code=400, detail="Workspace path does not exist") - if not workspace_path.is_dir(): - raise HTTPException(status_code=400, detail="Workspace path is not a directory") - - workspace_str = str(workspace_path) + workspace_str = _resolve_workspace_path_or_400( + request.workspace, + missing_detail="Workspace path does not exist", + not_dir_detail="Workspace path is not a directory", + ) repo = _get_settings_repo(req) - user_id = _try_get_user_id(req) if repo else None if repo and user_id: repo.set_default_workspace(user_id, workspace_str) else: settings = load_settings() settings.default_workspace = workspace_str - if workspace_str in settings.recent_workspaces: - settings.recent_workspaces.remove(workspace_str) - settings.recent_workspaces.insert(0, workspace_str) - settings.recent_workspaces = settings.recent_workspaces[:5] + _remember_recent_workspace(settings, workspace_str) save_settings(settings) return {"success": True, "workspace": workspace_str} @router.post("/workspace/recent") -async def add_recent_workspace(request: WorkspaceRequest, req: Request) -> dict[str, Any]: +async def add_recent_workspace( + request: WorkspaceRequest, + req: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: """Add a workspace to recent list.""" - workspace_path = Path(request.workspace).expanduser().resolve() - if not workspace_path.exists() or not workspace_path.is_dir(): - raise HTTPException(status_code=400, detail="Invalid workspace path") - - workspace_str = str(workspace_path) + workspace_str = _resolve_workspace_path_or_400( + request.workspace, + missing_detail="Invalid workspace path", + not_dir_detail="Invalid workspace path", + ) repo = _get_settings_repo(req) - user_id = _try_get_user_id(req) if repo else None if repo and user_id: repo.add_recent_workspace(user_id, workspace_str) else: settings = load_settings() - if workspace_str in settings.recent_workspaces: - settings.recent_workspaces.remove(workspace_str) - settings.recent_workspaces.insert(0, workspace_str) - settings.recent_workspaces = settings.recent_workspaces[:5] + _remember_recent_workspace(settings, workspace_str) save_settings(settings) return {"success": True} @@ -269,10 +301,13 @@ class DefaultModelRequest(BaseModel): @router.post("/default-model") -async def set_default_model(request: DefaultModelRequest, req: Request) -> dict[str, Any]: +async def set_default_model( + request: DefaultModelRequest, + req: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: """Set default virtual model preference.""" repo = _get_settings_repo(req) - user_id = _try_get_user_id(req) if repo else 
None if repo and user_id: repo.set_default_model(user_id, request.model) else: @@ -387,9 +422,14 @@ class ModelMappingRequest(BaseModel): @router.post("/model-mapping") -async def update_model_mapping(request: ModelMappingRequest) -> dict[str, Any]: - """Update virtual model mapping → models.json.""" - data = load_models() +async def update_model_mapping( + request: ModelMappingRequest, + req: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + """Update virtual model mapping → models config.""" + repo = _get_settings_repo(req) + data = _load_models_for_user(repo, user_id) mapping = data.get("mapping", {}) for name, spec in request.mapping.items(): if isinstance(spec, dict): @@ -398,7 +438,7 @@ async def update_model_mapping(request: ModelMappingRequest) -> dict[str, Any]: else: mapping[name] = spec data["mapping"] = mapping - save_models(data) + _save_models_for_user(repo, user_id, data) return {"success": True, "model_mapping": request.mapping} @@ -413,9 +453,14 @@ class ModelToggleRequest(BaseModel): @router.post("/models/toggle") -async def toggle_model(request: ModelToggleRequest) -> dict[str, Any]: - """Enable or disable a model → models.json pool.enabled.""" - data = load_models() +async def toggle_model( + request: ModelToggleRequest, + req: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + """Enable or disable a model.""" + repo = _get_settings_repo(req) + data = _load_models_for_user(repo, user_id) pool = data.setdefault("pool", {"enabled": [], "custom": []}) enabled = pool.setdefault("enabled", []) @@ -426,7 +471,7 @@ async def toggle_model(request: ModelToggleRequest) -> dict[str, Any]: if request.model_id in enabled: enabled.remove(request.model_id) - save_models(data) + _save_models_for_user(repo, user_id, data) return {"success": True, "enabled_models": enabled} @@ -438,9 +483,14 @@ class CustomModelRequest(BaseModel): @router.post("/models/custom") -async def add_custom_model(request: CustomModelRequest) -> dict[str, Any]: - """Add a custom model → models.json pool.custom + auto-enable.""" - data = load_models() +async def add_custom_model( + request: CustomModelRequest, + req: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + """Add a custom model + auto-enable.""" + repo = _get_settings_repo(req) + data = _load_models_for_user(repo, user_id) pool = data.setdefault("pool", {"enabled": [], "custom": []}) custom = pool.setdefault("custom", []) enabled = pool.setdefault("enabled", []) @@ -463,7 +513,7 @@ async def add_custom_model(request: CustomModelRequest) -> dict[str, Any]: cfg["context_limit"] = request.context_limit custom_config[request.model_id] = cfg - save_models(data) + _save_models_for_user(repo, user_id, data) return {"success": True, "custom_models": custom, "enabled_models": enabled} @@ -528,9 +578,11 @@ async def test_model(request: ModelTestRequest) -> dict[str, Any]: @router.delete("/models/custom") -async def remove_custom_model(model_id: str = Query(...)) -> dict[str, Any]: - """Remove a custom model from models.json pool.custom + pool.enabled.""" - data = load_models() +async def remove_custom_model(req: Request, model_id: str = Query(...)) -> dict[str, Any]: + """Remove a custom model.""" + repo = _get_settings_repo(req) + user_id = _try_get_user_id(req) if repo else None + data = _load_models_for_user(repo, user_id) pool = data.setdefault("pool", {"enabled": [], "custom": []}) custom = pool.setdefault("custom", []) enabled = 
pool.setdefault("enabled", []) @@ -546,7 +598,7 @@ async def remove_custom_model(model_id: str = Query(...)) -> dict[str, Any]: custom_config = pool.get("custom_config", {}) custom_config.pop(model_id, None) - save_models(data) + _save_models_for_user(repo, user_id, data) return {"success": True, "custom_models": custom} @@ -558,9 +610,11 @@ class CustomModelConfigRequest(BaseModel): @router.post("/models/custom/config") -async def update_custom_model_config(request: CustomModelConfigRequest) -> dict[str, Any]: +async def update_custom_model_config(request: CustomModelConfigRequest, req: Request) -> dict[str, Any]: """Update based_on/context_limit/provider for a custom model.""" - data = load_models() + repo = _get_settings_repo(req) + user_id = _try_get_user_id(req) if repo else None + data = _load_models_for_user(repo, user_id) pool = data.setdefault("pool", {}) custom_config = pool.setdefault("custom_config", {}) cfg: dict[str, Any] = custom_config.get(request.model_id, {}) @@ -572,7 +626,7 @@ async def update_custom_model_config(request: CustomModelConfigRequest) -> dict[ if request.provider: custom_providers = pool.setdefault("custom_providers", {}) custom_providers[request.model_id] = request.provider - save_models(data) + _save_models_for_user(repo, user_id, data) return {"success": True, "custom_config": custom_config} @@ -588,9 +642,14 @@ class ProviderRequest(BaseModel): @router.post("/providers") -async def update_provider(request: ProviderRequest, req: Request) -> dict[str, Any]: - """Update provider config → models.json providers, then reload all agents.""" - data = load_models() +async def update_provider( + request: ProviderRequest, + req: Request, + user_id: Annotated[str, Depends(get_current_user_id)], +) -> dict[str, Any]: + """Update provider config, then reload all agents.""" + repo = _get_settings_repo(req) + data = _load_models_for_user(repo, user_id) providers = data.setdefault("providers", {}) provider_data: dict[str, Any] = {} if request.api_key is not None: @@ -598,7 +657,7 @@ async def update_provider(request: ProviderRequest, req: Request) -> dict[str, A if request.base_url is not None: provider_data["base_url"] = request.base_url providers[request.provider] = provider_data - save_models(data) + _save_models_for_user(repo, user_id, data) # @@@reload-agents-on-key-change — hot-reload all cached agents so they pick up new API keys pool = getattr(req.app.state, "agent_pool", {}) @@ -633,8 +692,14 @@ class ObservationRequest(BaseModel): @router.get("/observation") -async def get_observation_settings() -> dict[str, Any]: +async def get_observation_settings(req: Request) -> dict[str, Any]: """Get observation provider configuration.""" + repo = _get_settings_repo(req) + user_id = _try_get_user_id(req) if repo else None + if repo and user_id: + data = repo.get_observation_config(user_id) + if data is not None: + return data from config.observation_loader import ObservationLoader config = ObservationLoader().load() @@ -642,13 +707,19 @@ async def get_observation_settings() -> dict[str, Any]: @router.post("/observation") -async def update_observation_settings(request: ObservationRequest) -> dict[str, Any]: - """Update observation provider config (persists to observation.json). +async def update_observation_settings(request: ObservationRequest, req: Request) -> dict[str, Any]: + """Update observation provider config. New threads will pick up the active provider at creation time. Existing threads keep their locked provider — only credentials are read live. 
""" - data = _load_user_json("observation.json") + repo = _get_settings_repo(req) + user_id = _try_get_user_id(req) if repo else None + + if repo and user_id: + data = repo.get_observation_config(user_id) or {} + else: + data = _load_user_json("observation.json") data["active"] = request.active if request.langfuse is not None: @@ -660,9 +731,12 @@ async def update_observation_settings(request: ObservationRequest) -> dict[str, existing.update(request.langsmith) data["langsmith"] = existing - OBSERVATION_FILE.parent.mkdir(parents=True, exist_ok=True) - with open(OBSERVATION_FILE, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2, ensure_ascii=False) + if repo and user_id: + repo.set_observation_config(user_id, data) + else: + OBSERVATION_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(OBSERVATION_FILE, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) return {"success": True, "active": data.get("active")} @@ -740,8 +814,15 @@ class SandboxConfigRequest(BaseModel): @router.get("/sandboxes") -async def list_sandbox_configs() -> dict[str, Any]: - """List all sandbox configurations from ~/.leon/sandboxes/.""" +async def list_sandbox_configs(req: Request) -> dict[str, Any]: + """List all sandbox configurations.""" + repo = _get_settings_repo(req) + user_id = _try_get_user_id(req) if repo else None + if repo and user_id: + data = repo.get_sandbox_configs(user_id) + if data is not None: + return {"sandboxes": data} + # Filesystem fallback sandboxes: dict[str, Any] = {} seen: set[Path] = set() for root in user_home_read_candidates("sandboxes"): @@ -760,13 +841,23 @@ async def list_sandbox_configs() -> dict[str, Any]: @router.post("/sandboxes") -async def save_sandbox_config(request: SandboxConfigRequest) -> dict[str, Any]: - """Save a sandbox configuration to ~/.leon/sandboxes/.json.""" +async def save_sandbox_config(request: SandboxConfigRequest, req: Request) -> dict[str, Any]: + """Save a sandbox configuration.""" + repo = _get_settings_repo(req) + user_id = _try_get_user_id(req) if repo else None + from sandbox.config import SandboxConfig try: cfg = SandboxConfig(**request.config) - path = cfg.save(request.name) - return {"success": True, "path": str(path)} + if repo and user_id: + # Save to Supabase + existing = repo.get_sandbox_configs(user_id) or {} + existing[request.name] = cfg.model_dump() + repo.set_sandbox_configs(user_id, existing) + return {"success": True, "path": f"supabase://user_settings/{user_id}/sandbox_configs/{request.name}"} + else: + path = cfg.save(request.name) + return {"success": True, "path": str(path)} except Exception as e: raise HTTPException(status_code=400, detail=str(e)) diff --git a/backend/web/routers/thread_files.py b/backend/web/routers/thread_files.py index ef92a670d..30b0fcd09 100644 --- a/backend/web/routers/thread_files.py +++ b/backend/web/routers/thread_files.py @@ -21,6 +21,17 @@ _public = APIRouter(prefix="/api/threads/{thread_id}/files", tags=["thread-files"]) +async def _call_channel_file_service(method, *args, missing_status: int | None = None, **kwargs): + try: + return await asyncio.to_thread(method, *args, **kwargs) + except ValueError as e: + raise HTTPException(400, str(e)) from e + except FileNotFoundError as e: + if missing_status is None: + raise + raise HTTPException(missing_status, str(e)) from e + + @router.get("/list") async def list_workspace_path( thread_id: str, @@ -185,16 +196,12 @@ async def download_file( path: str = Query(...), ) -> FileResponse: """Download a file from 
thread-scoped files directory.""" - try: - target = await asyncio.to_thread( - file_channel_service.resolve_channel_file, - thread_id=thread_id, - relative_path=path, - ) - except ValueError as e: - raise HTTPException(400, str(e)) from e - except FileNotFoundError as e: - raise HTTPException(404, str(e)) from e + target = await _call_channel_file_service( + file_channel_service.resolve_channel_file, + thread_id=thread_id, + relative_path=path, + missing_status=404, + ) return FileResponse(path=str(target), filename=target.name, media_type="application/octet-stream") @@ -204,16 +211,12 @@ async def delete_workspace_file( path: str = Query(...), ) -> dict[str, Any]: """Delete a file from workspace.""" - try: - await asyncio.to_thread( - file_channel_service.delete_channel_file, - thread_id=thread_id, - relative_path=path, - ) - except ValueError as e: - raise HTTPException(400, str(e)) from e - except FileNotFoundError as e: - raise HTTPException(404, str(e)) from e + await _call_channel_file_service( + file_channel_service.delete_channel_file, + thread_id=thread_id, + relative_path=path, + missing_status=404, + ) return {"ok": True, "path": path} @@ -222,11 +225,8 @@ async def list_channel_files( thread_id: str, ) -> dict[str, Any]: """List files under thread-scoped files directory.""" - try: - entries = await asyncio.to_thread( - file_channel_service.list_channel_files, - thread_id=thread_id, - ) - except ValueError as e: - raise HTTPException(400, str(e)) from e + entries = await _call_channel_file_service( + file_channel_service.list_channel_files, + thread_id=thread_id, + ) return {"thread_id": thread_id, "entries": entries} diff --git a/backend/web/routers/threads.py b/backend/web/routers/threads.py index 33a75b8aa..8b380e050 100644 --- a/backend/web/routers/threads.py +++ b/backend/web/routers/threads.py @@ -21,25 +21,29 @@ from backend.web.models.requests import ( CreateThreadRequest, ResolveMainThreadRequest, + ResolvePermissionRequest, SaveThreadLaunchConfigRequest, SendMessageRequest, + ThreadPermissionRuleRequest, ) from backend.web.services import sandbox_service from backend.web.services.agent_pool import get_or_create_agent, resolve_thread_sandbox from backend.web.services.event_buffer import ThreadEventBuffer from backend.web.services.file_channel_service import get_file_channel_source -from backend.web.services.resource_cache import clear_resource_overview_cache +from backend.web.services.resource_cache import clear_monitor_resource_overview_cache from backend.web.services.sandbox_service import destroy_thread_resources_sync, init_providers_and_managers from backend.web.services.streaming_service import ( get_or_create_thread_buffer, observe_thread_events, ) from backend.web.services.thread_launch_config_service import ( + build_existing_launch_config, + build_new_launch_config, resolve_default_config, save_last_confirmed_config, save_last_successful_config, ) -from backend.web.services.thread_naming import canonical_entity_name, sidebar_label +from backend.web.services.thread_naming import sidebar_label from backend.web.services.thread_state_service import ( get_lease_status, get_sandbox_info, @@ -50,19 +54,45 @@ from backend.web.utils.serializers import avatar_url, serialize_message from core.runtime.middleware.monitor import AgentState from sandbox.config import MountSpec +from sandbox.manager import bind_thread_to_existing_lease from sandbox.recipes import normalize_recipe_snapshot, provider_type_from_name from sandbox.thread_context import set_current_thread_id -from 
storage.contracts import EntityRow logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/threads", tags=["threads"]) +class _NoopAsyncLock: + async def __aenter__(self) -> None: + return None + + async def __aexit__(self, exc_type, exc, tb) -> bool: + return False + + +def _is_internal_child_thread(thread_id: str) -> bool: + return thread_id.startswith("subagent-") + + def _invalidate_resource_overview_cache() -> None: - # @@@resource-overview-invalidation - thread/lease mutations change the monitor topology immediately. + # @@@monitor-resource-overview-invalidation - thread/lease mutations change the monitor topology immediately. # Clear the overview snapshot so the next /api/monitor/resources read reflects the fresh binding/state. - clear_resource_overview_cache() + clear_monitor_resource_overview_cache() + + +def _find_owned_member(app: Any, member_id: str, owner_user_id: str) -> Any | None: + member = app.state.member_repo.get_by_id(member_id) + if not member or member.owner_user_id != owner_user_id: + return None + return member + + +def _require_owned_member(app: Any, member_id: str, owner_user_id: str) -> Any: + member = _find_owned_member(app, member_id, owner_user_id) + if member is None: + raise HTTPException(403, "Not authorized") + return member async def _prepare_attachment_message( @@ -179,6 +209,86 @@ async def _validate_mount_capability_gate( ) +def _provider_unavailable_response(sandbox_type: str) -> JSONResponse: + return JSONResponse( + status_code=400, + content={ + "error": "sandbox_provider_unavailable", + "provider": sandbox_type, + }, + ) + + +def _format_ask_user_question_followup( + pending_request: dict[str, Any], + *, + answers: list[dict[str, Any]], + annotations: dict[str, Any] | None, +) -> str: + payload: dict[str, Any] = { + "questions": (pending_request.get("args") or {}).get("questions", []), + "answers": answers, + } + if annotations is not None: + payload["annotations"] = annotations + # @@@ask-user-followup-payload - keep this as one narrow, structured owner reply + # so the resumed run can continue from the user's choices without inventing + # a bespoke second continuation channel. + return ( + "The user answered your AskUserQuestion prompt. 
Continue the task using these answers.\n" + "\n" + f"{json.dumps(payload, ensure_ascii=False, indent=2)}\n" + "" + ) + + +def _build_ask_user_question_answered_payload( + pending_request: dict[str, Any], + *, + answers: list[dict[str, Any]], + annotations: dict[str, Any] | None, +) -> dict[str, Any]: + payload: dict[str, Any] = { + "questions": (pending_request.get("args") or {}).get("questions", []), + "answers": answers, + } + if annotations is not None: + payload["annotations"] = annotations + return payload + + +def _serialize_permission_answers(payload: Any) -> list[dict[str, Any]] | None: + raw_answers = getattr(payload, "answers", None) + if raw_answers is None: + return None + serialized: list[dict[str, Any]] = [] + for item in raw_answers: + if hasattr(item, "model_dump"): + serialized.append(item.model_dump(exclude_none=True)) + elif isinstance(item, dict): + serialized.append({key: value for key, value in item.items() if value is not None}) + else: + serialized.append({key: value for key, value in vars(item).items() if value is not None}) + return serialized + + +def _validate_sandbox_provider_gate(app: Any, owner_user_id: str, payload: CreateThreadRequest) -> JSONResponse | None: + sandbox_type = payload.sandbox or "local" + if payload.lease_id: + owned_lease = next( + (lease for lease in sandbox_service.list_user_leases(owner_user_id) if lease["lease_id"] == payload.lease_id), + None, + ) + if owned_lease is not None: + sandbox_type = str(owned_lease["provider_name"] or sandbox_type) + if sandbox_type == "local": + return None + provider = sandbox_service.build_provider_from_config_name(sandbox_type) + if provider is not None: + return None + return _provider_unavailable_response(sandbox_type) + + def _get_agent_for_thread(app: Any, thread_id: str) -> Any | None: """Get agent instance for a thread from the agent pool.""" pool = getattr(app.state, "agent_pool", None) @@ -194,15 +304,13 @@ def _thread_payload(app: Any, thread_id: str, sandbox_type: str) -> dict[str, An if thread is None: raise HTTPException(404, "Thread not found") member = app.state.member_repo.get_by_id(thread["member_id"]) - entity = app.state.entity_repo.get_by_id(thread["member_id"]) - if member is None or entity is None: - raise HTTPException(500, f"Thread {thread_id} missing member/entity") + if member is None: + raise HTTPException(500, f"Thread {thread_id} missing member") return { "thread_id": thread_id, "sandbox": sandbox_type, "member_id": member.id, "member_name": member.name, - "entity_name": entity.name, "branch_index": thread["branch_index"], "sidebar_label": sidebar_label(is_main=thread["is_main"], branch_index=thread["branch_index"]), "avatar_url": avatar_url(member.id, bool(member.avatar)), @@ -210,7 +318,165 @@ def _thread_payload(app: Any, thread_id: str, sandbox_type: str) -> dict[str, An } -def _create_thread_sandbox_resources(thread_id: str, sandbox_type: str, recipe: dict[str, Any] | None) -> None: +_IDLE_REPLAYABLE_RUN_EVENTS = frozenset({"error", "cancelled", "retry"}) + + +def _checkpoint_tail_is_pending_owner_turn(messages: list[dict[str, Any]]) -> bool: + if not messages: + return False + tail = messages[-1] + if tail.get("type") != "HumanMessage": + return False + meta = tail.get("metadata") or {} + return meta.get("source") not in {"system", "external"} + + +async def _get_thread_display_entries(app: Any, thread_id: str) -> list[dict[str, Any]]: + display_builder = app.state.display_builder + entries = display_builder.get_entries(thread_id) + if entries is not None: + 
_normalize_blocking_subagent_terminal_status(entries) + sandbox_type = resolve_thread_sandbox(app, thread_id) + agent = await get_or_create_agent(app, sandbox_type, thread_id=thread_id) + if entries is not None and getattr(agent.runtime, "current_state", None) != AgentState.IDLE: + return entries + + set_current_thread_id(thread_id) + config = {"configurable": {"thread_id": thread_id}} + state = await agent.agent.aget_state(config) + values = getattr(state, "values", {}) if state else {} + messages = values.get("messages", []) if isinstance(values, dict) else [] + serialized = [serialize_message(msg) for msg in messages] + + from core.runtime.visibility import annotate_owner_visibility + + annotated, _ = annotate_owner_visibility(serialized) + if entries is not None and not _display_entries_need_idle_rebuild(entries, annotated): + return entries + entries = display_builder.build_from_checkpoint(thread_id, annotated) + if _checkpoint_tail_is_pending_owner_turn(annotated): + await _replay_latest_run_failure_events( + thread_id=thread_id, + display_builder=display_builder, + ) + entries = display_builder.get_entries(thread_id) or entries + _normalize_blocking_subagent_terminal_status(entries) + return entries + + +def _display_entries_need_idle_rebuild(entries: list[dict[str, Any]], messages: list[dict[str, Any]]) -> bool: + if not messages: + return bool(entries) + if not entries: + return True + # @@@idle-cache-honesty - idle detail must not trust cached assistant shells after + # clear/restart. Rebuild only when cache is visibly impossible for the persisted checkpoint. + return any(entry.get("role") == "assistant" and not entry.get("segments") for entry in entries) + + +def _normalize_blocking_subagent_terminal_status(entries: list[dict[str, Any]]) -> None: + for entry in entries: + if entry.get("role") != "assistant": + continue + for seg in entry.get("segments", []): + if seg.get("type") != "tool": + continue + step = seg.get("step") or {} + if step.get("name") != "Agent" or step.get("status") != "done": + continue + stream = step.get("subagent_stream") + if not isinstance(stream, dict): + continue + result_text = step.get("result") + existing_status = str(stream.get("status") or "").lower() + terminal_status = ( + existing_status + if existing_status in {"completed", "error", "cancelled"} + else ("error" if isinstance(result_text, str) and result_text.startswith("") else "completed") + ) + if stream.get("status") != terminal_status: + # @@@blocking-subagent-terminal-honesty - a finished blocking Agent tool + # must not keep exposing a stale running child status on refresh/detail/tasks. 
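+                # Hedged illustration (all names are the ones in this function):
+                #   stream still {"status": "running"} on a done Agent step
+                #       -> rewritten to the derived terminal_status here
+                #   stream already terminal ("completed"/"error"/"cancelled")
+                #       -> terminal_status matches it, so this branch is skipped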
+ stream["status"] = terminal_status + if terminal_status == "error" and not stream.get("error") and isinstance(result_text, str): + stream["error"] = result_text + + +def _collect_display_subagent_tasks(entries: list[dict[str, Any]]) -> dict[str, dict[str, Any]]: + tasks: dict[str, dict[str, Any]] = {} + for entry in entries: + if entry.get("role") != "assistant": + continue + for seg in entry.get("segments", []): + if seg.get("type") != "tool": + continue + step = seg.get("step") or {} + if step.get("name") != "Agent": + continue + stream = step.get("subagent_stream") + if not isinstance(stream, dict) or not stream.get("task_id"): + continue + task_id = str(stream["task_id"]) + raw_args = step.get("args") + args: dict[str, Any] = raw_args if isinstance(raw_args, dict) else {} + description = stream.get("description") or args.get("description") or args.get("prompt") + status = str(stream.get("status") or ("completed" if step.get("status") == "done" else "running")) + result_text = step.get("result") or stream.get("text") + # @@@dual-source-task-surface - blocking Agent subagents never enter parent _background_runs, + # so /tasks must also project persisted subagent_stream state from display history. + tasks[task_id] = { + "task_id": task_id, + "task_type": "agent", + "status": status, + "command_line": None, + "description": description, + "exit_code": None, + "error": stream.get("error"), + "result": result_text, + "text": result_text, + "thread_id": stream.get("thread_id"), + } + return tasks + + +async def _replay_latest_run_failure_events( + *, + thread_id: str, + display_builder: Any, +) -> None: + from backend.web.services.event_store import get_latest_run_id, read_events_after + + run_id = await get_latest_run_id(thread_id) + if not run_id or run_id.startswith("activity_"): + return + + events = await read_events_after(thread_id, run_id, 0) + if not any(event.get("event") in _IDLE_REPLAYABLE_RUN_EVENTS for event in events): + return + + # @@@idle-run-error-replay - checkpoint can stop at the owner's input when + # the run dies before first persisted AI/Tool message. Rebuild must replay + # the latest run-level failure events so refresh/detail stays honest. + for event in events: + event_type = event.get("event", "") + if event_type not in {"run_start", "run_done", *_IDLE_REPLAYABLE_RUN_EVENTS}: + continue + raw_data = event.get("data", "{}") + try: + payload = json.loads(raw_data) if isinstance(raw_data, str) else raw_data + except (json.JSONDecodeError, TypeError): + payload = {} + if not isinstance(payload, dict): + payload = {} + display_builder.apply_event(thread_id, event_type, payload) + + +def _create_thread_sandbox_resources( + thread_id: str, + sandbox_type: str, + recipe: dict[str, Any] | None, + cwd: str | None = None, +) -> None: """Create volume, lease, and terminal eagerly so volume exists before file uploads.""" from datetime import datetime @@ -250,11 +516,11 @@ def _create_thread_sandbox_resources(thread_id: str, sandbox_type: str, recipe: terminal_repo = SQLiteTerminalRepo(db_path=sandbox_db) try: terminal_id = f"term-{uuid.uuid4().hex[:12]}" - # @@@initial-cwd - use project root for local, provider default for remote + # @@@initial-cwd - local threads own their requested cwd; remote threads start from provider defaults. 
from backend.web.core.config import LOCAL_WORKSPACE_ROOT if sandbox_type == "local": - initial_cwd = str(LOCAL_WORKSPACE_ROOT) + initial_cwd = cwd or str(LOCAL_WORKSPACE_ROOT) else: from backend.web.services.sandbox_service import build_provider_from_config_name from sandbox.manager import resolve_provider_cwd @@ -271,43 +537,6 @@ def _create_thread_sandbox_resources(thread_id: str, sandbox_type: str, recipe: terminal_repo.close() -def _resolve_existing_lease_cwd(lease_id: str, fallback_cwd: str | None) -> str: - if fallback_cwd: - return fallback_cwd - - from backend.web.core.config import LOCAL_WORKSPACE_ROOT - from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path - from storage.providers.sqlite.terminal_repo import SQLiteTerminalRepo - - terminal_repo = SQLiteTerminalRepo(db_path=resolve_role_db_path(SQLiteDBRole.SANDBOX)) - try: - row = terminal_repo.get_latest_by_lease(lease_id) - finally: - terminal_repo.close() - if row and row.get("cwd"): - return str(row["cwd"]) - - return str(LOCAL_WORKSPACE_ROOT) - - -def _bind_thread_to_existing_lease(thread_id: str, lease_id: str, *, cwd: str | None) -> str: - from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path - from storage.providers.sqlite.terminal_repo import SQLiteTerminalRepo - - initial_cwd = _resolve_existing_lease_cwd(lease_id, cwd) - terminal_repo = SQLiteTerminalRepo(db_path=resolve_role_db_path(SQLiteDBRole.SANDBOX)) - try: - terminal_repo.create( - terminal_id=f"term-{uuid.uuid4().hex[:12]}", - thread_id=thread_id, - lease_id=lease_id, - initial_cwd=initial_cwd, - ) - finally: - terminal_repo.close() - return initial_cwd - - def _create_owned_thread( app: Any, owner_user_id: str, @@ -342,16 +571,17 @@ def _create_owned_thread( raise HTTPException(403, "Lease not authorized") sandbox_type = str(owned_lease["provider_name"] or sandbox_type) - # @@@non-atomic-create - these 3 steps (seq++, thread, entity) are not atomic. - seq = app.state.member_repo.increment_entity_seq(agent_member_id) + # @@@non-atomic-create - these 3 steps (seq++, thread) are not atomic. + seq = app.state.member_repo.increment_thread_seq(agent_member_id) new_thread_id = f"{agent_member_id}-{seq}" - has_main = app.state.thread_repo.get_main_thread(agent_member_id) is not None + has_main = app.state.thread_repo.get_default_thread(agent_member_id) is not None resolved_is_main = is_main or not has_main branch_index = 0 if resolved_is_main else app.state.thread_repo.get_next_branch_index(agent_member_id) app.state.thread_repo.create( thread_id=new_thread_id, member_id=agent_member_id, + user_id=new_thread_id, sandbox_type=sandbox_type, cwd=payload.cwd, created_at=time.time(), @@ -360,29 +590,6 @@ def _create_owned_thread( branch_index=branch_index, ) - # @@@entity-name-convention - entity display names derive from member + thread role, never sandbox strings. - entity_name = canonical_entity_name(agent_member.name, is_main=resolved_is_main, branch_index=branch_index) - - # @@@entity-id-is-member-id - agent entity id = member_id (per-agent, not per-thread). - # thread_id field on the entity points to the current main thread. - # If entity already exists, update thread_id (main thread changed); otherwise create. 
- existing_entity = app.state.entity_repo.get_by_id(agent_member_id) - if existing_entity is not None: - if resolved_is_main: - app.state.entity_repo.update(agent_member_id, thread_id=new_thread_id, name=entity_name) - # Branch threads don't update the entity — it represents the main identity - else: - app.state.entity_repo.create( - EntityRow( - id=agent_member_id, - type="agent", - member_id=agent_member_id, - name=entity_name, - thread_id=new_thread_id if resolved_is_main else None, - created_at=time.time(), - ) - ) - # Set thread state app.state.thread_sandbox[new_thread_id] = sandbox_type if payload.cwd: @@ -390,7 +597,7 @@ def _create_owned_thread( if selected_lease_id: # @@@reuse-lease-binding - Reuse an existing lease by attaching a fresh terminal for the new thread. - bound_cwd = _bind_thread_to_existing_lease( + bound_cwd = bind_thread_to_existing_lease( new_thread_id, selected_lease_id, cwd=payload.cwd, @@ -403,29 +610,22 @@ def _create_owned_thread( new_thread_id, sandbox_type, payload.recipe.model_dump() if payload.recipe else None, + payload.cwd, ) if selected_lease_id and owned_lease is not None: - successful_config = { - "create_mode": "existing", - "provider_config": sandbox_type, - "recipe": owned_lease.get("recipe"), - "lease_id": owned_lease["lease_id"], - "model": payload.model, - "workspace": app.state.thread_cwd.get(new_thread_id), - } + successful_config = build_existing_launch_config( + lease=owned_lease, + model=payload.model, + workspace=app.state.thread_cwd.get(new_thread_id), + ) else: - successful_config = { - "create_mode": "new", - "provider_config": sandbox_type, - "recipe": normalize_recipe_snapshot( - provider_type_from_name(sandbox_type), - payload.recipe.model_dump() if payload.recipe else None, - ), - "lease_id": None, - "model": payload.model, - "workspace": app.state.thread_cwd.get(new_thread_id) or payload.cwd, - } + successful_config = build_new_launch_config( + provider_config=sandbox_type, + recipe=payload.recipe.model_dump() if payload.recipe else None, + model=payload.model, + workspace=app.state.thread_cwd.get(new_thread_id) or payload.cwd, + ) save_last_successful_config(app, owner_user_id, agent_member_id, successful_config) return { @@ -433,7 +633,6 @@ def _create_owned_thread( "sandbox": sandbox_type, "member_id": agent_member_id, "member_name": agent_member.name, - "entity_name": entity_name, "branch_index": branch_index, "sidebar_label": sidebar_label(is_main=resolved_is_main, branch_index=branch_index), "avatar_url": avatar_url(agent_member_id, bool(agent_member.avatar)), @@ -448,6 +647,9 @@ async def create_thread( app: Annotated[Any, Depends(get_app)] = None, ) -> dict[str, Any] | JSONResponse: """Create a new child thread for an agent member.""" + provider_error = _validate_sandbox_provider_gate(app, user_id, payload) + if provider_error is not None: + return provider_error # Validate bind_mounts capability before creating thread sandbox_type = payload.sandbox or "local" requested_mounts = payload.bind_mounts if payload.bind_mounts else [] @@ -467,17 +669,45 @@ async def resolve_main_thread( user_id: Annotated[str, Depends(get_current_user_id)], app: Annotated[Any, Depends(get_app)] = None, ) -> dict[str, Any]: - """Return the main thread for a member, or null when none exists.""" - agent_member = app.state.member_repo.get_by_id(payload.member_id) - if not agent_member or agent_member.owner_user_id != user_id: + """Return the default representative thread for a member template.""" + agent_member = _find_owned_member(app, 
payload.member_id, user_id) + if agent_member is None: # Return null instead of 403 — member may not exist yet (stale client state) # or belong to another user (harmless to reveal "no thread") - return {"thread": None} + return { + "member_id": payload.member_id, + "default_thread_id": None, + "thread": None, + } - existing = app.state.thread_repo.get_main_thread(payload.member_id) - if existing is None: - return {"thread": None} - return {"thread": _thread_payload(app, existing["id"], existing.get("sandbox_type", "local"))} + default_thread = app.state.thread_repo.get_default_thread(payload.member_id) + if default_thread is None: + return { + "member_id": payload.member_id, + "default_thread_id": None, + "thread": None, + } + try: + return { + "member_id": payload.member_id, + "default_thread_id": default_thread["id"], + "thread": _thread_payload(app, default_thread["id"], default_thread.get("sandbox_type", "local")), + } + except HTTPException as exc: + # @@@orphan-default-thread - stale bootstrap data can leave the member pointing at a thread whose + # member rows are gone. Treat that as "no resolvable default thread" instead of surfacing a 500. + if exc.status_code == 500 and "missing member" in str(exc.detail): + logger.warning( + "resolve_main_thread ignored orphaned default thread %s for member %s", + default_thread["id"], + payload.member_id, + ) + return { + "member_id": payload.member_id, + "default_thread_id": None, + "thread": None, + } + raise @router.get("/default-config") @@ -486,9 +716,7 @@ async def get_default_thread_config( user_id: Annotated[str, Depends(get_current_user_id)], app: Annotated[Any, Depends(get_app)] = None, ) -> dict[str, Any]: - agent_member = app.state.member_repo.get_by_id(member_id) - if not agent_member or agent_member.owner_user_id != user_id: - raise HTTPException(403, "Not authorized") + _require_owned_member(app, member_id, user_id) return resolve_default_config(app, user_id, member_id) @@ -498,9 +726,7 @@ async def save_default_thread_config( user_id: Annotated[str, Depends(get_current_user_id)], app: Annotated[Any, Depends(get_app)] = None, ) -> dict[str, Any]: - agent_member = app.state.member_repo.get_by_id(payload.member_id) - if not agent_member or agent_member.owner_user_id != user_id: - raise HTTPException(403, "Not authorized") + _require_owned_member(app, payload.member_id, user_id) save_last_confirmed_config(app, user_id, payload.member_id, payload.model_dump()) return {"ok": True} @@ -518,6 +744,8 @@ async def list_threads( threads = [] for t in raw: tid = t["id"] + if _is_internal_child_thread(tid): + continue sandbox_type = t.get("sandbox_type", "local") # Check if agent is currently running — pool key is "{thread_id}:{sandbox_type}" running = False @@ -536,7 +764,6 @@ async def list_threads( "sandbox": t.get("sandbox_type", "local"), "member_name": t.get("member_name"), "member_id": t.get("member_id"), - "entity_name": t.get("entity_name"), "branch_index": t.get("branch_index"), "sidebar_label": sidebar_label( is_main=bool(t.get("is_main", False)), @@ -562,26 +789,10 @@ async def get_thread_messages( @@@display-builder — returns pre-computed ChatEntry[] from DisplayBuilder. Hot path: return in-memory state. Cold path: rebuild from checkpoint. 
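+
+    Idle-path honesty, sketched from _get_thread_display_entries (no new names):
+        agent not IDLE with cached entries        -> serve the hot cache as-is
+        cache impossible for the checkpoint
+        (assistant entries without segments)      -> rebuild from checkpoint
+        checkpoint tail is a pending owner turn   -> replay latest run failure events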
""" - display_builder = app.state.display_builder sandbox_type = resolve_thread_sandbox(app, thread_id) agent = await get_or_create_agent(app, sandbox_type, thread_id=thread_id) - - # Hot path: return cached display entries - entries = display_builder.get_entries(thread_id) - if entries is None: - # Cold path: rebuild from checkpoint - set_current_thread_id(thread_id) - config = {"configurable": {"thread_id": thread_id}} - state = await agent.agent.aget_state(config) - values = getattr(state, "values", {}) if state else {} - messages = values.get("messages", []) if isinstance(values, dict) else [] - serialized = [serialize_message(msg) for msg in messages] - - from core.runtime.visibility import annotate_owner_visibility - - annotated, _ = annotate_owner_visibility(serialized) - entries = display_builder.build_from_checkpoint(thread_id, annotated) - + display_builder = app.state.display_builder + entries = await _get_thread_display_entries(app, thread_id) sandbox_info = get_sandbox_info(agent, thread_id, sandbox_type) return { "thread_id": thread_id, @@ -622,17 +833,8 @@ async def delete_thread( except Exception as exc: logger.warning("Failed to destroy sandbox resources for thread %s: %s", thread_id, exc) await asyncio.to_thread(delete_thread_in_db, thread_id) - # Also delete from threads table (entity-chat addition) - thread_data = app.state.thread_repo.get_by_id(thread_id) - member_id = thread_data["member_id"] if thread_data else None + # Also delete from threads table (member-chat addition) app.state.thread_repo.delete(thread_id) - # Entity is keyed by member_id (shared across threads) — update its thread_id - # to the next main thread, or clear it if no threads remain - if member_id: - entity = app.state.entity_repo.get_by_id(member_id) - if entity and entity.thread_id == thread_id: - next_main = app.state.thread_repo.get_main_thread(member_id) - app.state.entity_repo.update(member_id, thread_id=next_main["id"] if next_main else None) # Clean up thread-specific state app.state.thread_sandbox.pop(thread_id, None) @@ -647,6 +849,28 @@ async def delete_thread( return {"ok": True, "thread_id": thread_id} +@router.post("/{thread_id}/clear") +async def clear_thread_history( + thread_id: str, + user_id: Annotated[str, Depends(verify_thread_owner)], + app: Annotated[Any, Depends(get_app)] = None, +) -> dict[str, Any]: + """Clear replayable thread history while preserving the thread itself.""" + sandbox_type = resolve_thread_sandbox(app, thread_id) + + lock = await get_thread_lock(app, thread_id) + async with lock: + agent = await get_or_create_agent(app, sandbox_type, thread_id=thread_id) + if hasattr(agent, "runtime") and agent.runtime.current_state == AgentState.ACTIVE: + raise HTTPException(status_code=409, detail="Cannot clear thread while run is in progress") + await agent.aclear_thread(thread_id) + + app.state.display_builder.clear(thread_id) + app.state.thread_event_buffers.pop(thread_id, None) + app.state.queue_manager.clear_all(thread_id) + return {"ok": True, "thread_id": thread_id} + + @router.post("/{thread_id}/messages") async def send_message( thread_id: str, @@ -705,7 +929,7 @@ async def get_thread_history( thread_id: str, limit: int = 20, truncate: int = 300, - user_id: Annotated[str, Depends(verify_thread_owner)] = None, + user_id: Annotated[str | None, Depends(verify_thread_owner)] = None, app: Annotated[Any, Depends(get_app)] = None, ) -> dict[str, Any]: """Compact conversation history for debugging — no raw LangChain noise. 
@@ -743,6 +967,8 @@ def _expand(msg: Any) -> list[dict[str, Any]]: cls = msg.__class__.__name__ if cls == "HumanMessage": metadata = getattr(msg, "metadata", {}) or {} + if metadata.get("source") == "internal": + return [] if metadata.get("source") == "system": return [{"role": "notification", "text": _trunc(extract_text_content(msg.content))}] return [{"role": "human", "text": _trunc(extract_text_content(msg.content))}] @@ -759,7 +985,7 @@ def _expand(msg: Any) -> list[dict[str, Any]]: text = extract_text_content(msg.content) if text: entries.append({"role": "assistant", "text": _trunc(text)}) - return entries or [{"role": "assistant", "text": ""}] + return entries if cls == "ToolMessage": return [ { @@ -782,11 +1008,155 @@ def _expand(msg: Any) -> list[dict[str, Any]]: } +@router.get("/{thread_id}/permissions") +async def get_thread_permissions( + thread_id: str, + user_id: Annotated[str | None, Depends(verify_thread_owner)] = None, + thread_lock: Annotated[asyncio.Lock | None, Depends(get_thread_lock)] = None, + agent: Annotated[Any, Depends(get_thread_agent)] = None, +) -> dict[str, Any]: + # @@@permission-state-lock - owner polling and resolve can race on idle + # threads. Serialize the lightweight /permissions read with resolve/persist + # so stale checkpoint hydration cannot resurrect an already-resolved request. + async with thread_lock or _NoopAsyncLock(): + await agent.agent.aget_state({"configurable": {"thread_id": thread_id}}) + rule_state = agent.get_thread_permission_rules(thread_id) + return { + "thread_id": thread_id, + "requests": agent.get_pending_permission_requests(thread_id), + "session_rules": rule_state["rules"], + "managed_only": rule_state["managed_only"], + } + + +@router.post("/{thread_id}/permissions/{request_id}/resolve") +async def resolve_thread_permission_request( + thread_id: str, + request_id: str, + payload: ResolvePermissionRequest, + user_id: Annotated[str | None, Depends(verify_thread_owner)] = None, + agent: Annotated[Any, Depends(get_thread_agent)] = None, + app: Annotated[Any, Depends(get_app)] = None, + thread_lock: Annotated[asyncio.Lock | None, Depends(get_thread_lock)] = None, +) -> dict[str, Any]: + async with thread_lock or _NoopAsyncLock(): + await agent.agent.aget_state({"configurable": {"thread_id": thread_id}}) + pending_requests = { + item.get("request_id"): item + for item in agent.get_pending_permission_requests(thread_id) + if isinstance(item, dict) and item.get("request_id") + } + pending_request = pending_requests.get(request_id) + is_ask_user_question = bool(pending_request and pending_request.get("tool_name") == "AskUserQuestion") + answers = _serialize_permission_answers(payload) + if is_ask_user_question and payload.decision == "allow" and not answers: + raise HTTPException(status_code=400, detail="AskUserQuestion answers are required when approving the request") + ok = agent.resolve_permission_request( + request_id, + decision=payload.decision, + message=payload.message, + answers=answers, + annotations=getattr(payload, "annotations", None), + ) + if not ok: + raise HTTPException(status_code=404, detail="Permission request not found") + await agent.agent.apersist_state(thread_id) + if is_ask_user_question and payload.decision == "allow" and answers is not None: + # @@@ask-user-lifecycle - the owner's answer is about to become a + # real follow-up user message. Clear the old request before that + # run starts so checkpoint replay cannot resurrect the popup. 
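+        # Approval sequence, sketched from the calls in this handler:
+        #   resolve_permission_request -> apersist_state -> drop_permission_request
+        #   -> apersist_state -> route_message_to_brain (the follow-up run below).
+        # Dropping the request any later than this would reopen the replay window
+        # described above.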
+ agent.drop_permission_request(request_id) + await agent.agent.apersist_state(thread_id) + + followup: dict[str, Any] | None = None + if is_ask_user_question and payload.decision == "allow" and pending_request is not None and answers is not None: + from backend.web.services.message_routing import route_message_to_brain + + answered_payload = _build_ask_user_question_answered_payload( + pending_request, + answers=answers, + annotations=getattr(payload, "annotations", None), + ) + + followup = await route_message_to_brain( + app, + thread_id, + _format_ask_user_question_followup( + pending_request, + answers=answers, + annotations=getattr(payload, "annotations", None), + ), + source="internal", + message_metadata={"ask_user_question_answered": answered_payload}, + ) + + response = {"ok": True, "thread_id": thread_id, "request_id": request_id} + if followup is not None: + response["followup"] = followup + return response + + +@router.post("/{thread_id}/permissions/rules") +async def add_thread_permission_rule( + thread_id: str, + payload: ThreadPermissionRuleRequest, + user_id: Annotated[str | None, Depends(verify_thread_owner)] = None, + agent: Annotated[Any, Depends(get_thread_agent)] = None, +) -> dict[str, Any]: + await agent.agent.aget_state({"configurable": {"thread_id": thread_id}}) + rule_state = agent.get_thread_permission_rules(thread_id) + if rule_state["managed_only"]: + raise HTTPException(status_code=409, detail="Managed permission rules only; session overrides are disabled") + ok = agent.add_thread_permission_rule( + thread_id, + behavior=payload.behavior, + tool_name=payload.tool_name, + ) + if not ok: + raise HTTPException(status_code=400, detail="Could not add thread permission rule") + await agent.agent.apersist_state(thread_id) + updated = agent.get_thread_permission_rules(thread_id) + return { + "ok": True, + "thread_id": thread_id, + "scope": "session", + "rules": updated["rules"], + "managed_only": updated["managed_only"], + } + + +@router.delete("/{thread_id}/permissions/rules/{behavior}/{tool_name}") +async def delete_thread_permission_rule( + thread_id: str, + behavior: str, + tool_name: str, + user_id: Annotated[str | None, Depends(verify_thread_owner)] = None, + agent: Annotated[Any, Depends(get_thread_agent)] = None, +) -> dict[str, Any]: + await agent.agent.aget_state({"configurable": {"thread_id": thread_id}}) + ok = agent.remove_thread_permission_rule( + thread_id, + behavior=behavior, + tool_name=tool_name, + ) + if not ok: + raise HTTPException(status_code=404, detail="Thread permission rule not found") + await agent.agent.apersist_state(thread_id) + updated = agent.get_thread_permission_rules(thread_id) + return { + "ok": True, + "thread_id": thread_id, + "scope": "session", + "rules": updated["rules"], + "managed_only": updated["managed_only"], + } + + @router.get("/{thread_id}/runtime") async def get_thread_runtime( thread_id: str, stream: bool = False, - user_id: Annotated[str, Depends(verify_thread_owner)] = None, + user_id: Annotated[str | None, Depends(verify_thread_owner)] = None, app: Annotated[Any, Depends(get_app)] = None, ) -> dict[str, Any]: """Get runtime status for a thread.""" @@ -902,12 +1272,9 @@ async def get_thread_terminal_status( async def get_thread_lease_status( thread_id: str, agent: Annotated[Any, Depends(get_thread_agent)] = None, -) -> dict[str, Any]: +) -> dict[str, Any] | None: """Get SandboxLease status for a thread.""" - try: - return await get_lease_status(agent, thread_id) - except ValueError as e: - raise HTTPException(404, 
    str(e)) from e
+    return await get_lease_status(agent, thread_id)
 
 
 # SSE response headers: disable proxy buffering for real-time streaming
@@ -931,17 +1298,12 @@ async def stream_thread_events(
     app: Annotated[Any, Depends(get_app)] = None,
 ) -> EventSourceResponse:
     """Persistent SSE event stream — uses ?token= for auth (EventSource can't set headers)."""
-    from backend.web.core.dependencies import _DEV_PAYLOAD, _DEV_SKIP_AUTH
-
-    if _DEV_SKIP_AUTH:
-        sse_user_id = _DEV_PAYLOAD["user_id"]
-    else:
-        if not token:
-            raise HTTPException(401, "Missing token")
-        try:
-            sse_user_id = app.state.auth_service.verify_token(token)["user_id"]
-        except ValueError as e:
-            raise HTTPException(401, str(e))
+    if not token:
+        raise HTTPException(401, "Missing token")
+    try:
+        sse_user_id = app.state.auth_service.verify_token(token)["user_id"]
+    except ValueError as e:
+        raise HTTPException(401, str(e))
     thread = app.state.thread_repo.get_by_id(thread_id)
     if not thread:
         raise HTTPException(404, "Thread not found")
@@ -995,7 +1357,7 @@
 @router.post("/{thread_id}/runs/cancel")
 async def cancel_run(
     thread_id: str,
-    user_id: Annotated[str, Depends(verify_thread_owner)] = None,
+    user_id: Annotated[str | None, Depends(verify_thread_owner)] = None,
     app: Annotated[Any, Depends(get_app)] = None,
 ):
     """Cancel an active run for the given thread."""
@@ -1016,6 +1378,33 @@ def _get_background_runs(app: Any, thread_id: str) -> dict:
     return getattr(agent, "_background_runs", {}) if agent else {}
 
 
+def _background_run_type(run: Any) -> str:
+    return "bash" if run.__class__.__name__ == "_BashBackgroundRun" else "agent"
+
+
+def _serialize_background_run(task_id: str, run: Any, *, include_result: bool) -> dict[str, Any]:
+    run_type = _background_run_type(run)
+    result_text = run.get_result() if include_result and run.is_done else None
+    payload = {
+        "task_id": task_id,
+        "task_type": run_type,
+        "status": "completed" if run.is_done else "running",
+        "command_line": getattr(run, "command", None) if run_type == "bash" else None,
+    }
+    if include_result:
+        payload["result"] = result_text
+        payload["text"] = result_text
+        return payload
+    payload["description"] = getattr(run, "description", None)
+    payload["exit_code"] = getattr(getattr(run, "_cmd", None), "exit_code", None) if run_type == "bash" else None
+    payload["error"] = None
+    return payload
+
+
+async def _get_display_task_map(app: Any, thread_id: str) -> dict[str, dict[str, Any]]:
+    return _collect_display_subagent_tasks(await _get_thread_display_entries(app, thread_id))
+
+
 @router.get("/{thread_id}/tasks")
 async def list_tasks(
     thread_id: str,
@@ -1023,18 +1412,20 @@
 ) -> list[dict]:
     """List all background runs (bash + agent) for this thread."""
     runs = _get_background_runs(request.app, thread_id)
-    result = []
-    for task_id, run in runs.items():
-        run_type = "bash" if run.__class__.__name__ == "_BashBackgroundRun" else "agent"
+    result = [_serialize_background_run(task_id, run, include_result=False) for task_id, run in runs.items()]
+    seen_task_ids = set(runs)
+    for task_id, task in (await _get_display_task_map(request.app, thread_id)).items():
+        if task_id in seen_task_ids:
+            continue
         result.append(
             {
-                "task_id": task_id,
-                "task_type": run_type,
-                "status": "completed" if run.is_done else "running",
-                "command_line": getattr(run, "command", None) if run_type == "bash" else None,
-                "description": getattr(run, "description", None),
-                "exit_code": getattr(getattr(run, "_cmd", None), "exit_code", None) if run_type == "bash" else None,
-                "error": 
None, + "task_id": task["task_id"], + "task_type": task["task_type"], + "status": task["status"], + "command_line": task["command_line"], + "description": task["description"], + "exit_code": task["exit_code"], + "error": task["error"], } ) return result @@ -1050,18 +1441,19 @@ async def get_task( runs = _get_background_runs(request.app, thread_id) run = runs.get(task_id) if not run: - raise HTTPException(status_code=404, detail="Task not found") + task = (await _get_display_task_map(request.app, thread_id)).get(task_id) + if task is None: + raise HTTPException(status_code=404, detail="Task not found") + return { + "task_id": task["task_id"], + "task_type": task["task_type"], + "status": task["status"], + "command_line": task["command_line"], + "result": task["result"], + "text": task["text"], + } - run_type = "bash" if run.__class__.__name__ == "_BashBackgroundRun" else "agent" - result_text = run.get_result() if run.is_done else None - return { - "task_id": task_id, - "task_type": run_type, - "status": "completed" if run.is_done else "running", - "command_line": getattr(run, "command", None) if run_type == "bash" else None, - "result": result_text, - "text": result_text, - } + return _serialize_background_run(task_id, run, include_result=True) @router.post("/{thread_id}/tasks/{task_id}/cancel") @@ -1074,7 +1466,16 @@ async def cancel_task( runs = _get_background_runs(request.app, thread_id) run = runs.get(task_id) if not run: - raise HTTPException(status_code=404, detail="Task not found") + task = (await _get_display_task_map(request.app, thread_id)).get(task_id) + if task is None: + raise HTTPException(status_code=404, detail="Task not found") + if task["status"] != "running": + raise HTTPException(status_code=400, detail="Task is not running") + thread_task = request.app.state.thread_tasks.get(thread_id) + if thread_task is None or thread_task.done(): + raise HTTPException(status_code=400, detail="Task is not independently cancellable") + thread_task.cancel() + return {"ok": True, "message": "Run cancellation requested", "task_id": task_id} if run.is_done: raise HTTPException(status_code=400, detail="Task is not running") @@ -1112,7 +1513,7 @@ async def _notify_task_cancelled(app: Any, thread_id: str, task_id: str, run: An agent_id=task_id, agent_name=f"cancel-{task_id[:8]}", ) - await emit_fn( + emission = emit_fn( { "event": "task_done", "data": json.dumps( @@ -1125,6 +1526,8 @@ async def _notify_task_cancelled(app: Any, thread_id: str, task_id: str, run: An ), } ) + if asyncio.iscoroutine(emission): + await emission except Exception: logger.warning("Failed to emit task_done for cancelled task %s", task_id, exc_info=True) diff --git a/backend/web/services/agent_pool.py b/backend/web/services/agent_pool.py index 50ecb5dbf..b3041c6a9 100644 --- a/backend/web/services/agent_pool.py +++ b/backend/web/services/agent_pool.py @@ -1,18 +1,21 @@ """Agent pool management service.""" import asyncio -import os +import logging from pathlib import Path from typing import Any from fastapi import FastAPI +from config.user_paths import preferred_existing_user_home_path from core.identity.agent_registry import get_or_create_agent_id from core.runtime.agent import create_leon_agent from sandbox.manager import lookup_sandbox_for_thread from sandbox.thread_context import set_current_thread_id from storage.runtime import build_storage_container +logger = logging.getLogger(__name__) + # Thread lock for config updates _config_update_locks: dict[str, asyncio.Lock] = {} _agent_create_locks: dict[str, 
asyncio.Lock] = {} @@ -23,15 +26,16 @@ def create_agent_sync( workspace_root: Path | None = None, model_name: str | None = None, agent: str | None = None, + bundle_dir: Path | None = None, + thread_repo: Any = None, + member_repo: Any = None, queue_manager: Any = None, chat_repos: dict | None = None, extra_allowed_paths: list[str] | None = None, + web_app: Any = None, ) -> Any: """Create a LeonAgent with the given sandbox. Runs in a thread.""" - storage_container = build_storage_container( - main_db_path=os.getenv("LEON_DB_PATH"), - eval_db_path=os.getenv("LEON_EVAL_DB_PATH"), - ) + storage_container = build_storage_container() # @@@web-file-ops-repo - inject storage-backed repo so file_operations route to correct provider. from core.operations import FileOperationRecorder, set_recorder @@ -41,10 +45,15 @@ def create_agent_sync( workspace_root=workspace_root or Path.cwd(), sandbox=sandbox_name if sandbox_name != "local" else None, storage_container=storage_container, + permission_resolver_scope="thread", + thread_repo=thread_repo, + member_repo=member_repo, queue_manager=queue_manager, chat_repos=chat_repos, + web_app=web_app, verbose=True, agent=agent, + bundle_dir=bundle_dir, extra_allowed_paths=extra_allowed_paths, ) @@ -76,11 +85,27 @@ async def get_or_create_agent(app_obj: FastAPI, sandbox_type: str, thread_id: st thread_data = app_obj.state.thread_repo.get_by_id(thread_id) if hasattr(app_obj.state, "thread_repo") else None if sandbox_type == "local": cwd = app_obj.state.thread_cwd.get(thread_id) + cwd_from_live_map = cwd is not None if not cwd and thread_data and thread_data.get("cwd"): cwd = thread_data["cwd"] - app_obj.state.thread_cwd[thread_id] = cwd if cwd: - workspace_root = Path(cwd).resolve() + path = Path(cwd).expanduser() + # @@@fresh-local-cwd-owns-workspace - a cwd chosen in this live backend session is + # the caller contract for local threads; create it instead of silently falling + # back to the repo root. Persisted paths from another host stay advisory. + if cwd_from_live_map: + path.mkdir(parents=True, exist_ok=True) + workspace_root = path.resolve() + app_obj.state.thread_cwd[thread_id] = str(workspace_root) + # @@@host-local-cwd-is-advisory - persisted local thread cwd can come from another + # host (for example a macOS path stored in shared Supabase but replayed inside a + # Linux staging container). Only pin workspace_root when that path exists here. + elif path.exists() and path.is_dir(): + workspace_root = path.resolve() + app_obj.state.thread_cwd[thread_id] = str(workspace_root) + else: + app_obj.state.thread_cwd.pop(thread_id, None) + logger.warning("Ignoring unavailable local cwd for thread %s: %s", thread_id, cwd) # Look up model for this thread (threads table → preferences default) model_name = thread_data.get("model") if thread_data else None @@ -93,29 +118,35 @@ async def get_or_create_agent(app_obj: FastAPI, sandbox_type: str, thread_id: st # @@@agent-vs-member - thread_config.agent stores a member ID (e.g. "__leon__") for display, # NOT an agent type name ("bash", "general", etc.). Never pass it to create_leon_agent. 
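A minimal sketch of the invariant behind that note (all names here are hypothetical, not the project's API): member IDs stored for display must never be forwarded as agent type names.

```python
# Hypothetical sketch — "__leon__" is a member ID for display, never a type name.
AGENT_TYPES = {"bash", "general"}  # assumed type names, for illustration only


def resolve_agent_type(caller_agent: str | None) -> str | None:
    """Pass through explicit type names; anything else falls back to the default agent."""
    return caller_agent if caller_agent in AGENT_TYPES else None


assert resolve_agent_type("__leon__") is None  # member ID → default Leon agent
assert resolve_agent_type("bash") == "bash"    # explicit type → honored
```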
agent_name = agent # explicit caller-provided type only; None → default Leon agent + bundle_dir = None + if thread_data and thread_data.get("member_id"): + member_dir = preferred_existing_user_home_path("members", str(thread_data["member_id"])) + if member_dir.is_dir(): + bundle_dir = member_dir.resolve() - # @@@chat-repos - construct chat_repos for ChatToolService if entity system is available + # @@@chat-repos - construct chat_repos for ChatToolService (v2 messaging) chat_repos = None - if hasattr(app_obj.state, "entity_repo") and thread_data: - entity_repo = app_obj.state.entity_repo - member_repo = getattr(app_obj.state, "member_repo", None) - # Entity id = member_id in the new model; look up by member_id, not thread_id + if hasattr(app_obj.state, "member_repo") and thread_data: + member_repo = app_obj.state.member_repo agent_member_id = thread_data.get("member_id") - agent_entity = entity_repo.get_by_id(agent_member_id) if agent_member_id else None - if agent_entity: - # agent social identity = member_id - agent_member = member_repo.get_by_id(agent_entity.member_id) if member_repo else None - # owner social identity = owner's user_id (same as their member_id for humans) - owner_user_id = agent_member.owner_user_id if agent_member else "" + agent_member = member_repo.get_by_id(agent_member_id) if agent_member_id else None + if agent_member: + chat_identity_id = thread_data.get("user_id") + # @@@thread-chat-identity-source - agent chat identity must come from the + # thread-owned dedicated user_id, never from the member template id. + if not chat_identity_id: + raise RuntimeError(f"thread.user_id is required for agent chat identity: {thread_id}") + owner_id = agent_member.owner_user_id or "" chat_repos = { - "user_id": agent_entity.member_id, # agent's social identity = member_id - "owner_user_id": owner_user_id, - "entity_repo": entity_repo, - "chat_service": getattr(app_obj.state, "chat_service", None), - "chat_entity_repo": getattr(app_obj.state, "chat_entity_repo", None), - "chat_message_repo": getattr(app_obj.state, "chat_message_repo", None), + "chat_identity_id": chat_identity_id, + "user_id": chat_identity_id, + "owner_id": owner_id, "member_repo": member_repo, - "chat_event_bus": getattr(app_obj.state, "chat_event_bus", None), + "messaging_service": getattr(app_obj.state, "messaging_service", None), + "chat_member_repo": getattr(app_obj.state, "chat_member_repo", None), + "messages_repo": getattr(app_obj.state, "messages_repo", None), + "relationship_repo": getattr(app_obj.state, "relationship_repo", None), + "agent_config_repo": getattr(app_obj.state, "agent_config_repo", None), } # @@@per-thread-file-access - ensure thread files are accessible from agent @@ -136,12 +167,23 @@ async def get_or_create_agent(app_obj: FastAPI, sandbox_type: str, thread_id: st except FileNotFoundError: pass - extra_allowed_paths = extra_allowed_paths or None + extra_allowed_paths_or_none: list[str] | None = extra_allowed_paths or None # @@@ agent-init-thread - LeonAgent.__init__ uses run_until_complete, must run in thread qm = getattr(app_obj.state, "queue_manager", None) agent_obj = await asyncio.to_thread( - create_agent_sync, sandbox_type, workspace_root, model_name, agent_name, qm, chat_repos, extra_allowed_paths + create_agent_sync, + sandbox_name=sandbox_type, + workspace_root=workspace_root, + model_name=model_name, + agent=agent_name, + bundle_dir=bundle_dir, + thread_repo=getattr(app_obj.state, "thread_repo", None), + member_repo=getattr(app_obj.state, "member_repo", None), + 
queue_manager=qm, + chat_repos=chat_repos, + extra_allowed_paths=extra_allowed_paths_or_none, + web_app=app_obj, ) member = agent_name or "leon" agent_id = get_or_create_agent_id( diff --git a/backend/web/services/auth_service.py b/backend/web/services/auth_service.py index 85c9c21c6..dd7b46c21 100644 --- a/backend/web/services/auth_service.py +++ b/backend/web/services/auth_service.py @@ -5,10 +5,11 @@ import logging import os import time +from collections.abc import Callable import jwt -from storage.contracts import AccountRepo, EntityRepo, InviteCodeRepo, MemberRepo, MemberRow, MemberType +from storage.contracts import InviteCodeRepo, MemberRepo, MemberRow, MemberType logger = logging.getLogger(__name__) @@ -19,15 +20,15 @@ class AuthService: def __init__( self, members: MemberRepo, - accounts: AccountRepo, - entities: EntityRepo, supabase_client=None, + supabase_auth_client=None, + supabase_auth_client_factory: Callable[[], object] | None = None, invite_codes: InviteCodeRepo | None = None, ) -> None: self._members = members - self._accounts = accounts - self._entities = entities - self._sb = supabase_client # None in sqlite-only mode + self._sb = supabase_client # storage/service-role client + self._sb_auth = supabase_auth_client # end-user auth client + self._sb_auth_factory = supabase_auth_client_factory self._invite_codes = invite_codes # ------------------------------------------------------------------ @@ -39,6 +40,7 @@ def __init__( def send_otp(self, email: str, password: str, invite_code: str) -> None: """Validate invite code, create user via signUp (sends confirmation OTP to email).""" + auth_client = self._auth_api(self._require_auth_client()) if self._sb is None: raise RuntimeError("Supabase client required.") if self._invite_codes is None or not self._invite_codes.is_valid(invite_code): @@ -46,7 +48,7 @@ def send_otp(self, email: str, password: str, invite_code: str) -> None: from supabase_auth.errors import AuthApiError try: - self._sb.auth.sign_up({"email": email, "password": password}) + auth_client.sign_up({"email": email, "password": password}) except AuthApiError as e: msg = e.message or "" if "already registered" in msg or "already exists" in msg: @@ -55,12 +57,13 @@ def send_otp(self, email: str, password: str, invite_code: str) -> None: def verify_register_otp(self, email: str, token: str) -> dict: """Verify signup OTP. Returns temp_token to be used in complete_register.""" + auth_client = self._auth_api(self._require_auth_client()) if self._sb is None: raise RuntimeError("Supabase client required.") from supabase_auth.errors import AuthApiError try: - resp = self._sb.auth.verify_otp({"email": email, "token": token, "type": "signup"}) + resp = auth_client.verify_otp({"email": email, "token": token, "type": "signup"}) except AuthApiError as e: raise ValueError(f"验证码错误: {e.message}") from e if resp.user is None or resp.session is None: @@ -129,8 +132,7 @@ def complete_register(self, temp_token: str, invite_code: str) -> dict: def login(self, identifier: str, password: str) -> dict: """Login with email or mycel_id + password.""" - if self._sb is None: - raise RuntimeError("Supabase client required for login. 
Set LEON_STORAGE_STRATEGY=supabase.") + auth_client = self._auth_api(self._require_auth_client()) # Resolve email email = self._resolve_email(identifier) @@ -139,7 +141,7 @@ def login(self, identifier: str, password: str) -> dict: # Sign in via Supabase try: - resp = self._sb.auth.sign_in_with_password({"email": email, "password": password}) + resp = auth_client.sign_in_with_password({"email": email, "password": password}) except AuthApiError: raise ValueError("邮箱或密码错误") if resp.user is None or resp.session is None: @@ -175,6 +177,16 @@ def login(self, identifier: str, password: str) -> dict: def verify_token(self, token: str) -> dict: """Verify Supabase JWT. Returns {user_id}.""" + auth_client = self._sb_auth_factory() if self._sb_auth_factory is not None else self._sb_auth + if auth_client is not None: + auth_api = self._auth_api(auth_client) + try: + user_resp = auth_api.get_user(token) + except Exception as e: + raise ValueError(f"Token 无效: {e}") from e + if user_resp is None or getattr(user_resp, "user", None) is None: + raise ValueError("Token 无效: user not found") + return {"user_id": str(user_resp.user.id)} jwt_secret = os.getenv("SUPABASE_JWT_SECRET") if not jwt_secret: raise RuntimeError("SUPABASE_JWT_SECRET env var required for token verification.") @@ -204,12 +216,22 @@ def _resolve_email(self, identifier: str) -> str: return member.email return identifier.strip() + def _require_auth_client(self): + if self._sb_auth_factory is not None: + return self._sb_auth_factory() + if self._sb_auth is None: + raise RuntimeError("Supabase auth client required. Configure SUPABASE_ANON_KEY for auth runtime.") + return self._sb_auth + + def _auth_api(self, auth_client): + return getattr(auth_client, "auth", auth_client) + def _create_initial_agents(self, owner_user_id: str, now: float) -> dict | None: """Create Toad and Morel agents for a new user. 
Returns first agent info.""" from pathlib import Path from backend.web.services.member_service import MEMBERS_DIR, _write_agent_md, _write_json - from storage.providers.sqlite.member_repo import generate_member_id + from storage.utils import generate_member_id initial_agents = [ {"name": "Toad", "description": "Curious and energetic assistant", "avatar": "toad.jpeg"}, diff --git a/backend/web/services/chat_service.py b/backend/web/services/chat_service.py deleted file mode 100644 index 51a5ebbeb..000000000 --- a/backend/web/services/chat_service.py +++ /dev/null @@ -1,255 +0,0 @@ -"""Chat service — entity-to-entity communication.""" - -from __future__ import annotations - -import logging -import time -import uuid -from collections.abc import Callable -from typing import Any - -from backend.web.utils.serializers import avatar_url -from storage.contracts import ( - ChatEntityRepo, - ChatMessageRepo, - ChatMessageRow, - ChatRepo, - ChatRow, - DeliveryResolver, - EntityRepo, - MemberRepo, -) - -logger = logging.getLogger(__name__) - - -class ChatService: - def __init__( - self, - chat_repo: ChatRepo, - chat_entity_repo: ChatEntityRepo, - chat_message_repo: ChatMessageRepo, - entity_repo: EntityRepo, - member_repo: MemberRepo, - event_bus: Any = None, - delivery_fn: Callable | None = None, - delivery_resolver: DeliveryResolver | None = None, - ) -> None: - self._chats = chat_repo - self._chat_entities = chat_entity_repo - self._messages = chat_message_repo - self._entities = entity_repo - self._members = member_repo - self._event_bus = event_bus - self._delivery_fn = delivery_fn - self._delivery_resolver = delivery_resolver - - def _resolve_name(self, user_id: str) -> str: - """Resolve display name: entity_repo (agents) → member_repo (humans).""" - e = self._entities.get_by_id(user_id) - if e: - return e.name - m = self._members.get_by_id(user_id) if self._members else None - return m.name if m else "unknown" - - def find_or_create_chat(self, user_ids: list[str], title: str | None = None) -> ChatRow: - """Find existing 1:1 chat between two social identities, or create one.""" - if len(user_ids) != 2: - raise ValueError("Use create_group_chat() for 3+ participants") - - existing_id = self._chat_entities.find_chat_between(user_ids[0], user_ids[1]) - if existing_id: - return self._chats.get_by_id(existing_id) - - now = time.time() - chat_id = str(uuid.uuid4()) - self._chats.create(ChatRow(id=chat_id, title=title, created_at=now)) - for uid in user_ids: - self._chat_entities.add_participant(chat_id, uid, now) - return self._chats.get_by_id(chat_id) - - def create_group_chat(self, user_ids: list[str], title: str | None = None) -> ChatRow: - """Create a group chat with 3+ participants.""" - if len(user_ids) < 3: - raise ValueError("Group chat requires 3+ participants") - now = time.time() - chat_id = str(uuid.uuid4()) - self._chats.create(ChatRow(id=chat_id, title=title, created_at=now)) - for uid in user_ids: - self._chat_entities.add_participant(chat_id, uid, now) - return self._chats.get_by_id(chat_id) - - def send_message( - self, - chat_id: str, - sender_id: str, - content: str, - mentioned_ids: list[str] | None = None, - signal: str | None = None, - ) -> ChatMessageRow: - """Send a message in a chat.""" - logger.debug( - "[send_message] chat=%s sender=%s content=%.50s signal=%s", - chat_id[:8], - sender_id[:15], - content[:50], - signal, - ) - mentions = mentioned_ids or [] - now = time.time() - msg_id = str(uuid.uuid4()) - msg = ChatMessageRow( - id=msg_id, - chat_id=chat_id, - 
sender_id=sender_id, - content=content, - mentioned_ids=mentions, - created_at=now, - ) - self._messages.create(msg) - - sender_name = self._resolve_name(sender_id) - - if self._event_bus: - self._event_bus.publish( - chat_id, - { - "event": "message", - "data": { - "id": msg_id, - "chat_id": chat_id, - "sender_id": sender_id, - "sender_name": sender_name, - "content": content, - "mentioned_ids": mentions, - "created_at": now, - }, - }, - ) - - self._deliver_to_agents(chat_id, sender_id, sender_name, content, mentions, signal=signal) - return msg - - def _deliver_to_agents( - self, - chat_id: str, - sender_id: str, - sender_name: str, - content: str, - mentioned_ids: list[str] | None = None, - signal: str | None = None, - ) -> None: - """For each non-sender agent participant in the chat, deliver to their brain thread.""" - mentions = set(mentioned_ids or []) - participants = self._chat_entities.list_participants(chat_id) - sender_avatar_url = None - sender_mid = sender_id - sender_entity = self._entities.get_by_id(sender_id) - if sender_entity: - sender_mid = sender_entity.member_id - m = self._members.get_by_id(sender_mid) if self._members else None - sender_avatar_url = avatar_url(sender_mid, bool(m.avatar if m else None)) - - for ce in participants: - if ce.user_id == sender_id: - continue - entity = self._entities.get_by_id(ce.user_id) - if not entity or entity.type != "agent" or not entity.thread_id: - logger.debug( - "[deliver] SKIP %s type=%s thread=%s", - ce.user_id, - getattr(entity, "type", None), - getattr(entity, "thread_id", None), - ) - continue - # @@@delivery-strategy-gate — check contact block/mute + chat mute - # @@@mention-override — mentioned entities skip mute (but not block) - if self._delivery_resolver: - from storage.contracts import DeliveryAction - - is_mentioned = ce.user_id in mentions - action = self._delivery_resolver.resolve( - ce.user_id, - chat_id, - sender_id, - is_mentioned=is_mentioned, - ) - if action != DeliveryAction.DELIVER: - logger.info( - "[deliver] POLICY %s for %s (sender=%s chat=%s mentioned=%s)", - action.value, - ce.user_id, - sender_id, - chat_id[:8], - is_mentioned, - ) - continue - if self._delivery_fn: - logger.debug("[deliver] → %s (thread=%s) from=%s", entity.id, entity.thread_id, sender_name) - try: - self._delivery_fn(entity, content, sender_name, chat_id, sender_id, sender_avatar_url, signal=signal) - except Exception: - logger.exception("Failed to deliver chat message to entity %s", entity.id) - else: - logger.warning("[deliver] NO delivery_fn for %s", entity.id) - - def set_delivery_fn(self, fn) -> None: - self._delivery_fn = fn - - def list_chats_for_user(self, user_id: str) -> list[dict]: - """List all chats for a user (social identity) with summary info.""" - chat_ids = self._chat_entities.list_chats_for_user(user_id) - result = [] - for cid in chat_ids: - chat = self._chats.get_by_id(cid) - if not chat or chat.status != "active": - continue - participants = self._chat_entities.list_participants(cid) - entities_info = [] - for p in participants: - e = self._entities.get_by_id(p.user_id) - if e: - m = self._members.get_by_id(e.member_id) if self._members else None - entities_info.append( - { - "id": p.user_id, - "name": e.name, - "type": e.type, - "avatar_url": avatar_url(e.member_id, bool(m.avatar if m else None)), - } - ) - else: - m = self._members.get_by_id(p.user_id) if self._members else None - if m: - entities_info.append( - { - "id": p.user_id, - "name": m.name, - "type": "human", - "avatar_url": avatar_url(m.id, 
bool(m.avatar)), - } - ) - msgs = self._messages.list_by_chat(cid, limit=1) - last_msg = None - if msgs: - m = msgs[0] - last_msg = { - "content": m.content, - "sender_name": self._resolve_name(m.sender_id), - "created_at": m.created_at, - } - unread = self._messages.count_unread(cid, user_id) - has_mention = self._messages.has_unread_mention(cid, user_id) - result.append( - { - "id": cid, - "title": chat.title, - "status": chat.status, - "created_at": chat.created_at, - "entities": entities_info, - "last_message": last_msg, - "unread_count": unread, - "has_mention": has_mention, - } - ) - return result diff --git a/backend/web/services/cron_job_service.py b/backend/web/services/cron_job_service.py index e7b3a7330..c59b54e5e 100644 --- a/backend/web/services/cron_job_service.py +++ b/backend/web/services/cron_job_service.py @@ -9,45 +9,55 @@ def _repo() -> Any: return make_cron_job_repo() -def list_cron_jobs() -> list[dict[str, Any]]: - repo = _repo() +def list_cron_jobs(owner_user_id: str | None = None, repo: Any = None) -> list[dict[str, Any]]: + own_repo = repo is None + repo = repo or _repo() try: - return repo.list_all() + return repo.list_all(owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() -def get_cron_job(job_id: str) -> dict[str, Any] | None: - repo = _repo() +def get_cron_job(job_id: str, owner_user_id: str | None = None, repo: Any = None) -> dict[str, Any] | None: + own_repo = repo is None + repo = repo or _repo() try: - return repo.get(job_id) + return repo.get(job_id, owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() -def create_cron_job(*, name: str, cron_expression: str, **fields: Any) -> dict[str, Any]: +def create_cron_job(*, name: str, cron_expression: str, repo: Any = None, **fields: Any) -> dict[str, Any]: if not name or not name.strip(): raise ValueError("name must not be empty") if not cron_expression or not cron_expression.strip(): raise ValueError("cron_expression must not be empty") - repo = _repo() + own_repo = repo is None + repo = repo or _repo() try: return repo.create(name=name, cron_expression=cron_expression, **fields) finally: - repo.close() + if own_repo: + repo.close() -def update_cron_job(job_id: str, **fields: Any) -> dict[str, Any] | None: - repo = _repo() +def update_cron_job(job_id: str, owner_user_id: str | None = None, repo: Any = None, **fields: Any) -> dict[str, Any] | None: + own_repo = repo is None + repo = repo or _repo() try: - return repo.update(job_id, **fields) + return repo.update(job_id, owner_user_id=owner_user_id, **fields) finally: - repo.close() + if own_repo: + repo.close() -def delete_cron_job(job_id: str) -> bool: - repo = _repo() +def delete_cron_job(job_id: str, owner_user_id: str | None = None, repo: Any = None) -> bool: + own_repo = repo is None + repo = repo or _repo() try: - return repo.delete(job_id) + return repo.delete(job_id, owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() diff --git a/backend/web/services/cron_service.py b/backend/web/services/cron_service.py index bfb0ca244..2c9c8993f 100644 --- a/backend/web/services/cron_service.py +++ b/backend/web/services/cron_service.py @@ -26,9 +26,11 @@ class CronService: """Background cron scheduler that creates panel_tasks from cron job templates.""" - def __init__(self) -> None: + def __init__(self, *, cron_job_repo: Any = None, task_repo: Any = None) -> None: self._running = False self._task: asyncio.Task | None = None + self._cron_job_repo = cron_job_repo + 
self._task_repo = task_repo # -- public API ---------------------------------------------------------- @@ -52,13 +54,18 @@ async def stop(self) -> None: self._task = None logger.info("[cron-service] stopped") - async def trigger_job(self, job_id: str) -> dict[str, Any] | None: + async def trigger_job(self, job_id: str, owner_user_id: str | None = None) -> dict[str, Any] | None: """Manually trigger a cron job. Creates a task from template. Returns the created task dict, or None if the job doesn't exist, is disabled, or has an invalid template. """ - job = await asyncio.to_thread(cron_job_service.get_cron_job, job_id) + job = await asyncio.to_thread( + cron_job_service.get_cron_job, + job_id, + owner_user_id=owner_user_id, + repo=self._cron_job_repo, + ) if job is None: return None if not job.get("enabled"): @@ -76,12 +83,19 @@ async def trigger_job(self, job_id: str) -> dict[str, Any] | None: task_fields: dict[str, Any] = {k: v for k, v in template.items() if k in _ALLOWED_TEMPLATE_KEYS} task_fields["source"] = "cron" task_fields["cron_job_id"] = job_id + task_fields["owner_user_id"] = job.get("owner_user_id") - task = await asyncio.to_thread(task_service.create_task, **task_fields) + task = await asyncio.to_thread(task_service.create_task, repo=self._task_repo, **task_fields) # Update last_run_at on the cron job now_ms = int(time.time() * 1000) - await asyncio.to_thread(cron_job_service.update_cron_job, job_id, last_run_at=now_ms) + await asyncio.to_thread( + cron_job_service.update_cron_job, + job_id, + owner_user_id=job.get("owner_user_id"), + repo=self._cron_job_repo, + last_run_at=now_ms, + ) logger.info("[cron-service] triggered job %s → task %s", job_id, task.get("id")) return task @@ -129,7 +143,7 @@ async def _scheduler_loop(self) -> None: async def _check_and_trigger(self) -> None: """Check all enabled cron jobs and trigger those that are due.""" - jobs = await asyncio.to_thread(cron_job_service.list_cron_jobs) + jobs = await asyncio.to_thread(cron_job_service.list_cron_jobs, repo=self._cron_job_repo) for job in jobs: if self.is_due(job): try: diff --git a/backend/web/services/delivery_resolver.py b/backend/web/services/delivery_resolver.py deleted file mode 100644 index 43e6e6bd7..000000000 --- a/backend/web/services/delivery_resolver.py +++ /dev/null @@ -1,74 +0,0 @@ -"""Delivery strategy resolver — evaluates per-recipient delivery action. - -@@@delivery-strategy-gate — single evaluation point between message storage -and agent delivery. Checks contact-level block/mute → chat-level mute → default. -""" - -from __future__ import annotations - -import logging -import time - -from storage.contracts import ChatEntityRepo, ContactRepo, DeliveryAction - -logger = logging.getLogger(__name__) - - -class DefaultDeliveryResolver: - """Evaluates delivery action for a chat message recipient. - - Priority (highest wins): - 1. Contact block (sender blocked by recipient) → DROP - 2. Contact mute (sender muted by recipient) → NOTIFY - 3. Chat mute (recipient muted this chat) → NOTIFY - 4. Default → DELIVER - """ - - def __init__(self, contact_repo: ContactRepo, chat_entity_repo: ChatEntityRepo) -> None: - self._contacts = contact_repo - self._chat_entities = chat_entity_repo - - def resolve( - self, - recipient_id: str, - chat_id: str, - sender_id: str, - *, - is_mentioned: bool = False, - ) -> DeliveryAction: - # 1. 
Contact-level block — always DROP, even if mentioned - contact = self._contacts.get(recipient_id, sender_id) - if contact and contact.relation == "blocked": - logger.debug("[resolver] DROP: %s blocked %s", recipient_id[:15], sender_id[:15]) - return DeliveryAction.DROP - - # @@@mention-override — mentioned entities skip mute checks - if is_mentioned: - return DeliveryAction.DELIVER - - # 2. Contact-level mute - if contact and contact.relation == "muted": - logger.debug("[resolver] NOTIFY: %s muted %s", recipient_id[:15], sender_id[:15]) - return DeliveryAction.NOTIFY - - # 3. Chat-level mute - if self._is_chat_muted(recipient_id, chat_id): - logger.debug("[resolver] NOTIFY: %s muted chat %s", recipient_id[:15], chat_id[:8]) - return DeliveryAction.NOTIFY - - # 4. Default - return DeliveryAction.DELIVER - - def _is_chat_muted(self, user_id: str, chat_id: str) -> bool: - """Check if user has muted this specific chat.""" - participants = self._chat_entities.list_participants(chat_id) - for ce in participants: - if ce.user_id == user_id: - muted = getattr(ce, "muted", False) - if not muted: - return False - mute_until = getattr(ce, "mute_until", None) - if mute_until is not None and mute_until < time.time(): - return False # mute expired - return True - return False diff --git a/backend/web/services/display_builder.py b/backend/web/services/display_builder.py index 25f034ed5..6af91d91d 100644 --- a/backend/web/services/display_builder.py +++ b/backend/web/services/display_builder.py @@ -38,18 +38,46 @@ # Helpers — ported from message-mapper.ts # --------------------------------------------------------------------------- -_CHAT_MESSAGE_RE = re.compile(r"<chat_message[^>]*>([\s\S]*?)</chat_message>") - - -def _extract_chat_message(text: str) -> str | None: - m = _CHAT_MESSAGE_RE.search(text) - return m.group(1).strip() if m else None +# NOTE: tag literals reconstructed — the original angle-bracket patterns were lost to markup stripping. +_TASK_NOTIFICATION_RUN_ID_RE = re.compile(r"<run_id>(.*?)</run_id>", re.IGNORECASE | re.DOTALL) +_TASK_NOTIFICATION_STATUS_RE = re.compile(r"<status>(.*?)</status>", re.IGNORECASE | re.DOTALL) def _make_id(prefix: str = "db") -> str: return f"{prefix}-{uuid.uuid4().hex[:12]}" +def _extract_terminal_task_status(notification_type: str | None, text: str) -> tuple[str | None, str | None]: + if notification_type != "agent" or "<run_id>" not in text: + return None, None + task_match = _TASK_NOTIFICATION_RUN_ID_RE.search(text) + status_match = _TASK_NOTIFICATION_STATUS_RE.search(text) + task_id = task_match.group(1).strip() if task_match else None + status = status_match.group(1).strip().lower() if status_match else None + return task_id, status + + +def _reconcile_subagent_stream_status( + entries: list[dict], + current_turn: dict | None, + task_id: str, + status: str, +) -> None: + # @@@checkpoint-status-reconcile - idle detail rebuild only sees persisted + # checkpoint messages, not live task_done events. If a later persisted + # terminal notification names the child task, reconcile the earlier Agent + # subagent_stream status so cold rebuild does not regress it back to running. 
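The walk that follows can be exercised on its own; a rough standalone sketch with an assumed entry shape (only the fields the function actually touches):

```python
# Assumed minimal shape of a persisted assistant turn with an Agent tool segment.
turn = {
    "role": "assistant",
    "segments": [
        {"type": "tool", "step": {"name": "Agent",
                                  "subagent_stream": {"task_id": "t-1", "status": "running"}}},
    ],
}


def reconcile(turns: list[dict], task_id: str, status: str) -> None:
    # Same traversal as _reconcile_subagent_stream_status: first matching stream wins.
    for t in turns:
        for seg in t.get("segments", []):
            stream = seg.get("step", {}).get("subagent_stream")
            if seg.get("type") == "tool" and stream and stream.get("task_id") == task_id:
                stream["status"] = status
                return


reconcile([turn], "t-1", "completed")
assert turn["segments"][0]["step"]["subagent_stream"]["status"] == "completed"
```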
+ turns: list[dict] = [] + if current_turn is not None: + turns.append(current_turn) + turns.extend(entry for entry in reversed(entries) if entry.get("role") == "assistant" and entry is not current_turn) + for turn in turns: + for seg in turn.get("segments", []): + stream = seg.get("step", {}).get("subagent_stream") + if seg.get("type") == "tool" and stream and stream.get("task_id") == task_id: + stream["status"] = status + return + + # --------------------------------------------------------------------------- # Entry builders # --------------------------------------------------------------------------- @@ -89,6 +117,39 @@ def _append_to_turn(turn: dict, msg_id: str, segments: list[dict]) -> None: turn.setdefault("messageIds", []).append(msg_id) +def _build_subagent_stream( + *, + task_id: str, + thread_id: str, + description: str | None, + status: str, +) -> dict[str, Any]: + return { + "task_id": task_id, + "thread_id": thread_id, + "description": description, + "text": "", + "tool_calls": [], + "status": status, + } + + +def _build_hidden_ask_user_answer_entry( + *, + msg_id: str | None, + payload: dict[str, Any], + now: int, +) -> dict[str, Any]: + return { + "id": msg_id or _make_id("hist-user"), + "role": "user", + "content": "", + "timestamp": now, + "showing": False, + "ask_user_question_answered": payload, + } + + # --------------------------------------------------------------------------- # ThreadDisplay — per-thread in-memory state # --------------------------------------------------------------------------- @@ -234,6 +295,15 @@ def _handle_human( # Hidden if display.get("showing") is False: + ask_answered = meta.get("ask_user_question_answered") + if isinstance(ask_answered, dict): + entries.append( + _build_hidden_ask_user_answer_entry( + msg_id=msg.get("id"), + payload=ask_answered, + now=now, + ) + ) return None, None # System / external chat notification → notice @@ -242,6 +312,9 @@ def _handle_human( if source == "system" or (source == "external" and ntype == "chat"): content = _extract_text_content(msg.get("content")) msg_run_id = meta.get("run_id") or None + task_id, task_status = _extract_terminal_task_status(ntype, content) + if task_id and task_status: + _reconcile_subagent_stream_status(entries, current_turn, task_id, task_status) # Fold into current turn if same run if current_turn and (not msg_run_id or msg_run_id == current_run_id): @@ -332,19 +405,12 @@ def _handle_tool(self, msg: dict, _i: int, current_turn: dict | None, _now: int) seg["step"]["result"] = content_str seg["step"]["status"] = "done" - # Restore subagent_stream from metadata meta = msg.get("metadata") or {} - task_id = meta.get("task_id") - sub_thread = meta.get("subagent_thread_id") or (f"subagent-{task_id}" if task_id else None) - - if not task_id and seg["step"].get("name") == "Agent": - try: - parsed = json.loads(content_str) - if isinstance(parsed, dict) and parsed.get("task_id"): - task_id = parsed["task_id"] - sub_thread = parsed.get("thread_id") or f"subagent-{task_id}" - except (json.JSONDecodeError, TypeError): - pass + task_id, sub_thread, task_status = _extract_subagent_stream_identity( + seg["step"].get("name"), + meta, + content_str, + ) if sub_thread and not seg["step"].get("subagent_stream"): seg["step"]["subagent_stream"] = { @@ -353,7 +419,7 @@ def _handle_tool(self, msg: dict, _i: int, current_turn: dict | None, _now: int) "description": meta.get("description"), "text": "", "tool_calls": [], - "status": "completed", + "status": task_status, } break @@ -381,6 +447,18 @@ def 
_handle_user_message(td: ThreadDisplay, data: dict) -> dict | None: run_start/run_done events. This allows steers to appear at the bottom while the agent keeps streaming above. """ + if data.get("showing") is False: + ask_answered = data.get("ask_user_question_answered") + if not isinstance(ask_answered, dict): + return None + entry = _build_hidden_ask_user_answer_entry( + msg_id=None, + payload=ask_answered, + now=int(time.time() * 1000), + ) + td.entries.append(entry) + return {"type": "append_entry", "entry": entry} + content = data.get("content", "") entry: dict = { "id": _make_id("user"), @@ -502,18 +580,18 @@ def _handle_tool_result(td: ThreadDisplay, data: dict) -> dict | None: seg["step"]["result"] = result seg["step"]["status"] = "done" - # Subagent stream tracking - task_id = metadata.get("task_id") - sub_thread = metadata.get("subagent_thread_id") or (f"subagent-{task_id}" if task_id else None) + task_id, sub_thread, task_status = _extract_subagent_stream_identity( + seg["step"].get("name"), + metadata, + result, + ) if sub_thread and not seg["step"].get("subagent_stream"): - seg["step"]["subagent_stream"] = { - "task_id": task_id or "", - "thread_id": sub_thread, - "description": metadata.get("description"), - "text": "", - "tool_calls": [], - "status": "running", - } + seg["step"]["subagent_stream"] = _build_subagent_stream( + task_id=task_id or "", + thread_id=sub_thread, + description=metadata.get("description"), + status=task_status, + ) return { "type": "update_segment", @@ -526,8 +604,15 @@ def _handle_tool_result(td: ThreadDisplay, data: dict) -> dict | None: def _handle_notice(td: ThreadDisplay, data: dict) -> dict | None: content = data.get("content", "") ntype = data.get("notification_type") + task_id, task_status = _extract_terminal_task_status(ntype, content) turn = _get_current_turn(td) + if task_id and task_status: + # @@@live-notice-status-reconcile - live parent detail stays on the + # in-memory display while the followthrough run is still active, so the + # terminal notice must reconcile the earlier Agent step immediately + # instead of waiting for a later cold rebuild from checkpoint. + _reconcile_subagent_stream_status(td.entries, turn, task_id, task_status) if turn: # Fold into current turn seg = {"type": "notice", "content": content, "notification_type": ntype} @@ -629,22 +714,18 @@ def _handle_task_start(td: ThreadDisplay, data: dict) -> dict | None: task_id = data["task_id"] sub_thread = data.get("thread_id") or f"subagent-{task_id}" - # Find most recent Agent tool call without subagent_stream + # @@@late-task-start-race - background Agent tools can return their + # immediate "started" ToolMessage before the async task_start activity + # reaches the parent thread. Still patch the newest Agent step that + # has no child stream, even if its tool_result already marked it done. 
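A toy reproduction of that race (segment shape assumed): the old filter required `status == "calling"`, which dropped any task_start that arrived after the tool result had already marked the step done.

```python
# The Agent step after its immediate "started" ToolMessage has already landed.
seg = {"type": "tool", "step": {"name": "Agent", "status": "done"}}


def old_match(s: dict) -> bool:
    # Previous filter — only matched steps still mid-call.
    return s["step"].get("status") == "calling" and not s["step"].get("subagent_stream")


def new_match(s: dict) -> bool:
    # Relaxed filter — any Agent step that has not attached a child stream yet.
    return s["step"].get("name") == "Agent" and not s["step"].get("subagent_stream")


assert not old_match(seg)  # a late task_start would have been silently dropped
assert new_match(seg)      # now the subagent stream still gets attached
```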
for seg in reversed(turn["segments"]): - if ( - seg.get("type") == "tool" - and seg.get("step", {}).get("name") == "Agent" - and seg.get("step", {}).get("status") == "calling" - and not seg.get("step", {}).get("subagent_stream") - ): - seg["step"]["subagent_stream"] = { - "task_id": task_id, - "thread_id": sub_thread, - "description": data.get("description"), - "text": "", - "tool_calls": [], - "status": "running", - } + if seg.get("type") == "tool" and seg.get("step", {}).get("name") == "Agent" and not seg.get("step", {}).get("subagent_stream"): + seg["step"]["subagent_stream"] = _build_subagent_stream( + task_id=task_id, + thread_id=sub_thread, + description=data.get("description"), + status="running", + ) idx = _find_seg_index(turn, seg["step"]["id"]) return { "type": "update_segment", @@ -679,6 +760,28 @@ def _find_seg_index(turn: dict, tc_id: str) -> int: return -1 +def _extract_subagent_stream_identity(step_name: str | None, metadata: dict, content: str) -> tuple[str | None, str | None, str]: + task_id = metadata.get("task_id") + sub_thread = metadata.get("subagent_thread_id") or (f"subagent-{task_id}" if task_id else None) + task_status = "completed" if task_id else "running" + + if task_id or step_name != "Agent": + return task_id, sub_thread, task_status + + try: + parsed = json.loads(content) + except (json.JSONDecodeError, TypeError): + return task_id, sub_thread, task_status + + if not isinstance(parsed, dict) or not parsed.get("task_id"): + return task_id, sub_thread, task_status + + task_id = parsed["task_id"] + sub_thread = parsed.get("thread_id") or f"subagent-{task_id}" + task_status = parsed.get("status") or "running" + return task_id, sub_thread, task_status + + # Event type → handler _EVENT_HANDLERS: dict[str, Any] = { "user_message": _handle_user_message, diff --git a/backend/web/services/event_buffer.py b/backend/web/services/event_buffer.py index df2db5263..103622ca3 100644 --- a/backend/web/services/event_buffer.py +++ b/backend/web/services/event_buffer.py @@ -70,6 +70,9 @@ class ThreadEventBuffer: _ring: deque[dict] = field(default_factory=lambda: deque(maxlen=2000)) _notify: asyncio.Condition = field(default_factory=asyncio.Condition) _total_count: int = 0 # monotonic counter (total events ever put) + # @@@thread-buffer-never-finishes - keep the same observer protocol surface + # as RunEventBuffer, but thread buffers never mark completion. 
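A sketch of why the never-set event is still worth carrying (consumer code assumed, not taken from the repo): observers written once against the shared buffer surface need no special case for thread buffers.

```python
import asyncio
from dataclasses import dataclass, field


@dataclass
class Buffer:  # stand-in for either buffer kind
    finished: asyncio.Event = field(default_factory=asyncio.Event)


def snapshot_state(buf: Buffer) -> str:
    # One code path for both kinds: a run buffer eventually sets `finished`,
    # while a thread buffer never does, so it reads as "streaming" forever.
    return "done" if buf.finished.is_set() else "streaming"


run_buf, thread_buf = Buffer(), Buffer()
run_buf.finished.set()
assert snapshot_state(run_buf) == "done"
assert snapshot_state(thread_buf) == "streaming"
```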
+ finished: asyncio.Event = field(default_factory=asyncio.Event) async def put(self, event: dict) -> None: self._ring.append(event) diff --git a/backend/web/services/event_store.py b/backend/web/services/event_store.py index 998b08018..b33eb61ea 100644 --- a/backend/web/services/event_store.py +++ b/backend/web/services/event_store.py @@ -2,56 +2,34 @@ import asyncio import json -from pathlib import Path from typing import Any from storage.contracts import RunEventRepo -from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path from storage.runtime import build_storage_container -_DB_PATH = resolve_role_db_path(SQLiteDBRole.MAIN) _default_run_event_repo: RunEventRepo | None = None -_default_run_event_repo_path: Path | None = None -def init_event_store() -> None: - """Initialize run event storage for current provider strategy.""" - global _default_run_event_repo, _default_run_event_repo_path - if _default_run_event_repo is not None: - _default_run_event_repo.close() - _default_run_event_repo = None - _default_run_event_repo_path = None - - container = build_storage_container(main_db_path=_DB_PATH) - provider = container.provider_for("run_event_repo") - if provider != "sqlite": - return - - # Connection factory in RunEventRepo already guarantees WAL + PRAGMA settings. - repo = container.run_event_repo() - repo.close() - - -def _resolve_run_event_repo(run_event_repo: RunEventRepo | None) -> RunEventRepo: +def _resolve_run_event_repo(run_event_repo: RunEventRepo | None) -> RunEventRepo | None: if run_event_repo is not None: return run_event_repo - global _default_run_event_repo, _default_run_event_repo_path - if _default_run_event_repo is not None and _default_run_event_repo_path == _DB_PATH: - return _default_run_event_repo - + global _default_run_event_repo if _default_run_event_repo is not None: - _default_run_event_repo.close() - _default_run_event_repo = None - _default_run_event_repo_path = None + return _default_run_event_repo - container = build_storage_container(main_db_path=_DB_PATH) + try: + container = build_storage_container() + except RuntimeError: + return None # @@@event-store-single-path - keep one persistence boundary; when caller omits repo, resolve default repo from storage container. 
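When no storage container can be built, the module degrades to a no-op store; a minimal sketch of that fallback contract (simplified from the `append_event` path below): sequence numbers stay monotonic so resume logic keeps working, but nothing persists and reads come back empty.

```python
_noop_seq = 0


def append_event_noop() -> int:
    # Mirror of the storage-less branch: hand out strictly increasing fake seqs.
    global _noop_seq
    _noop_seq += 1
    return _noop_seq


def read_events_noop() -> list[dict]:
    return []  # reads are empty — callers must tolerate losing history


assert [append_event_noop() for _ in range(3)] == [1, 2, 3]
assert read_events_noop() == []
```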
_default_run_event_repo = container.run_event_repo() - _default_run_event_repo_path = _DB_PATH return _default_run_event_repo +_noop_seq = 0 + + async def append_event( thread_id: str, run_id: str, @@ -61,6 +39,10 @@ async def append_event( ) -> int: """Persist one SSE event and return its sequence number.""" repo = _resolve_run_event_repo(run_event_repo) + if repo is None: + global _noop_seq + _noop_seq += 1 + return _noop_seq payload = _event_payload_to_dict(event) return int( await asyncio.to_thread( @@ -82,6 +64,8 @@ async def read_events_after( ) -> list[dict[str, Any]]: """Return events with seq > after_seq for the given run.""" repo = _resolve_run_event_repo(run_event_repo) + if repo is None: + return [] rows = await asyncio.to_thread( repo.list_events, thread_id, @@ -103,18 +87,24 @@ async def read_events_after( async def get_last_seq(thread_id: str, run_event_repo: RunEventRepo | None = None) -> int: """Return the highest seq for a thread, or 0.""" repo = _resolve_run_event_repo(run_event_repo) + if repo is None: + return 0 return int(await asyncio.to_thread(repo.latest_seq, thread_id)) async def get_run_start_seq(thread_id: str, run_id: str, run_event_repo: RunEventRepo | None = None) -> int: """Return the first seq for a specific run, or 0.""" repo = _resolve_run_event_repo(run_event_repo) + if repo is None: + return 0 return int(await asyncio.to_thread(repo.run_start_seq, thread_id, run_id)) async def get_latest_run_id(thread_id: str, run_event_repo: RunEventRepo | None = None) -> str | None: """Return the run_id of the most recent run for a thread, or None.""" repo = _resolve_run_event_repo(run_event_repo) + if repo is None: + return None return await asyncio.to_thread(repo.latest_run_id, thread_id) @@ -125,6 +115,8 @@ async def cleanup_old_runs( ) -> int: """Delete all but the N most recent runs for a thread. Returns deleted count.""" repo = _resolve_run_event_repo(run_event_repo) + if repo is None: + return 0 run_ids = await asyncio.to_thread(repo.list_run_ids, thread_id) if len(run_ids) <= keep_latest: return 0 @@ -136,12 +128,6 @@ async def cleanup_old_runs( return int(await asyncio.to_thread(repo.delete_runs, thread_id, old_ids)) -async def cleanup_thread(thread_id: str, run_event_repo: RunEventRepo | None = None) -> int: - """Delete all events for a thread. 
Returns deleted count.""" - repo = _resolve_run_event_repo(run_event_repo) - return int(await asyncio.to_thread(repo.delete_thread_events, thread_id)) - - def _event_payload_to_dict(event: dict[str, Any]) -> dict[str, Any]: raw_data = event.get("data", {}) if isinstance(raw_data, dict): diff --git a/backend/web/services/idle_reaper.py b/backend/web/services/idle_reaper.py index 90651365a..a739aa9fb 100644 --- a/backend/web/services/idle_reaper.py +++ b/backend/web/services/idle_reaper.py @@ -40,7 +40,7 @@ async def idle_reaper_loop(app_obj: FastAPI) -> None: try: count = await asyncio.to_thread(run_idle_reaper_once, app_obj) if count > 0: - print(f"[idle-reaper] paused+closed {count} expired chat session(s)") + print(f"[idle-reaper] reclaimed+closed {count} expired chat session(s)") except Exception as e: print(f"[idle-reaper] error: {e}") await asyncio.sleep(IDLE_REAPER_INTERVAL_SEC) diff --git a/backend/web/services/library_service.py b/backend/web/services/library_service.py index 2919f8dd6..a33886e17 100644 --- a/backend/web/services/library_service.py +++ b/backend/web/services/library_service.py @@ -15,19 +15,6 @@ LIBRARY_DIR = library_dir() -def ensure_library_dir() -> None: - LIBRARY_DIR.mkdir(parents=True, exist_ok=True) - (LIBRARY_DIR / "skills").mkdir(exist_ok=True) - (LIBRARY_DIR / "agents").mkdir(exist_ok=True) - legacy_recipe_dir = LIBRARY_DIR / "recipes" - # @@@recipe-storage-cutover - recipes now live in SQLite only; delete the dead file tree so it cannot masquerade as live state. - if legacy_recipe_dir.exists(): - if legacy_recipe_dir.is_dir(): - shutil.rmtree(legacy_recipe_dir) - else: - legacy_recipe_dir.unlink() - - def _read_json(path: Path, default: Any = None) -> Any: if not path.exists(): return default if default is not None else {} diff --git a/backend/web/services/marketplace_client.py b/backend/web/services/marketplace_client.py index 49de82258..47dc1fb49 100644 --- a/backend/web/services/marketplace_client.py +++ b/backend/web/services/marketplace_client.py @@ -17,7 +17,7 @@ HUB_URL = os.environ.get("MYCEL_HUB_URL", "http://localhost:8090") -_hub_client = httpx.Client(timeout=30.0) +_hub_client = httpx.Client(timeout=30.0, trust_env=False) def _hub_api(method: str, path: str, **kwargs: Any) -> dict: diff --git a/backend/web/services/member_service.py b/backend/web/services/member_service.py index ac295e4f4..d1ae1f965 100644 --- a/backend/web/services/member_service.py +++ b/backend/web/services/member_service.py @@ -22,7 +22,6 @@ import yaml from backend.web.core.paths import avatars_dir, members_dir -from backend.web.services.thread_naming import canonical_entity_name from backend.web.utils.serializers import avatar_url from config.defaults.tool_catalog import TOOLS_BY_NAME, ToolDef from config.loader import AgentLoader @@ -38,10 +37,6 @@ def _load_tools_catalog() -> dict[str, ToolDef]: return TOOLS_BY_NAME -def ensure_members_dir() -> None: - MEMBERS_DIR.mkdir(parents=True, exist_ok=True) - - # ── Low-level I/O helpers ── @@ -346,15 +341,8 @@ def list_members(owner_user_id: str | None = None, member_repo: Any = None) -> l # @@@auth-scope — scoped by owner from DB, config from filesystem if owner_user_id: if member_repo is None: - from storage.providers.sqlite.member_repo import SQLiteMemberRepo - - repo = SQLiteMemberRepo() - try: - agents = repo.list_by_owner_user_id(owner_user_id) - finally: - repo.close() - else: - agents = member_repo.list_by_owner_user_id(owner_user_id) + raise RuntimeError("member_repo is required when owner_user_id is provided") + 
agents = member_repo.list_by_owner_user_id(owner_user_id) results = [] for agent in agents: agent_dir = MEMBERS_DIR / agent.id @@ -391,9 +379,15 @@ def get_member(member_id: str) -> dict[str, Any] | None: return _member_to_dict(member_dir) -def create_member(name: str, description: str = "", owner_user_id: str | None = None, member_repo: Any = None) -> dict[str, Any]: +def create_member( + name: str, + description: str = "", + owner_user_id: str | None = None, + member_repo: Any = None, + agent_config_repo: Any = None, +) -> dict[str, Any]: from storage.contracts import MemberRow, MemberType - from storage.providers.sqlite.member_repo import generate_member_id + from storage.utils import generate_member_id now = time.time() now_ms = int(now * 1000) @@ -411,6 +405,19 @@ def create_member(name: str, description: str = "", owner_user_id: str | None = }, ) + # Dual-write to Supabase repo + if agent_config_repo: + _save_config_to_repo( + agent_config_repo, + member_id, + name=name, + description=description, + status="draft", + version="0.1.0", + created_at=now_ms, + updated_at=now_ms, + ) + # Persist to members table so list_members finds it if owner_user_id: row = MemberRow( @@ -422,16 +429,9 @@ def create_member(name: str, description: str = "", owner_user_id: str | None = owner_user_id=owner_user_id, created_at=now, ) - if member_repo is not None: - member_repo.create(row) - else: - from storage.providers.sqlite.member_repo import SQLiteMemberRepo - - repo = SQLiteMemberRepo() - try: - repo.create(row) - finally: - repo.close() + if member_repo is None: + raise RuntimeError("member_repo is required when owner_user_id is provided") + member_repo.create(row) return get_member(member_id) # type: ignore @@ -439,8 +439,6 @@ def create_member(name: str, description: str = "", owner_user_id: str | None = def update_member( member_id: str, member_repo: Any = None, - entity_repo: Any = None, - thread_repo: Any = None, **fields: Any, ) -> dict[str, Any] | None: if member_id == "__leon__": @@ -472,45 +470,15 @@ def update_member( meta["updated_at"] = int(time.time() * 1000) _write_json(member_dir / "meta.json", meta) - # Sync name to DB if "name" in updates: if member_repo is None: - from storage.providers.sqlite.member_repo import SQLiteMemberRepo - - member_repo = SQLiteMemberRepo() - if entity_repo is None: - from storage.providers.sqlite.entity_repo import SQLiteEntityRepo - - entity_repo = SQLiteEntityRepo() - if thread_repo is None: - from storage.providers.sqlite.thread_repo import SQLiteThreadRepo - - thread_repo = SQLiteThreadRepo() - + raise RuntimeError("member_repo is required to update member name") member_repo.update(member_id, name=updates["name"]) - member = member_repo.get_by_id(member_id) - if member is None: - raise ValueError(f"Member {member_id} not found after update") - for entity in entity_repo.get_by_member_id(member_id): - if entity.thread_id is None: - entity_repo.update(entity.id, name=member.name) - continue - thread = thread_repo.get_by_id(entity.thread_id) - if thread is None: - raise ValueError(f"Entity {entity.id} references missing thread {entity.thread_id}") - entity_repo.update( - entity.id, - name=canonical_entity_name( - member.name, - is_main=bool(thread["is_main"]), - branch_index=int(thread["branch_index"]), - ), - ) return get_member(member_id) -def update_member_config(member_id: str, config_patch: dict[str, Any]) -> dict[str, Any] | None: +def update_member_config(member_id: str, config_patch: dict[str, Any], agent_config_repo: Any = None) -> dict[str, Any] 
| None: if member_id == "__leon__": member_dir = _ensure_leon_dir() else: @@ -549,9 +517,94 @@ def update_member_config(member_id: str, config_patch: dict[str, Any]) -> dict[s meta = _read_json(member_dir / "meta.json", {}) meta["updated_at"] = int(time.time() * 1000) _write_json(member_dir / "meta.json", meta) + + # Dual-write full state to Supabase repo + if agent_config_repo: + try: + bundle = AgentLoader().load_bundle(member_dir) + _save_config_to_repo( + agent_config_repo, + member_id, + name=bundle.agent.name, + description=bundle.agent.description, + model=bundle.agent.model, + tools=bundle.agent.tools, + system_prompt=bundle.agent.system_prompt, + status=bundle.meta.get("status", "draft"), + version=bundle.meta.get("version", "0.1.0"), + created_at=bundle.meta.get("created_at", 0), + updated_at=bundle.meta.get("updated_at", 0), + runtime={k: {"enabled": v.enabled, "desc": v.desc} for k, v in bundle.runtime.items()}, + mcp={n: {"command": s.command, "args": s.args, "env": s.env, "disabled": s.disabled} for n, s in bundle.mcp.items()}, + ) + # Sync rules + for rule in bundle.rules: + agent_config_repo.save_rule(member_id, f"{rule['name']}.md", rule.get("content", "")) + # Sync sub-agents + for agent_cfg in bundle.agents: + if agent_cfg.source_dir and agent_cfg.source_dir.resolve() == _SYSTEM_AGENTS_DIR: + continue # skip builtins + agent_config_repo.save_sub_agent( + member_id, + agent_cfg.name, + description=agent_cfg.description, + model=agent_cfg.model, + tools=agent_cfg.tools, + system_prompt=agent_cfg.system_prompt, + ) + # Sync skills + for skill in bundle.skills: + skill_path = Path(skill.get("path", "")) + skill_md = skill_path / "SKILL.md" + content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else "" + agent_config_repo.save_skill(member_id, skill["name"], content) + except Exception: + logger.warning("Failed to sync config to repo for member %s", member_id, exc_info=True) + return get_member(member_id) +# ── Supabase repo dual-write helper ── + + +def _save_config_to_repo( + agent_config_repo: Any, + member_id: str, + *, + name: str, + description: str = "", + model: str | None = None, + tools: list[str] | None = None, + system_prompt: str = "", + status: str = "draft", + version: str = "0.1.0", + created_at: int = 0, + updated_at: int = 0, + runtime: dict | None = None, + mcp: dict | None = None, +) -> None: + """Save agent config to Supabase repo. 
Best-effort — logs errors but doesn't raise.""" + try: + agent_config_repo.save_config( + member_id, + { + "name": name, + "description": description, + "model": model, + "tools": tools or ["*"], + "system_prompt": system_prompt, + "status": status, + "version": version, + "created_at": created_at, + "updated_at": updated_at, + "runtime": runtime or {}, + "mcp": mcp or {}, + }, + ) + except Exception: + logger.warning("Failed to save config to repo for member %s", member_id, exc_info=True) + + # ── Write helpers for config fields → file structure ── @@ -678,7 +731,7 @@ def _write_mcps(member_dir: Path, mcps: list[dict[str, Any]]) -> None: # ── Publish / Delete ── -def publish_member(member_id: str, bump_type: str = "patch") -> dict[str, Any] | None: +def publish_member(member_id: str, bump_type: str = "patch", agent_config_repo: Any = None) -> dict[str, Any] | None: member_dir = MEMBERS_DIR / member_id if not member_dir.is_dir(): return None @@ -695,29 +748,47 @@ def publish_member(member_id: str, bump_type: str = "patch") -> dict[str, Any] | meta["status"] = "active" meta["updated_at"] = int(time.time() * 1000) _write_json(member_dir / "meta.json", meta) + + # Dual-write publish status to Supabase repo + if agent_config_repo: + try: + config = agent_config_repo.get_config(member_id) + if config: + agent_config_repo.save_config( + member_id, + { + **config, + "version": meta["version"], + "status": "active", + "updated_at": meta["updated_at"], + }, + ) + except Exception: + logger.warning("Failed to update repo for publish of %s", member_id, exc_info=True) + return get_member(member_id) -def delete_member(member_id: str, member_repo: Any = None) -> bool: +def delete_member(member_id: str, member_repo: Any = None, agent_config_repo: Any = None) -> bool: if member_id == "__leon__": return False member_dir = MEMBERS_DIR / member_id if not member_dir.is_dir(): return False + # Delete from Supabase repo before removing filesystem + if agent_config_repo: + try: + agent_config_repo.delete_config(member_id) + except Exception: + logger.warning("Failed to delete config from repo for %s", member_id, exc_info=True) + shutil.rmtree(member_dir) # Also remove from DB - if member_repo is not None: - member_repo.delete(member_id) - else: - from storage.providers.sqlite.member_repo import SQLiteMemberRepo - - repo = SQLiteMemberRepo() - try: - repo.delete(member_id) - finally: - repo.close() + if member_repo is None: + raise RuntimeError("member_repo is required to delete member") + member_repo.delete(member_id) return True @@ -740,10 +811,11 @@ def install_from_snapshot( owner_user_id: str, existing_member_id: str | None = None, member_repo: Any = None, + agent_config_repo: Any = None, ) -> str: """Create or update a local member from a marketplace snapshot.""" from storage.contracts import MemberRow, MemberType - from storage.providers.sqlite.member_repo import generate_member_id + from storage.utils import generate_member_id now = time.time() now_ms = int(now * 1000) @@ -843,15 +915,37 @@ def install_from_snapshot( owner_user_id=owner_user_id, created_at=now, ) - if member_repo is not None: - member_repo.create(row) - else: - from storage.providers.sqlite.member_repo import SQLiteMemberRepo - - repo = SQLiteMemberRepo() + if member_repo is None: + raise RuntimeError("member_repo is required to register new member from snapshot") + member_repo.create(row) + + # Dual-write to Supabase repo + if agent_config_repo: + _save_config_to_repo( + agent_config_repo, + member_id, + name=name, + 
description=description, + status=meta["status"], + version=meta["version"], + created_at=meta["created_at"], + updated_at=meta["updated_at"], + runtime=runtime_data if runtime_data else {}, + mcp=mcp_data if mcp_data else {}, + ) + # Sync rules from snapshot + for rule in snapshot.get("rules", []): + rule_name = _sanitize_name(rule.get("name", "default")) + try: + agent_config_repo.save_rule(member_id, f"{rule_name}.md", rule.get("content", "")) + except Exception: + logger.warning("Failed to save snapshot rule %s for member %s", rule_name, member_id, exc_info=True) + # Sync skills from snapshot + for skill in snapshot.get("skills", []): + skill_name = _sanitize_name(skill.get("name", "default")) try: - repo.create(row) - finally: - repo.close() + agent_config_repo.save_skill(member_id, skill_name, skill.get("content", "")) + except Exception: + logger.warning("Failed to save snapshot skill %s for member %s", skill_name, member_id, exc_info=True) return member_id diff --git a/backend/web/services/message_routing.py b/backend/web/services/message_routing.py index 7984e9552..d73dfef32 100644 --- a/backend/web/services/message_routing.py +++ b/backend/web/services/message_routing.py @@ -19,6 +19,7 @@ async def route_message_to_brain( sender_name: str | None = None, sender_avatar_url: str | None = None, attachments: list[str] | None = None, + message_metadata: dict[str, Any] | None = None, ) -> dict: """Route message to agent brain thread. @@ -26,6 +27,7 @@ async def route_message_to_brain( ACTIVE → enqueue as steer """ from backend.web.services.agent_pool import get_or_create_agent, resolve_thread_sandbox + from backend.web.services.resource_cache import clear_monitor_resource_overview_cache from backend.web.services.streaming_service import start_agent_run sandbox_type = resolve_thread_sandbox(app, thread_id) @@ -71,7 +73,12 @@ async def route_message_to_brain( return {"status": "injected", "routing": "steer", "thread_id": thread_id} logger.debug("[route] → START RUN (idle→active)") meta = {"source": source, "sender_name": sender_name, "sender_avatar_url": sender_avatar_url} + if message_metadata: + meta.update(message_metadata) if attachments: meta["attachments"] = attachments run_id = start_agent_run(agent, thread_id, run_content, app, message_metadata=meta) + # @@@monitor-resource-cache-run-start - a fresh run can create or resume a lease immediately. + # Drop the cached monitor snapshot so the next /api/monitor/resources read reflects the live topology. 
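A rough sketch of the snapshot cache being invalidated here (internals assumed; the real implementation lives in `resource_cache`): reads memoize the overview for a short TTL, and a run start clears the memo so the next monitor read rebuilds from live state.

```python
import time
from typing import Callable

_cache: tuple[float, dict] | None = None  # (fetched_at_monotonic, snapshot)
_TTL_SEC = 5.0


def get_resource_overview(fetch: Callable[[], dict]) -> dict:
    global _cache
    if _cache is not None and time.monotonic() - _cache[0] < _TTL_SEC:
        return _cache[1]  # fresh enough — serve the memoized snapshot
    snapshot = fetch()
    _cache = (time.monotonic(), snapshot)
    return snapshot


def clear_monitor_resource_overview_cache() -> None:
    # Called right after start_agent_run: a new run can create or resume a
    # lease immediately, so the memo must not outlive the topology change.
    global _cache
    _cache = None


assert get_resource_overview(lambda: {"leases": 1}) == {"leases": 1}  # miss → fetch
assert get_resource_overview(lambda: {"leases": 2}) == {"leases": 1}  # hit → memo
clear_monitor_resource_overview_cache()
assert get_resource_overview(lambda: {"leases": 2}) == {"leases": 2}  # cleared → refetch
```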
+ clear_monitor_resource_overview_cache() return {"status": "started", "routing": "direct", "run_id": run_id, "thread_id": thread_id} diff --git a/backend/web/services/monitor_service.py b/backend/web/services/monitor_service.py index 31f59b729..e813718a6 100644 --- a/backend/web/services/monitor_service.py +++ b/backend/web/services/monitor_service.py @@ -3,18 +3,29 @@ from __future__ import annotations import json +import re from datetime import UTC, datetime from typing import Any from backend.web.core.storage_factory import make_sandbox_monitor_repo from backend.web.services.sandbox_service import init_providers_and_managers, load_all_sessions +from storage.providers.sqlite.chat_session_repo import SQLiteChatSessionRepo from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path +from storage.providers.sqlite.lease_repo import SQLiteLeaseRepo # --------------------------------------------------------------------------- # Mapping helpers (private) # --------------------------------------------------------------------------- +def make_chat_session_repo() -> SQLiteChatSessionRepo: + return SQLiteChatSessionRepo(db_path=resolve_role_db_path(SQLiteDBRole.SANDBOX)) + + +def make_lease_repo() -> SQLiteLeaseRepo: + return SQLiteLeaseRepo(db_path=resolve_role_db_path(SQLiteDBRole.SANDBOX)) + + def _format_time_ago(iso_timestamp: str | None) -> str: if not iso_timestamp: return "never" @@ -75,6 +86,325 @@ def _lease_link(lease_id: str | None) -> dict[str, Any]: return {"lease_id": lease_id, "lease_url": f"/lease/{lease_id}" if lease_id else None} +LEASE_SEMANTIC_ORDER = [ + "orphan_diverged", + "diverged", + "orphan", + "healthy", +] + +LEASE_SEMANTIC_META = { + "orphan_diverged": { + "title": "Orphaned + Diverged", + "description": "Lease lost thread binding while desired and observed state still disagree.", + }, + "diverged": { + "title": "Diverged", + "description": "Lease is still attached to a thread, but runtime state has not converged.", + }, + "orphan": { + "title": "Orphans", + "description": "Lease has no active thread binding. Usually cleanup or historical residue.", + }, + "healthy": { + "title": "Healthy", + "description": "Lease has a thread binding and desired state matches observed state.", + }, +} + + +EVAL_NOTE_KEYS = [ + "runner", + "rc", + "sandbox", + "run_dir", + "stdout_log", + "stderr_log", +] + +LEASE_TRIAGE_ORDER = [ + "active_drift", + "detached_residue", + "orphan_cleanup", + "healthy_capacity", +] + +LEASE_TRIAGE_META = { + "active_drift": { + "title": "Active Drift", + "description": "Leases whose desired and observed state still disagree recently enough to warrant active operator attention.", + "tone": "warning", + }, + "detached_residue": { + "title": "Detached Residue", + "description": ( + "Leases still marked desired=running but observed=detached long after the runtime " + "stopped moving. Usually cleanup debt, not live pressure." 
+        ),
+        "tone": "danger",
+    },
+    "orphan_cleanup": {
+        "title": "Orphan Cleanup",
+        "description": "Lease rows that have already lost thread binding and mainly represent cleanup backlog or historical residue.",
+        "tone": "warning",
+    },
+    "healthy_capacity": {
+        "title": "Healthy Capacity",
+        "description": "Leases with attached thread context and converged runtime state.",
+        "tone": "success",
+    },
+}
+
+DETACHED_RESIDUE_THRESHOLD_HOURS = 4.0
+RESOURCE_CLEANUP_ALLOWED_CATEGORIES = {"detached_residue", "orphan_cleanup"}
+ACTIVE_CHAT_SESSION_STATUSES = {"active", "idle", "paused"}
+
+
+def _classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) -> dict[str, str]:
+    is_orphan = not bool(thread_id)
+    is_converged = bool(badge.get("converged"))
+    if is_orphan and not is_converged:
+        category = "orphan_diverged"
+    elif not is_converged:
+        category = "diverged"
+    elif is_orphan:
+        category = "orphan"
+    else:
+        category = "healthy"
+    meta = LEASE_SEMANTIC_META[category]
+    return {
+        "category": category,
+        "title": meta["title"],
+        "description": meta["description"],
+    }
+
+
+def _parse_local_timestamp(iso_timestamp: str | None) -> datetime | None:
+    if not iso_timestamp:
+        return None
+    try:
+        parsed = datetime.fromisoformat(iso_timestamp.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+    if parsed.tzinfo is not None:
+        # Normalize aware timestamps (trailing Z or any +/-HH:MM offset) to
+        # naive local time; _hours_since subtracts against a naive
+        # datetime.now(), and mixing aware with naive would raise TypeError.
+        parsed = parsed.astimezone().replace(tzinfo=None)
+    return parsed
+
+
+def _hours_since(iso_timestamp: str | None) -> float | None:
+    dt = _parse_local_timestamp(iso_timestamp)
+    if dt is None:
+        return None
+    delta = datetime.now() - dt
+    return delta.total_seconds() / 3600
+
+
+def _classify_lease_triage(
+    *,
+    thread_id: str | None,
+    badge: dict[str, Any],
+    observed_state: str | None,
+    desired_state: str | None,
+    updated_at: str | None,
+) -> dict[str, Any]:
+    observed = str(observed_state or "").strip().lower() or None
+    desired = str(desired_state or "").strip().lower() or None
+    age_hours = _hours_since(updated_at)
+    is_orphan = not bool(thread_id)
+    is_converged = bool(badge.get("converged"))
+
+    if is_orphan:
+        key = "orphan_cleanup"
+    elif is_converged:
+        key = "healthy_capacity"
+    elif observed == "detached" and desired == "running" and age_hours is not None and age_hours >= DETACHED_RESIDUE_THRESHOLD_HOURS:
+        key = "detached_residue"
+    else:
+        key = "active_drift"
+
+    meta = LEASE_TRIAGE_META[key]
+    return {
+        "category": key,
+        "title": meta["title"],
+        "description": meta["description"],
+        "tone": meta["tone"],
+        "age_hours": age_hours,
+    }
+
+
+def _cleanable_lease_ids(lease_ids: list[str]) -> list[str]:
+    cleaned: list[str] = []
+    seen: set[str] = set()
+    for raw in lease_ids:
+        lease_id = str(raw or "").strip()
+        if not lease_id or lease_id in seen:
+            continue
+        seen.add(lease_id)
+        cleaned.append(lease_id)
+    if not cleaned:
+        raise ValueError("lease_ids must contain at least one non-empty lease id")
+    return cleaned
+
+
+def _triage_category_for_row(row: dict[str, Any]) -> str:
+    badge = _make_badge(row.get("desired_state"), row.get("observed_state"))
+    triage = _classify_lease_triage(
+        thread_id=row.get("thread_id"),
+        badge=badge,
+        observed_state=row.get("observed_state"),
+        desired_state=row.get("desired_state"),
+        updated_at=row.get("updated_at"),
+    )
+    return str(triage["category"])
+
+
+def _extract_eval_note_value(notes: str, key: str) -> str | None:
+    match = re.search(rf"(?:^|[ |]){re.escape(key)}=([^ ]+)", notes)
+    if not match:
+        return None
+    return match.group(1).strip()
+
+
+def 
build_evaluation_operator_surface( + *, + status: str, + notes: str, + score: dict[str, Any], + threads_total: int, + threads_running: int, + threads_done: int, +) -> dict[str, Any]: + extracted = {key: _extract_eval_note_value(notes, key) for key in EVAL_NOTE_KEYS} + rc_text = extracted.get("rc") + try: + rc = int(rc_text) if rc_text is not None else None + except ValueError: + rc = None + + scored = bool(score.get("scored")) + score_gate = str(score.get("score_gate") or "provisional") + artifacts = [ + { + "label": "Run directory", + "path": score.get("run_dir") or extracted.get("run_dir"), + }, + {"label": "Run manifest", "path": score.get("manifest_path")}, + {"label": "STDOUT log", "path": extracted.get("stdout_log")}, + {"label": "STDERR log", "path": extracted.get("stderr_log")}, + {"label": "Eval summary", "path": score.get("eval_summary_path")}, + {"label": "Trace summaries", "path": score.get("trace_summaries_path")}, + ] + artifacts = [ + { + **item, + "status": "present" if item["path"] else "missing", + } + for item in artifacts + ] + artifact_summary = { + "present": sum(1 for item in artifacts if item["status"] == "present"), + "missing": sum(1 for item in artifacts if item["status"] == "missing"), + "total": len(artifacts), + } + + facts = [ + {"label": "Status", "value": status}, + {"label": "Score gate", "value": score_gate}, + {"label": "Threads materialized", "value": str(threads_total)}, + {"label": "Threads running", "value": str(threads_running)}, + {"label": "Threads done", "value": str(threads_done)}, + ] + runner = extracted.get("runner") + if runner: + facts.append({"label": "Runner", "value": runner}) + if rc is not None: + facts.append({"label": "Exit code", "value": str(rc)}) + + kind = "collecting_runtime_evidence" + tone = "default" + headline = "Evaluation is still collecting runtime evidence." + summary = "Use the artifacts below to inspect progress and confirm whether thread rows are materializing." + next_steps = [ + "Open the run manifest to confirm the slice payload and output directory.", + "Inspect stdout/stderr before assuming the run is healthy.", + ] + + if status == "provisional" and not scored: + kind = "provisional_waiting_for_summary" + tone = "warning" + headline = "Evaluation is provisional. Final score is blocked." + summary = "This run has not produced the final eval summary yet, so publishable scoring is intentionally withheld." + next_steps = [ + "Check whether eval_summary_path is still missing because the run is ongoing or because the runner exited early.", + "Use stdout/stderr logs to confirm whether the solve phase actually started.", + ] + + if rc is not None and rc != 0 and threads_total == 0: + kind = "bootstrap_failure" + tone = "danger" + headline = "Runner exited before evaluation threads materialized." + summary = "Treat this as a bootstrap failure, not as an empty successful run. No evaluation thread rows were created." + next_steps = [ + "Inspect STDERR first to find the failing bootstrap step.", + "Use the run manifest and stdout log to confirm whether the slice was prepared before exit.", + "Re-run only after the failing dependency or model configuration is understood.", + ] + elif status == "running" and threads_total == 0 and threads_running > 0: + kind = "running_waiting_for_threads" + tone = "default" + headline = "Evaluation is actively running while thread rows catch up." + summary = ( + "The runner is alive, but thread rows have not materialized yet. Treat this as an ingestion lag window, not as an empty run." 
+ ) + next_steps = [ + "Refresh after the first thread row materializes.", + "Use stdout/stderr to confirm the solve loop is still advancing.", + ] + elif status == "running": + kind = "running_active" + tone = "default" + headline = "Evaluation is actively running." + summary = "Thread rows and traces may lag behind the runner. Use live progress and logs before declaring drift." + next_steps = [ + "Refresh after new thread rows materialize.", + "Inspect traces only after the first active thread appears.", + ] + elif status == "completed_with_errors" and scored: + kind = "completed_with_errors" + tone = "warning" + headline = "Evaluation completed with recorded errors." + summary = ( + "Some thread rows reached completion, but at least one instance recorded an error. Treat this as reviewable but not clean." + ) + next_steps = [ + "Inspect error-bearing threads before comparing this run against cleaner baselines.", + "Use eval summary and trace summaries to isolate failing instances.", + ] + elif status == "completed" and scored: + kind = "completed_publishable" + tone = "success" + headline = "Evaluation finished with a publishable score surface." + summary = "Score artifacts are present. Use the thread table to drill into trace-level evidence." + next_steps = [ + "Open threads with low-quality traces and inspect tool-call detail.", + "Use the eval summary and trace summaries to compare runs.", + ] + + return { + "kind": kind, + "tone": tone, + "headline": headline, + "summary": summary, + "facts": facts, + "artifacts": artifacts, + "artifact_summary": artifact_summary, + "next_steps": next_steps, + "raw_notes": notes, + } + + # --------------------------------------------------------------------------- # Mappers (private) # --------------------------------------------------------------------------- @@ -130,21 +460,82 @@ def _map_thread_detail(thread_id: str, sessions: list[dict[str, Any]]) -> dict[s def _map_leases(rows: list[dict[str, Any]]) -> dict[str, Any]: - items = [ - { - "lease_id": row["lease_id"], - "lease_url": f"/lease/{row['lease_id']}", - "provider": row["provider_name"], - "instance_id": row["current_instance_id"], - "thread": _thread_ref(row["thread_id"]), - "state_badge": _make_badge(row["desired_state"], row["observed_state"]), - "error": row["last_error"], - "updated_at": row["updated_at"], - "updated_ago": _format_time_ago(row["updated_at"]), - } - for row in rows - ] - return {"title": "All Leases", "count": len(items), "items": items} + items = [] + for row in rows: + badge = _make_badge(row["desired_state"], row["observed_state"]) + triage = _classify_lease_triage( + thread_id=row["thread_id"], + badge=badge, + observed_state=row["observed_state"], + desired_state=row["desired_state"], + updated_at=row["updated_at"], + ) + items.append( + { + "lease_id": row["lease_id"], + "lease_url": f"/lease/{row['lease_id']}", + "provider": row["provider_name"], + "instance_id": row["current_instance_id"], + "thread": _thread_ref(row["thread_id"]), + "state_badge": badge, + "semantics": _classify_lease_semantics(thread_id=row["thread_id"], badge=badge), + "triage": triage, + "error": row["last_error"], + "updated_at": row["updated_at"], + "updated_ago": _format_time_ago(row["updated_at"]), + } + ) + + summary = {key: 0 for key in LEASE_SEMANTIC_ORDER} + for item in items: + summary[item["semantics"]["category"]] += 1 + summary["total"] = len(items) + + groups = [] + for key in LEASE_SEMANTIC_ORDER: + meta = LEASE_SEMANTIC_META[key] + group_items = [item for item in items if 
item["semantics"]["category"] == key] + groups.append( + { + "key": key, + "title": meta["title"], + "description": meta["description"], + "count": len(group_items), + "items": group_items, + } + ) + + triage_summary = {key: 0 for key in LEASE_TRIAGE_ORDER} + for item in items: + triage_summary[item["triage"]["category"]] += 1 + triage_summary["total"] = len(items) + + triage_groups = [] + for key in LEASE_TRIAGE_ORDER: + meta = LEASE_TRIAGE_META[key] + group_items = [item for item in items if item["triage"]["category"] == key] + triage_groups.append( + { + "key": key, + "title": meta["title"], + "description": meta["description"], + "tone": meta["tone"], + "count": len(group_items), + "items": group_items, + } + ) + + return { + "title": "All Leases", + "count": len(items), + "summary": summary, + "groups": groups, + "triage": { + "summary": triage_summary, + "groups": triage_groups, + }, + "items": items, + } def _map_lease_detail( @@ -192,6 +583,47 @@ def _map_lease_detail( } +def _historical_lease_detail( + lease_id: str, + sessions: list[dict[str, Any]], + events: list[dict[str, Any]], +) -> dict[str, Any] | None: + if not sessions and not events: + return None + + created_candidates = [ + str(value) for value in [*(row.get("started_at") for row in sessions), *(row.get("created_at") for row in events)] if value + ] + updated_candidates = [ + str(value) + for value in [ + *(row.get("ended_at") or row.get("started_at") for row in sessions), + *(row.get("created_at") for row in events), + ] + if value + ] + first_session = sessions[0] if sessions else {} + thread_ids: list[str] = [] + seen_threads: set[str] = set() + for row in sessions: + thread_id = str(row.get("thread_id") or "").strip() + if thread_id and thread_id not in seen_threads: + seen_threads.add(thread_id) + thread_ids.append(thread_id) + + lease = { + "provider_name": first_session.get("provider_name") or "unknown", + "current_instance_id": first_session.get("current_instance_id"), + "created_at": min(created_candidates) if created_candidates else None, + "updated_at": max(updated_candidates) if updated_candidates else None, + "desired_state": first_session.get("desired_state"), + "observed_state": first_session.get("observed_state"), + "last_error": first_session.get("last_error"), + } + threads = [{"thread_id": thread_id} for thread_id in thread_ids] + return _map_lease_detail(lease_id, lease, threads, events) + + def _map_diverged(rows: list[dict[str, Any]]) -> dict[str, Any]: items = [ { @@ -297,16 +729,152 @@ def list_leases() -> dict[str, Any]: repo.close() +def cleanup_resource_leases( + *, + action: str, + lease_ids: list[str], + expected_category: str, +) -> dict[str, Any]: + if action != "cleanup_residue": + raise ValueError(f"Unsupported cleanup action: {action}") + if expected_category not in RESOURCE_CLEANUP_ALLOWED_CATEGORIES: + raise ValueError("expected_category must be one of: detached_residue, orphan_cleanup") + + target_lease_ids = _cleanable_lease_ids(lease_ids) + monitor_repo = make_sandbox_monitor_repo() + lease_repo = make_lease_repo() + chat_session_repo = make_chat_session_repo() + try: + rows_by_id = {str(row.get("lease_id") or ""): row for row in monitor_repo.query_leases() if row.get("lease_id")} + providers, _ = init_providers_and_managers() + cleaned: list[dict[str, Any]] = [] + skipped: list[str] = [] + errors: list[dict[str, Any]] = [] + + for lease_id in target_lease_ids: + row = rows_by_id.get(lease_id) + if row is None: + skipped.append(lease_id) + errors.append({"lease_id": lease_id, 
"reason": "lease_not_found"}) + continue + + actual_category = _triage_category_for_row(row) + if actual_category != expected_category: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "category_mismatch", + "expected_category": expected_category, + "actual_category": actual_category, + } + ) + continue + + sessions = monitor_repo.query_lease_sessions(lease_id) + live_session_ids = [ + str(session.get("chat_session_id")) + for session in sessions + if str(session.get("status") or "").strip().lower() in ACTIVE_CHAT_SESSION_STATUSES + ] + if live_session_ids: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "live_sessions_present", + "session_ids": live_session_ids, + } + ) + continue + + if chat_session_repo.lease_has_running_command(lease_id): + skipped.append(lease_id) + errors.append({"lease_id": lease_id, "reason": "running_command_present"}) + continue + + provider_name = str(row.get("provider_name") or "").strip() + instance_id = str(row.get("current_instance_id") or "").strip() or None + if instance_id: + provider = providers.get(provider_name) + if provider is None: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_unavailable", + "provider": provider_name, + } + ) + continue + if not provider.get_capability().can_destroy: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_destroy_unsupported", + "provider": provider_name, + } + ) + continue + try: + destroyed = provider.destroy_session(instance_id, sync=True) + except Exception as exc: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_destroy_failed", + "provider": provider_name, + "detail": str(exc), + } + ) + continue + if not destroyed: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_destroy_failed", + "provider": provider_name, + "detail": "destroy_session returned false", + } + ) + continue + + lease_repo.delete(lease_id) + cleaned.append({"lease_id": lease_id, "category": actual_category}) + + refreshed_summary = list_leases()["triage"]["summary"] + return { + "action": action, + "expected_category": expected_category, + "attempted": target_lease_ids, + "cleaned": cleaned, + "skipped": skipped, + "errors": errors, + "refreshed_summary": refreshed_summary, + } + finally: + chat_session_repo.close() + lease_repo.close() + monitor_repo.close() + + def get_lease(lease_id: str) -> dict[str, Any]: repo = make_sandbox_monitor_repo() try: lease = repo.query_lease(lease_id) - if not lease: - raise KeyError("Lease not found") threads = repo.query_lease_threads(lease_id) events = repo.query_lease_events(lease_id) + sessions = repo.query_lease_sessions(lease_id) finally: repo.close() + if not lease: + fallback = _historical_lease_detail(lease_id, sessions, events) + if fallback: + return fallback + raise KeyError("Lease not found") return _map_lease_detail(lease_id, lease, threads, events) diff --git a/backend/web/services/profile_service.py b/backend/web/services/profile_service.py index c6b755bde..60359431a 100644 --- a/backend/web/services/profile_service.py +++ b/backend/web/services/profile_service.py @@ -1,10 +1,11 @@ -"""Profile CRUD — config.json based.""" +"""Profile CRUD — config.json based, with auth-member override for signed-in shell.""" import json from pathlib import Path from typing import Any from config.user_paths import preferred_existing_user_home_path, user_home_path +from 
storage.contracts import MemberRow LEON_HOME = user_home_path() CONFIG_PATH = LEON_HOME / "config.json" @@ -24,7 +25,23 @@ def _write_json(path: Path, data: Any) -> None: path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8") -def get_profile() -> dict[str, Any]: +def _initials_from_name(name: str) -> str: + stripped = name.strip() + if not stripped: + return "U" + compact = "".join(part[:1] for part in stripped.split() if part) + if len(compact) >= 2: + return compact[:2].upper() + return stripped[:2].upper() + + +def get_profile(member: MemberRow | None = None) -> dict[str, Any]: + if member is not None: + return { + "name": member.name or "用户", + "initials": _initials_from_name(member.name or ""), + "email": member.email or "", + } cfg = _read_json(preferred_existing_user_home_path("config.json"), {}) profile = cfg.get("profile", {}) return { diff --git a/backend/web/services/resource_cache.py b/backend/web/services/resource_cache.py index 4b1d5f5fe..62846a653 100644 --- a/backend/web/services/resource_cache.py +++ b/backend/web/services/resource_cache.py @@ -10,7 +10,7 @@ from datetime import UTC, datetime from typing import Any -from backend.web.services import resource_service +from backend.web.services import monitor_service, resource_service _DEFAULT_REFRESH_INTERVAL_SEC = 90.0 @@ -24,6 +24,10 @@ def clear_resource_overview_cache() -> None: _snapshot_cache = None +def clear_monitor_resource_overview_cache() -> None: + clear_resource_overview_cache() + + def _now_iso() -> str: return datetime.now(UTC).isoformat().replace("+00:00", "Z") @@ -55,12 +59,37 @@ def _with_refresh_metadata( return payload +def _attach_monitor_triage(payload: dict[str, Any]) -> dict[str, Any]: + lease_payload = monitor_service.list_leases() + triage = lease_payload.get("triage") or {"summary": {}, "groups": []} + payload["triage"] = triage + return payload + + +def _snapshot_drifted_from_live_sessions(snapshot: dict[str, Any]) -> bool: + live_stats = resource_service.visible_resource_session_stats() + for provider in snapshot.get("providers") or []: + provider_id = str(provider.get("id") or "") + current = live_stats.get(provider_id, {"sessions": 0, "running": 0}) + cached_running = int(((provider.get("telemetry") or {}).get("running") or {}).get("used") or 0) + cached_sessions = len(provider.get("sessions") or []) + if cached_running != current["running"] or cached_sessions != current["sessions"]: + return True + for provider_id, current in live_stats.items(): + if current["running"] or current["sessions"]: + cached = next((item for item in snapshot.get("providers") or [] if str(item.get("id") or "") == provider_id), None) + if cached is None: + return True + return False + + def refresh_resource_overview_sync() -> dict[str, Any]: """Refresh cached overview snapshot and return latest payload.""" global _snapshot_cache started = time.perf_counter() try: payload = resource_service.list_resource_providers() + payload = _attach_monitor_triage(payload) duration_ms = (time.perf_counter() - started) * 1000 payload = _with_refresh_metadata(payload, duration_ms=duration_ms, status="ok", error=None) with _snapshot_lock: @@ -79,16 +108,29 @@ def refresh_resource_overview_sync() -> dict[str, Any]: return degraded +def refresh_monitor_resource_overview_sync() -> dict[str, Any]: + return refresh_resource_overview_sync() + + def get_resource_overview_snapshot() -> dict[str, Any]: """Return cached snapshot; perform one synchronous refresh on cold start.""" with _snapshot_lock: cached = 
copy.deepcopy(_snapshot_cache) if cached is not None: + # @@@resource-cache-live-drift - durable session truth lands in sandbox.db after a run + # starts; if the cached Resources snapshot no longer matches visible lease/session + # counts, refresh synchronously instead of serving a stale zero-sandbox card. + if _snapshot_drifted_from_live_sessions(cached): + return refresh_resource_overview_sync() return cached # @@@cold-start-cache-fill - route fallback fills cache once to keep first call deterministic. return refresh_resource_overview_sync() +def get_monitor_resource_overview_snapshot() -> dict[str, Any]: + return get_resource_overview_snapshot() + + async def resource_overview_refresh_loop() -> None: """Continuously refresh resource overview snapshot.""" interval_sec = _read_refresh_interval_sec() @@ -116,3 +158,7 @@ async def resource_overview_refresh_loop() -> None: print("[monitor] resource refresh loop timeout") except Exception as exc: print(f"[monitor] resource refresh loop error: {exc}") + + +async def monitor_resource_overview_refresh_loop() -> None: + await resource_overview_refresh_loop() diff --git a/backend/web/services/resource_projection_service.py b/backend/web/services/resource_projection_service.py new file mode 100644 index 000000000..41f3f1327 --- /dev/null +++ b/backend/web/services/resource_projection_service.py @@ -0,0 +1,119 @@ +"""User-visible resource projection service.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +from backend.web.services import resource_service, sandbox_service +from sandbox.provider import RESOURCE_CAPABILITY_KEYS +from storage.models import map_lease_to_session_status + + +def _now_iso() -> str: + return datetime.now(UTC).isoformat().replace("+00:00", "Z") + + +def _empty_metric(unit: str) -> dict[str, Any]: + return { + "used": None, + "limit": None, + "unit": unit, + "source": "unknown", + "freshness": "stale", + } + + +def _empty_capabilities() -> dict[str, bool]: + return {key: False for key in RESOURCE_CAPABILITY_KEYS} + + +def _build_provider_card(config_name: str, leases: list[dict[str, Any]]) -> dict[str, Any]: + display = resource_service.get_provider_display_contract(config_name) + capabilities, capability_error = resource_service.get_provider_capability_contract(config_name) + provider_type = str(display["type"]) + + sessions: list[dict[str, Any]] = [] + running_count = 0 + for lease in leases: + thread_id = str((lease.get("thread_ids") or [None])[0] or "") + owner = (lease.get("agents") or [{}])[0] + status = map_lease_to_session_status(lease.get("observed_state"), lease.get("desired_state")) + if status == "running": + running_count += 1 + sessions.append( + resource_service.build_resource_session_payload( + session_identity=f"{lease['lease_id']}:{thread_id}", + lease_id=str(lease["lease_id"]), + thread_id=thread_id, + owner=owner, + status=status, + started_at=str(lease.get("created_at") or ""), + metrics=None, + ) + ) + + telemetry = { + "running": { + "used": running_count, + "limit": None, + "unit": "sandbox", + "source": "derived", + "freshness": "live", + }, + "cpu": _empty_metric("%"), + "memory": _empty_metric("GB"), + "disk": _empty_metric("GB"), + } + availability = resource_service.build_provider_availability_payload( + available=capability_error is None, + running_count=running_count, + unavailable_reason=capability_error, + ) + + return { + "id": config_name, + "name": config_name, + "description": display["description"], + "vendor": display["vendor"], + 
"type": provider_type, + **availability, + "capabilities": capabilities, + "telemetry": telemetry, + "cardCpu": dict(telemetry["cpu"]), + "consoleUrl": display["console_url"], + "sessions": sessions, + } + + +def list_user_resource_providers(app: Any, owner_user_id: str) -> dict[str, Any]: + thread_repo = getattr(app.state, "thread_repo", None) + member_repo = getattr(app.state, "member_repo", None) + if thread_repo is None or member_repo is None: + raise RuntimeError("thread_repo and member_repo are required") + + leases = sandbox_service.list_user_leases( + owner_user_id, + thread_repo=thread_repo, + member_repo=member_repo, + ) + + leases_by_provider: dict[str, list[dict[str, Any]]] = {} + for lease in leases: + config_name = str(lease.get("provider_name") or "local") + leases_by_provider.setdefault(config_name, []).append(lease) + + providers = [_build_provider_card(config_name, provider_leases) for config_name, provider_leases in sorted(leases_by_provider.items())] + + return { + "summary": { + "snapshot_at": _now_iso(), + "total_providers": len(providers), + "active_providers": len([item for item in providers if item["status"] == "active"]), + "unavailable_providers": len([item for item in providers if item["status"] == "unavailable"]), + "running_sessions": sum(int(item["telemetry"]["running"]["used"] or 0) for item in providers), + "scope": "user", + "lease_count": len(leases), + }, + "providers": providers, + } diff --git a/backend/web/services/resource_service.py b/backend/web/services/resource_service.py index 236db63ab..58a58d8f6 100644 --- a/backend/web/services/resource_service.py +++ b/backend/web/services/resource_service.py @@ -8,7 +8,7 @@ from typing import Any from backend.web.core.config import SANDBOXES_DIR -from backend.web.core.storage_factory import list_resource_snapshots, make_sandbox_monitor_repo, upsert_resource_snapshot +from backend.web.core.storage_factory import list_resource_snapshots, make_sandbox_monitor_repo from backend.web.services.config_loader import SandboxConfigLoader from backend.web.services.sandbox_service import available_sandbox_types, build_provider_from_config_name from backend.web.utils.serializers import avatar_url @@ -23,6 +23,7 @@ probe_and_upsert_for_instance, ) from storage.models import map_lease_to_session_status +from storage.runtime import build_member_repo, build_resource_snapshot_repo, build_thread_repo _CONFIG_LOADER = SandboxConfigLoader(SANDBOXES_DIR) @@ -72,7 +73,8 @@ def _resolve_console_url(provider_name: str, config_name: str, *, sandboxes_dir: if provider_name == "e2b": return "https://e2b.dev" if provider_name == "daytona": - daytona = payload.get("daytona") if isinstance(payload.get("daytona"), dict) else {} + raw_daytona = payload.get("daytona") + daytona = raw_daytona if isinstance(raw_daytona, dict) else {} target = str(daytona.get("target") or "").strip().lower() if target == "cloud": return "https://app.daytona.io" @@ -81,6 +83,18 @@ def _resolve_console_url(provider_name: str, config_name: str, *, sandboxes_dir: return None +def get_provider_display_contract(config_name: str) -> dict[str, Any]: + provider_name = resolve_provider_name(config_name, sandboxes_dir=SANDBOXES_DIR) + catalog = _CATALOG.get(provider_name) or _CatalogEntry(vendor=None, description=provider_name, provider_type="cloud") + return { + "provider_name": provider_name, + "description": catalog.description, + "vendor": catalog.vendor, + "type": _resolve_provider_type(provider_name, config_name, sandboxes_dir=SANDBOXES_DIR), + "console_url": 
_resolve_console_url(provider_name, config_name, sandboxes_dir=SANDBOXES_DIR), + } + + # --------------------------------------------------------------------------- # Capability helpers # --------------------------------------------------------------------------- @@ -102,6 +116,13 @@ def _resolve_instance_capabilities(config_name: str) -> tuple[dict[str, bool], s return {key: normalized[key] for key in RESOURCE_CAPABILITY_KEYS}, None +def get_provider_capability_contract(config_name: str) -> tuple[dict[str, bool], str | None]: + capabilities, capability_error = _resolve_instance_capabilities(config_name) + if capability_error: + return _empty_capabilities(), capability_error + return capabilities, None + + # --------------------------------------------------------------------------- # Status/metric helpers # --------------------------------------------------------------------------- @@ -113,6 +134,14 @@ def _to_resource_status(available: bool, running_count: int) -> str: return "active" if running_count > 0 else "ready" +def build_provider_availability_payload(*, available: bool, running_count: int, unavailable_reason: str | None) -> dict[str, Any]: + return { + "status": _to_resource_status(available, running_count), + "unavailableReason": unavailable_reason, + "error": ({"code": "PROVIDER_UNAVAILABLE", "message": unavailable_reason} if unavailable_reason else None), + } + + def _to_metric_freshness(collected_at: str | None) -> str: if not collected_at: return "stale" @@ -216,17 +245,13 @@ def _to_session_metrics(snapshot: dict[str, Any] | None) -> dict[str, Any] | Non def _member_meta_map(member_repo: Any = None) -> dict[str, dict[str, str | None]]: """Build member_id → display metadata map from DB.""" + repo = member_repo + own_repo = False + if repo is None: + repo = build_member_repo() + own_repo = True try: - if member_repo is not None: - members = member_repo.list_all() - else: - from storage.providers.sqlite.member_repo import SQLiteMemberRepo - - repo = SQLiteMemberRepo() - try: - members = repo.list_all() - finally: - repo.close() + members = repo.list_all() return { m.id: { "member_name": m.name, @@ -237,6 +262,9 @@ def _member_meta_map(member_repo: Any = None) -> dict[str, dict[str, str | None] } except Exception: return {} + finally: + if own_repo: + repo.close() def _thread_agent_refs(thread_ids: list[str], thread_repo: Any = None) -> dict[str, str]: @@ -244,14 +272,11 @@ def _thread_agent_refs(thread_ids: list[str], thread_repo: Any = None) -> dict[s unique = sorted({tid for tid in thread_ids if tid}) if not unique: return {} - if thread_repo is None: - from storage.providers.sqlite.thread_repo import SQLiteThreadRepo - - repo = SQLiteThreadRepo() + repo = thread_repo + own_repo = False + if repo is None: + repo = build_thread_repo() own_repo = True - else: - repo = thread_repo - own_repo = False try: refs: dict[str, str] = {} for tid in unique: @@ -350,6 +375,92 @@ def _resolve_card_cpu_metric(provider_type: str, telemetry: dict[str, Any]) -> d return cpu +def _is_resource_visible_thread(thread_id: str | None) -> bool: + raw = str(thread_id or "").strip() + if raw.startswith("subagent-"): + return False + return True + + +def _resource_session_identity(session: dict[str, Any]) -> str: + lease_id = str(session.get("lease_id") or "") + thread_id = str(session.get("thread_id") or "") + if lease_id and thread_id: + # @@@resource-session-contract - resource cards are lease/thread scoped, not chat-session scoped. 
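+        # Illustrative identity mapping (hypothetical ids, not from this change):
+        #   {"lease_id": "ls_1", "thread_id": "th_a", "session_id": "cs_9"} -> "ls_1:th_a"
+        #   {"lease_id": "ls_1", "thread_id": "th_a", "session_id": "cs_7"} -> "ls_1:th_a"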
+ # Terminal fallback rows can carry distinct session ids for the same visible lease+thread binding. + return f"{lease_id}:{thread_id}" + session_id = str(session.get("session_id") or "") + if session_id: + return session_id + return f"{lease_id}:{thread_id or 'unbound'}" + + +def build_resource_session_payload( + *, + session_identity: str, + lease_id: str, + thread_id: str, + owner: dict[str, Any], + status: str, + started_at: str, + metrics: dict[str, Any] | None, +) -> dict[str, Any]: + return { + "id": session_identity, + "leaseId": lease_id, + "threadId": thread_id, + "memberId": str(owner.get("member_id") or ""), + "memberName": str(owner.get("member_name") or "未绑定Agent"), + "avatarUrl": owner.get("avatar_url"), + "status": status, + "startedAt": started_at, + "metrics": metrics, + } + + +def _project_user_visible_resource_sessions(repo: Any, rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Project raw monitor rows into the user-visible resource surface. + + @@@user-visible-resource-projection - raw monitor rows may be bound to a newer + subagent terminal even though the lease still belongs to a user-visible parent + thread. Keep raw monitor truth in the repo; only the Resources UI gets this + parent-thread preference. + """ + grouped: dict[str, list[dict[str, Any]]] = {} + for row in rows: + lease_id = str(row.get("lease_id") or "") + grouped.setdefault(lease_id, []).append(dict(row)) + + projected: list[dict[str, Any]] = [] + for lease_id, group in grouped.items(): + visible_rows = [row for row in group if _is_resource_visible_thread(row.get("thread_id"))] + if visible_rows: + projected.extend(visible_rows) + continue + + if not lease_id: + continue + + try: + thread_rows = repo.query_lease_threads(lease_id) + except Exception: + thread_rows = [] + + preferred_thread_id = next( + (str(item.get("thread_id") or "").strip() for item in thread_rows if _is_resource_visible_thread(item.get("thread_id"))), + "", + ) + if not preferred_thread_id: + continue + + base = dict(group[0]) + base["thread_id"] = preferred_thread_id + base["session_id"] = None + projected.append(base) + + return projected + + # --------------------------------------------------------------------------- # Public API: resource overview # --------------------------------------------------------------------------- @@ -359,7 +470,8 @@ def list_resource_providers() -> dict[str, Any]: # @@@overview-fast-path - avoid provider-network calls; overview uses DB session snapshot. 
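+    # Hedged sketch of this fast path (names as defined in this module):
+    #   rows = repo.list_sessions_with_leases()                   # durable sandbox.db truth
+    #   rows = _project_user_visible_resource_sessions(repo, rows)
+    #   -> a lease bound only to "subagent-*" threads is re-homed to its first
+    #      user-visible thread, or dropped from the overview if none exists.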
repo = make_sandbox_monitor_repo() try: - sessions = repo.list_sessions_with_leases() + raw_sessions = repo.list_sessions_with_leases() + sessions = _project_user_visible_resource_sessions(repo, raw_sessions) finally: repo.close() @@ -376,9 +488,8 @@ def list_resource_providers() -> dict[str, Any]: for item in available_sandbox_types(): config_name = str(item["name"]) available = bool(item.get("available")) - provider_name = resolve_provider_name(config_name, sandboxes_dir=SANDBOXES_DIR) - catalog = _CATALOG.get(provider_name) or _CatalogEntry(vendor=None, description=provider_name, provider_type="cloud") - capabilities, capability_error = _resolve_instance_capabilities(config_name) + display = get_provider_display_contract(config_name) + capabilities, capability_error = get_provider_capability_contract(config_name) effective_available = available and capability_error is None unavailable_reason: str | None = None if not effective_available: @@ -386,6 +497,7 @@ def list_resource_providers() -> dict[str, Any]: provider_sessions = grouped.get(config_name, []) normalized_sessions: list[dict[str, Any]] = [] + seen_session_ids: set[str] = set() running_count = 0 # @@@running-dedup - lease-driven query may yield multiple rows per lease (one per crew member). # Count each running lease only once. @@ -402,23 +514,26 @@ def list_resource_providers() -> dict[str, Any]: seen_running_leases.add(lease_id) session_metrics = _to_session_metrics(snapshot_by_lease.get(lease_id)) owner = owners.get(thread_id, {"member_id": None, "member_name": "未绑定Agent"}) + session_identity = _resource_session_identity(session) + # @@@resource-session-dedup - terminal fallback can surface multiple + # monitor rows for the same lease/thread binding. The overview + # contract is one session row per stable session identity. + if session_identity in seen_session_ids: + continue + seen_session_ids.add(session_identity) normalized_sessions.append( - { - # @@@resource-session-identity - monitor rows can legitimately have empty chat session ids. - # Use stable lease+thread identity so React keys do not collapse when one lease has multiple threads. 
- "id": str(session.get("session_id") or f"{lease_id}:{thread_id or 'unbound'}"), - "leaseId": lease_id, - "threadId": thread_id, - "memberId": str(owner.get("member_id") or ""), - "memberName": str(owner.get("member_name") or "未绑定Agent"), - "avatarUrl": owner.get("avatar_url"), - "status": normalized, - "startedAt": str(session.get("created_at") or ""), - "metrics": session_metrics, - } + build_resource_session_payload( + session_identity=session_identity, + lease_id=lease_id, + thread_id=thread_id, + owner=owner, + status=normalized, + started_at=str(session.get("created_at") or ""), + metrics=session_metrics, + ) ) - provider_type = _resolve_provider_type(provider_name, config_name, sandboxes_dir=SANDBOXES_DIR) + provider_type = str(display["type"]) telemetry = _aggregate_provider_telemetry( provider_sessions=provider_sessions, running_count=running_count, @@ -441,20 +556,23 @@ def list_resource_providers() -> dict[str, Any]: ), "disk": _metric(host_m.disk_used_gb, host_m.disk_total_gb, "GB", "direct", "live"), } + availability = build_provider_availability_payload( + available=effective_available, + running_count=running_count, + unavailable_reason=unavailable_reason, + ) providers.append( { "id": config_name, "name": config_name, - "description": catalog.description, - "vendor": catalog.vendor, + "description": display["description"], + "vendor": display["vendor"], "type": provider_type, - "status": _to_resource_status(effective_available, running_count), - "unavailableReason": unavailable_reason, - "error": ({"code": "PROVIDER_UNAVAILABLE", "message": unavailable_reason} if unavailable_reason else None), + **availability, "capabilities": capabilities, "telemetry": telemetry, "cardCpu": _resolve_card_cpu_metric(provider_type, telemetry), - "consoleUrl": _resolve_console_url(provider_name, config_name, sandboxes_dir=SANDBOXES_DIR), + "consoleUrl": display["console_url"], "sessions": normalized_sessions, } ) @@ -469,6 +587,36 @@ def list_resource_providers() -> dict[str, Any]: return {"summary": summary, "providers": providers} +def visible_resource_session_stats() -> dict[str, dict[str, int]]: + """Return the current user-visible session/running counts per provider.""" + repo = make_sandbox_monitor_repo() + try: + raw_sessions = repo.list_sessions_with_leases() + sessions = _project_user_visible_resource_sessions(repo, raw_sessions) + finally: + repo.close() + + stats: dict[str, dict[str, int]] = {} + seen_session_ids: set[str] = set() + seen_running_leases: set[tuple[str, str]] = set() + for session in sessions: + provider_instance = str(session.get("provider") or "local") + provider_stats = stats.setdefault(provider_instance, {"sessions": 0, "running": 0}) + session_identity = _resource_session_identity(session) + if session_identity not in seen_session_ids: + seen_session_ids.add(session_identity) + provider_stats["sessions"] += 1 + + lease_id = str(session.get("lease_id") or "") + normalized = map_lease_to_session_status(session.get("observed_state"), session.get("desired_state")) + running_identity = (provider_instance, lease_id) + if normalized == "running" and lease_id and running_identity not in seen_running_leases: + seen_running_leases.add(running_identity) + provider_stats["running"] += 1 + + return stats + + # --------------------------------------------------------------------------- # Public API: sandbox filesystem browse # --------------------------------------------------------------------------- @@ -576,6 +724,7 @@ def refresh_resource_snapshots() -> dict[str, Any]: 
probe_targets = repo.list_probe_targets() finally: repo.close() + snapshot_repo = build_resource_snapshot_repo() provider_cache: dict[str, Any] = {} probed = 0 @@ -583,44 +732,48 @@ def refresh_resource_snapshots() -> dict[str, Any]: running_targets = 0 non_running_targets = 0 - for item in probe_targets: - lease_id = item["lease_id"] - provider_key = item["provider_name"] - instance_id = item["instance_id"] - status = item["observed_state"] - # detached means running (not connected to terminal) - probe_mode = "running_runtime" if status in ("running", "detached") else "non_running_sdk" - if probe_mode == "running_runtime": - running_targets += 1 - else: - non_running_targets += 1 - - provider = provider_cache.get(provider_key) - if provider is None: - provider = build_provider_from_config_name(provider_key) - provider_cache[provider_key] = provider - if provider is None: - upsert_resource_snapshot( + try: + for item in probe_targets: + lease_id = item["lease_id"] + provider_key = item["provider_name"] + instance_id = item["instance_id"] + status = item["observed_state"] + # detached means running (not connected to terminal) + probe_mode = "running_runtime" if status in ("running", "detached") else "non_running_sdk" + if probe_mode == "running_runtime": + running_targets += 1 + else: + non_running_targets += 1 + + provider = provider_cache.get(provider_key) + if provider is None: + provider = build_provider_from_config_name(provider_key) + provider_cache[provider_key] = provider + if provider is None: + snapshot_repo.upsert_lease_resource_snapshot( + lease_id=lease_id, + provider_name=provider_key, + observed_state=status, + probe_mode=probe_mode, + probe_error=f"provider init failed: {provider_key}", + ) + errors += 1 + continue + + result = probe_and_upsert_for_instance( lease_id=lease_id, provider_name=provider_key, observed_state=status, probe_mode=probe_mode, - probe_error=f"provider init failed: {provider_key}", + provider=provider, + instance_id=instance_id, + repo=snapshot_repo, ) - errors += 1 - continue - - result = probe_and_upsert_for_instance( - lease_id=lease_id, - provider_name=provider_key, - observed_state=status, - probe_mode=probe_mode, - provider=provider, - instance_id=instance_id, - ) - probed += 1 - if not result["ok"]: - errors += 1 + probed += 1 + if not result["ok"]: + errors += 1 + finally: + snapshot_repo.close() return { "probed": probed, diff --git a/backend/web/services/sandbox_service.py b/backend/web/services/sandbox_service.py index 2e5e06cf0..4076bd280 100644 --- a/backend/web/services/sandbox_service.py +++ b/backend/web/services/sandbox_service.py @@ -16,9 +16,8 @@ from sandbox.manager import SandboxManager from sandbox.provider import ProviderCapability from sandbox.recipes import default_recipe_id, list_builtin_recipes, normalize_recipe_snapshot, provider_type_from_name +from storage.models import map_lease_to_session_status from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path -from storage.providers.sqlite.member_repo import SQLiteMemberRepo -from storage.providers.sqlite.thread_repo import SQLiteThreadRepo logger = logging.getLogger(__name__) @@ -55,9 +54,11 @@ def list_user_leases( sandbox_db_path: str | Path | None = None, ) -> list[dict[str, Any]]: monitor_repo = make_sandbox_monitor_repo() - _thread_repo = thread_repo or SQLiteThreadRepo(db_path=main_db_path) - _member_repo = member_repo or SQLiteMemberRepo(db_path=main_db_path) - own_repos = thread_repo is None # only close if we created them + if thread_repo is None 
or member_repo is None: + raise RuntimeError("thread_repo and member_repo are required for list_user_leases") + _thread_repo = thread_repo + _member_repo = member_repo + own_repos = False try: rows = monitor_repo.list_leases_with_threads() grouped: dict[str, dict[str, Any]] = {} @@ -74,13 +75,15 @@ def list_user_leases( "recipe": row.get("recipe_json"), "observed_state": row.get("observed_state"), "desired_state": row.get("desired_state"), + "created_at": row.get("created_at"), "cwd": row.get("cwd"), "thread_ids": [], "agents": [], + "_seen_member_ids": set(), }, ) thread_id = str(row.get("thread_id") or "").strip() - if not thread_id or thread_id in group["thread_ids"]: + if not _is_user_visible_lease_thread(thread_id) or thread_id in group["thread_ids"]: continue thread = _thread_repo.get_by_id(thread_id) if thread is None: @@ -89,20 +92,25 @@ def list_user_leases( if member is None or member.owner_user_id != user_id: continue group["thread_ids"].append(thread_id) - group["agents"].append( - { - "member_id": member.id, - "member_name": member.name, - "avatar_url": avatar_url(member.id, bool(member.avatar)), - } - ) + if member.id not in group["_seen_member_ids"]: + group["_seen_member_ids"].add(member.id) + group["agents"].append( + { + "member_id": member.id, + "member_name": member.name, + "avatar_url": avatar_url(member.id, bool(member.avatar)), + } + ) if not group["cwd"] and row.get("cwd"): group["cwd"] = row.get("cwd") leases: list[dict[str, Any]] = [] for lease in grouped.values(): + lease.pop("_seen_member_ids", None) if not lease["thread_ids"]: continue + if not _is_user_visible_lease_state(lease): + continue provider_name = lease["provider_name"] provider_type = provider_type_from_name(provider_name) if lease["recipe"]: @@ -123,6 +131,25 @@ def list_user_leases( monitor_repo.close() +def _is_user_visible_lease_thread(thread_id: str | None) -> bool: + raw = str(thread_id or "").strip() + if not raw: + return False + if raw.startswith("subagent-"): + return False + if is_virtual_thread_id(raw): + return False + return True + + +def _is_user_visible_lease_state(lease: dict[str, Any]) -> bool: + # @@@user-visible-lease-scope - product-facing lease surfaces should only + # expose leases the user can still act on, not historical stopped/destroying + # residue from monitor storage. 
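+    # Illustrative outcomes, assuming the mapping in storage.models (these rows
+    # are hedged examples, not asserted by this change):
+    #   observed="running",  desired="running" -> "running"  (kept)
+    #   observed="detached", desired="running" -> "running"  (kept; detached still counts as running)
+    #   observed="stopped",  desired="stopped" -> "stopped"  (filtered out)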
+ status = map_lease_to_session_status(lease.get("observed_state"), lease.get("desired_state")) + return status in {"running", "paused"} + + def available_sandbox_types() -> list[dict[str, Any]]: """Scan ~/.leon/sandboxes/ for configured providers.""" providers, _ = init_providers_and_managers() @@ -142,6 +169,16 @@ def available_sandbox_types() -> list[dict[str, Any]]: try: config = SandboxConfig.load(name) provider_obj = providers.get(name) + if provider_obj is None: + types.append( + { + "name": name, + "provider": config.provider, + "available": False, + "reason": f"Provider {name} is configured but unavailable in the current process", + } + ) + continue item: dict[str, Any] = { "name": name, "provider": config.provider, @@ -194,6 +231,8 @@ def _build_providers_and_managers() -> tuple[dict[str, Any], dict[str, Any]]: default_context_path=config.agentbay.context_path, image_id=config.agentbay.image_id, provider_name=name, + supports_pause=config.agentbay.supports_pause, + supports_resume=config.agentbay.supports_resume, ) elif config.provider == "docker": from sandbox.providers.docker import DockerProvider @@ -387,6 +426,35 @@ def mutate_sandbox_session( } +def get_session_metrics(session_id: str, provider_hint: str | None = None) -> dict[str, Any]: + """Load one session's provider metrics through the current manager inventory.""" + _, managers = init_providers_and_managers() + sessions = load_all_sessions(managers) + session, manager = find_session_and_manager(sessions, managers, session_id, provider_name=provider_hint) + if not session: + raise RuntimeError(f"Session not found: {session_id}") + if manager is None: + raise RuntimeError(f"Provider manager unavailable: {session.get('provider')}") + + target_session_id = str(session.get("instance_id") or session.get("session_id") or session_id) + metrics = manager.provider.get_metrics(target_session_id) + if metrics is None: + return {"session_id": target_session_id, "provider": session.get("provider"), "metrics": None} + return { + "session_id": target_session_id, + "provider": session.get("provider"), + "metrics": { + "cpu_percent": metrics.cpu_percent, + "memory_used_mb": metrics.memory_used_mb, + "memory_total_mb": metrics.memory_total_mb, + "disk_used_gb": metrics.disk_used_gb, + "disk_total_gb": metrics.disk_total_gb, + "network_rx_kbps": metrics.network_rx_kbps, + "network_tx_kbps": metrics.network_tx_kbps, + }, + } + + def build_provider_from_config_name(name: str, *, sandboxes_dir: Path | None = None) -> Any | None: """Build one provider instance from sandbox config name. 
Used by resource_service for per-session ops.""" providers, _ = init_providers_and_managers() diff --git a/backend/web/services/streaming_service.py b/backend/web/services/streaming_service.py index 9e6e71a77..7227a87e6 100644 --- a/backend/web/services/streaming_service.py +++ b/backend/web/services/streaming_service.py @@ -4,7 +4,6 @@ import json import logging import random -import traceback import uuid as _uuid from collections.abc import AsyncGenerator from typing import Any @@ -13,11 +12,31 @@ from backend.web.services.event_store import cleanup_old_runs from backend.web.utils.serializers import extract_text_content from core.runtime.middleware.monitor import AgentState +from core.runtime.notifications import is_terminal_background_notification from sandbox.thread_context import set_current_run_id, set_current_thread_id from storage.contracts import RunEventRepo logger = logging.getLogger(__name__) +type SSEEvent = dict[str, str | int] + +_TERMINAL_FOLLOWTHROUGH_SYSTEM_NOTE = ( + "Terminal background completion notifications require an explicit assistant followthrough. " + "Treat these notifications as fresh inputs that need a visible assistant reply. " + "You must produce at least one visible assistant message for them; " + "do not stay silent and do not end the run after only surfacing a notice. " + "Do not call TaskOutput or TaskStop for a terminal notification. " + "If no further tool is truly needed, answer directly in natural language " + "and briefly acknowledge the completion, failure, or cancellation honestly." +) + + +def _log_captured_exception(message: str, err: BaseException) -> None: + logger.error( + message, + exc_info=(type(err), err, err.__traceback__), + ) + def _resolve_run_event_repo(agent: Any) -> RunEventRepo | None: storage_container = getattr(agent, "storage_container", None) @@ -28,6 +47,18 @@ def _resolve_run_event_repo(agent: Any) -> RunEventRepo | None: return storage_container.run_event_repo() +def _augment_system_prompt_for_terminal_followthrough(system_prompt: Any) -> Any: + content = getattr(system_prompt, "content", None) + if not isinstance(content, str): + return system_prompt + if _TERMINAL_FOLLOWTHROUGH_SYSTEM_NOTE in content: + return system_prompt + # @@@terminal-followthrough-system-note - live models can otherwise treat + # terminal background notifications as internal reminders and emit no + # assistant text, leaving caller surfaces notice-only. + return system_prompt.__class__(content=f"{content}\n\n{_TERMINAL_FOLLOWTHROUGH_SYSTEM_NOTE}") + + async def prime_sandbox(agent: Any, thread_id: str) -> None: """Prime sandbox session before tool calls to avoid race conditions.""" @@ -256,8 +287,7 @@ def _ensure_thread_handlers(agent: Any, thread_id: str, app: Any) -> None: runtime = getattr(agent, "runtime", None) if not runtime: return - # Already bound? Skip. 
- if getattr(runtime, "_activity_sink", None) is not None: + if getattr(runtime, "_bound_thread_id", None) == thread_id and getattr(runtime, "_bound_thread_app", None) is app: return # Runtime must support bind_thread (AgentRuntime does, test fakes may not) if not hasattr(runtime, "bind_thread"): @@ -288,6 +318,7 @@ async def activity_sink(event: dict) -> None: if event_type and isinstance(data, dict): delta = display_builder_ref.apply_event(thread_id, event_type, data) if delta: + delta["_seq"] = seq await thread_buf.put( { "event": "display_delta", @@ -373,6 +404,8 @@ async def _start_run(): agent.runtime.transition(AgentState.IDLE) runtime.bind_thread(activity_sink=activity_sink) + runtime._bound_thread_id = thread_id + runtime._bound_thread_app = app qm.register_wake(thread_id, wake_handler) # Subscribe to EventBus so sub-agent events (spawned via AgentService) @@ -380,17 +413,227 @@ async def _start_run(): try: from backend.web.event_bus import get_event_bus - get_event_bus().subscribe(thread_id, activity_sink) + unsubscribe = getattr(runtime, "_thread_event_unsubscribe", None) + if callable(unsubscribe): + unsubscribe() + runtime._thread_event_unsubscribe = get_event_bus().subscribe(thread_id, activity_sink) except ImportError: pass +def _is_terminal_background_notification_message( + message: str, + *, + source: str | None, + notification_type: str | None, +) -> bool: + return is_terminal_background_notification( + message, + source=source, + notification_type=notification_type, + ) + + +def _partition_terminal_followups(items: list[Any]) -> tuple[list[Any], list[Any]]: + terminal = [] + passthrough = [] + for item in items: + if _is_terminal_background_notification_message( + item.content, + source=item.source or "system", + notification_type=item.notification_type, + ): + terminal.append(item) + else: + passthrough.append(item) + return terminal, passthrough + + +def _message_metadata_dict(message_metadata: dict[str, Any] | None) -> dict[str, Any]: + return dict(message_metadata or {}) + + +def _message_already_persisted(message: Any, *, content: str, metadata: dict[str, Any]) -> bool: + if message.__class__.__name__ != "HumanMessage": + return False + if getattr(message, "content", None) != content: + return False + return (getattr(message, "metadata", None) or {}) == metadata + + +async def _persist_cancelled_run_input_if_missing( + *, + agent: Any, + config: dict[str, Any], + message: str, + message_metadata: dict[str, Any] | None, +) -> None: + graph = getattr(agent, "agent", None) + if graph is None or not hasattr(graph, "aget_state") or not hasattr(graph, "aupdate_state"): + return + + from langchain_core.messages import HumanMessage + + metadata = _message_metadata_dict(message_metadata) + state = await graph.aget_state(config) + persisted = list((getattr(state, "values", None) or {}).get("messages", [])) + if persisted and _message_already_persisted(persisted[-1], content=message, metadata=metadata): + return + + # @@@cancelled-run-input-persist - a started run has already accepted this + # input at the caller boundary. If cancellation lands before the next loop + # checkpoint save, persist the input here so later turns do not pretend it + # never happened. 
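+    # Worked example (hypothetical turn, illustrative only):
+    #   state.values["messages"][-1] == HumanMessage("fix the login bug", metadata={"source": "owner"})
+    #   message == "fix the login bug" with the same metadata -> already persisted, returned above
+    #   any other tail message                                -> append the input below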
+ candidate = HumanMessage(content=message, metadata=metadata) if metadata else HumanMessage(content=message) + await graph.aupdate_state(config, {"messages": [candidate]}) + + +def _is_owner_steer_followup_message( + *, + source: str | None, + notification_type: str | None, +) -> bool: + return source == "owner" and notification_type == "steer" + + +async def _persist_cancelled_owner_steers( + *, + agent: Any, + config: dict[str, Any], + items: list[dict[str, str | None]], +) -> None: + graph = getattr(agent, "agent", None) + if graph is None or not hasattr(graph, "aupdate_state") or not items: + return + + from langchain_core.messages import HumanMessage + + # @@@cancelled-steer-persist - accepted steer is a real user turn. If the + # active run is cancelled before the next model call, we must checkpoint it + # now instead of letting it silently relaunch as a ghost instruction. + await graph.aupdate_state( + config, + { + "messages": [ + HumanMessage( + content=str(item["content"] or ""), + metadata={ + "source": "owner", + "notification_type": "steer", + "is_steer": True, + }, + ) + for item in items + ] + }, + ) + + +async def _flush_cancelled_owner_steers( + *, + agent: Any, + config: dict[str, Any], + thread_id: str, + app: Any, +) -> None: + qm = app.state.queue_manager + queued_items = qm.drain_all(thread_id) + if not queued_items: + return + + owner_steers: list[dict[str, str | None]] = [] + passthrough: list[Any] = [] + for item in queued_items: + if _is_owner_steer_followup_message( + source=item.source, + notification_type=item.notification_type, + ): + owner_steers.append( + { + "content": item.content, + "source": item.source or "owner", + "notification_type": item.notification_type, + } + ) + else: + passthrough.append(item) + + await _persist_cancelled_owner_steers(agent=agent, config=config, items=owner_steers) + + for item in passthrough: + qm.enqueue( + item.content, + thread_id, + notification_type=item.notification_type, + source=item.source, + sender_id=item.sender_id, + sender_name=item.sender_name, + sender_avatar_url=item.sender_avatar_url, + is_steer=item.is_steer, + ) + + +async def _emit_queued_terminal_followups( + *, + app: Any, + thread_id: str, + emit: Any, +) -> list[dict[str, str | None]]: + emitted_terminal: list[dict[str, str | None]] = [] + + async def _drain_once() -> bool: + queued_items = app.state.queue_manager.drain_all(thread_id) + extra_terminal, passthrough = _partition_terminal_followups(queued_items) + for item in passthrough: + app.state.queue_manager.enqueue( + item.content, + thread_id, + notification_type=item.notification_type, + source=item.source, + sender_id=item.sender_id, + sender_name=item.sender_name, + sender_avatar_url=item.sender_avatar_url, + is_steer=item.is_steer, + ) + for item in extra_terminal: + await emit( + { + "event": "notice", + "data": json.dumps( + { + "content": item.content, + "source": item.source or "system", + "notification_type": item.notification_type, + }, + ensure_ascii=False, + ), + } + ) + emitted_terminal.append( + { + "content": item.content, + "source": item.source or "system", + "notification_type": item.notification_type, + } + ) + return bool(extra_terminal) + + # @@@terminal-followup-race-window - multiple background tasks can finish + # while the first notice-only followthrough run is being emitted. Drain once + # for already-persisted notices, yield one loop tick, then drain again so + # same-turn terminal completions are folded into the same stable followthrough. 
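The drain-twice shape described in the comment above is easiest to see in isolation: a completion that lands while the first batch is being emitted is only picked up because of the single `await asyncio.sleep(0)` tick between the two drains. A self-contained toy under that assumption (queue contents are invented):

```python
import asyncio


async def demo() -> list[str]:
    queue = ["task A done"]
    emitted: list[str] = []

    async def drain_once() -> None:
        while queue:
            emitted.append(queue.pop(0))

    async def late_completion() -> None:
        queue.append("task B done")  # finishes while the first drain is emitting

    asyncio.get_running_loop().create_task(late_completion())
    await drain_once()
    await asyncio.sleep(0)  # yield one loop tick so same-turn completions land
    await drain_once()
    return emitted


assert asyncio.run(demo()) == ["task A done", "task B done"]
```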
+ await _drain_once() + await asyncio.sleep(0) + await _drain_once() + return emitted_terminal + + # --------------------------------------------------------------------------- # Producer: runs agent, writes events to ThreadEventBuffer # --------------------------------------------------------------------------- -async def _run_agent_to_buffer( +async def _run_agent_to_buffer( # pyright: ignore[reportGeneralTypeIssues] # @@@nu59-complexity-honesty agent: Any, thread_id: str, message: str, @@ -399,7 +642,8 @@ async def _run_agent_to_buffer( thread_buf: ThreadEventBuffer, run_id: str, message_metadata: dict[str, Any] | None = None, -) -> None: + input_messages: list[Any] | None = None, +) -> str: """Run agent execution and write all SSE events into *thread_buf*.""" from backend.web.services.event_store import append_event @@ -428,12 +672,16 @@ async def emit(event: dict, message_id: str | None = None) -> None: event = {**event, "data": json.dumps(data, ensure_ascii=False)} await thread_buf.put(event) - # Compute display delta and emit it (no _seq — avoids dedup conflict - # with the raw event that shares the same seq) + # Compute display delta and emit it alongside the raw event. event_type = event.get("event", "") if event_type and isinstance(data, dict): delta = display_builder.apply_event(thread_id, event_type, data) if delta: + # @@@display-delta-source-seq - replay after-filter only knows raw + # event seqs. Carry the source seq onto the derived delta so a + # reconnect after GET /thread can skip stale display_delta + # replays instead of rebuilding the same thread a second time. + delta["_seq"] = seq await thread_buf.put( { "event": "display_delta", @@ -444,6 +692,7 @@ async def emit(event: dict, message_id: str | None = None) -> None: task = None stream_gen = None pending_tool_calls: dict[str, dict] = {} + output_parts: list[str] = [] try: config = {"configurable": {"thread_id": thread_id, "run_id": run_id}} if hasattr(agent, "_current_model_config"): @@ -486,8 +735,8 @@ async def emit(event: dict, message_id: str | None = None) -> None: obs_config = ObservationLoader().load() if obs_provider == "langfuse": - from langfuse import Langfuse - from langfuse.langchain import CallbackHandler as LangfuseHandler + from langfuse import Langfuse # pyright: ignore[reportMissingImports] + from langfuse.langchain import CallbackHandler as LangfuseHandler # pyright: ignore[reportMissingImports] cfg = obs_config.langfuse if cfg.secret_key and cfg.public_key: @@ -589,7 +838,21 @@ def on_activity_event(event: dict) -> None: # enqueue time (@@@steer-instant-feedback). # Note: is_steer is NOT persisted in queue, so check notification_type too. is_steer = meta.get("is_steer") or meta.get("notification_type") == "steer" - if (not src or src == "owner") and not is_steer: + if meta.get("ask_user_question_answered"): + await emit( + { + "event": "user_message", + "data": json.dumps( + { + "content": "", + "showing": False, + "ask_user_question_answered": meta["ask_user_question_answered"], + }, + ensure_ascii=False, + ), + } + ) + elif (not src or src == "owner") and not is_steer: # @@@strip-for-display — agent sees full content (with system-reminder), # frontend sees clean text (tags stripped) from backend.web.utils.serializers import strip_system_tags @@ -625,9 +888,10 @@ def on_activity_event(event: dict) -> None: ) # @@@run-notice — emit notice right after run_start so frontend folds it - # into the (re)opened turn. Only for external notifications (not owner steer). + # into the (re)opened turn. 
Mirror the cold-path DisplayBuilder rule: + # any source=system message is a notice; external notices stay chat-only. ntype = meta.get("notification_type") - if src and src != "owner" and ntype == "chat": + if src == "system" or (src == "external" and ntype == "chat"): await emit( { "event": "notice", @@ -642,7 +906,46 @@ def on_activity_event(event: dict) -> None: } ) - if message_metadata: + terminal_followthrough_items: list[dict[str, str | None]] | None = None + original_system_prompt = None + # @@@terminal-followthrough-reentry - terminal background completions + # still surface as durable notices first, but they must then re-enter the + # model as a real followthrough turn instead of terminating at notice-only. + if _is_terminal_background_notification_message( + message, + source=src, + notification_type=ntype, + ): + terminal_followthrough_items = [ + { + "content": message, + "source": src or "system", + "notification_type": ntype, + } + ] + terminal_followthrough_items.extend(await _emit_queued_terminal_followups(app=app, thread_id=thread_id, emit=emit)) + if hasattr(agent, "agent") and hasattr(agent.agent, "system_prompt"): + original_system_prompt = agent.agent.system_prompt + agent.agent.system_prompt = _augment_system_prompt_for_terminal_followthrough(original_system_prompt) + + if terminal_followthrough_items: + from langchain_core.messages import HumanMessage + + _initial_input = { + "messages": [ + HumanMessage( + content=str(item["content"] or ""), + metadata={ + "source": item["source"] or "system", + "notification_type": item["notification_type"], + }, + ) + for item in terminal_followthrough_items + ] + } + elif input_messages is not None: + _initial_input = {"messages": input_messages} + elif message_metadata: from langchain_core.messages import HumanMessage _initial_input: dict | None = {"messages": [HumanMessage(content=message, metadata=message_metadata)]} @@ -725,7 +1028,7 @@ def _is_retryable_stream_error(err: Exception) -> bool: mode, data = chunk if mode == "messages": - msg_chunk, metadata = data + msg_chunk, _metadata = data msg_class = msg_chunk.__class__.__name__ if msg_class == "AIMessageChunk": # @@@compact-leak-guard — skip chunks from compact's summary LLM call. @@ -735,6 +1038,7 @@ def _is_retryable_stream_error(err: Exception) -> bool: content = extract_text_content(getattr(msg_chunk, "content", "")) chunk_msg_id = getattr(msg_chunk, "id", None) if content: + output_parts.append(content) await emit( { "event": "text", @@ -792,14 +1096,13 @@ def _is_retryable_stream_error(err: Exception) -> bool: msg_class = msg.__class__.__name__ if msg_class == "HumanMessage": - # @@@mid-turn-chat-notice — emit notice for chat - # notifications injected by before_model. display_builder - # folds it into the current turn as a segment (same as - # cold-path checkpoint rebuild behavior). + # @@@mid-turn-notice-parity — hot streaming must use the + # same notice contract as cold checkpoint rebuild: + # source=system always folds as notice; external stays + # limited to chat notifications. 
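Collapsed into a single predicate, the notice rule that this hot path and the cold-path rebuild below both apply reads as follows (a paraphrase for review, not code from the repo):

```python
def folds_as_notice(source: str | None, notification_type: str | None) -> bool:
    # source=system always folds as a notice; external messages only when they
    # are chat notifications; owner traffic (including steers) never does.
    return source == "system" or (source == "external" and notification_type == "chat")


assert folds_as_notice("system", "task_done")
assert folds_as_notice("external", "chat")
assert not folds_as_notice("external", "task_done")
assert not folds_as_notice("owner", "chat")
```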
meta = getattr(msg, "metadata", None) or {} - if meta.get("notification_type") == "chat" and meta.get("source") in ( - "external", - "system", + if meta.get("source") == "system" or ( + meta.get("source") == "external" and meta.get("notification_type") == "chat" ): await emit( { @@ -808,7 +1111,7 @@ def _is_retryable_stream_error(err: Exception) -> bool: { "content": msg.content if isinstance(msg.content, str) else str(msg.content), "source": meta.get("source", "external"), - "notification_type": "chat", + "notification_type": meta.get("notification_type"), }, ensure_ascii=False, ), @@ -861,8 +1164,11 @@ def _is_retryable_stream_error(err: Exception) -> bool: continue if tc_id: pending_tool_calls.pop(tc_id, None) - if hasattr(msg, "metadata") and isinstance(msg.metadata, dict): - msg.metadata["run_id"] = run_id + merged_meta = dict(getattr(msg, "metadata", None) or {}) + tool_result_meta = getattr(msg, "additional_kwargs", {}).get("tool_result_meta") + if isinstance(tool_result_meta, dict): + merged_meta = {**tool_result_meta, **merged_meta} + merged_meta["run_id"] = run_id tool_name = getattr(msg, "name", "") or "" await emit( { @@ -872,7 +1178,7 @@ def _is_retryable_stream_error(err: Exception) -> bool: "tool_call_id": tc_id, "name": tool_name, "content": str(getattr(msg, "content", "")), - "metadata": getattr(msg, "metadata", None) or {}, + "metadata": merged_meta, "showing": True, }, ensure_ascii=False, @@ -920,7 +1226,10 @@ def _is_retryable_stream_error(err: Exception) -> bool: await stream_gen.aclose() await asyncio.sleep(wait) else: - traceback.print_exc() + _log_captured_exception( + f"[streaming] stream failed for thread {thread_id}", + stream_err, + ) await emit({"event": "error", "data": json.dumps({"error": str(stream_err)}, ensure_ascii=False)}) break @@ -954,8 +1263,21 @@ def _is_retryable_stream_error(err: Exception) -> bool: # A5: emit run_done instead of done (persistent buffer — no mark_done) await emit({"event": "run_done", "data": json.dumps({"thread_id": thread_id, "run_id": run_id})}) + return "".join(output_parts).strip() except asyncio.CancelledError: cancelled_tool_call_ids = await write_cancellation_markers(agent, config, pending_tool_calls) + await _persist_cancelled_run_input_if_missing( + agent=agent, + config=config, + message=message, + message_metadata=message_metadata, + ) + await _flush_cancelled_owner_steers( + agent=agent, + config=config, + thread_id=thread_id, + app=app, + ) await emit( { "event": "cancelled", @@ -969,11 +1291,18 @@ def _is_retryable_stream_error(err: Exception) -> bool: ) # Also emit run_done so frontend knows the run ended await emit({"event": "run_done", "data": json.dumps({"thread_id": thread_id, "run_id": run_id})}) + return "" except Exception as e: - traceback.print_exc() + _log_captured_exception( + f"[streaming] run failed for thread {thread_id}", + e, + ) await emit({"event": "error", "data": json.dumps({"error": str(e)}, ensure_ascii=False)}) await emit({"event": "run_done", "data": json.dumps({"thread_id": thread_id, "run_id": run_id})}) + return "" finally: + if original_system_prompt is not None and hasattr(agent, "agent") and hasattr(agent.agent, "system_prompt"): + agent.agent.system_prompt = original_system_prompt # @@@typing-lifecycle-stop — guaranteed cleanup even on crash/cancel typing_tracker = getattr(app.state, "typing_tracker", None) if typing_tracker is not None: @@ -985,7 +1314,7 @@ def _is_retryable_stream_error(err: Exception) -> bool: if obs_handler is not None: try: if obs_active == "langfuse": - from 
langfuse import get_client + from langfuse import get_client # pyright: ignore[reportMissingImports] get_client().flush() elif obs_active == "langsmith": @@ -1036,22 +1365,29 @@ async def _consume_followup_queue(agent: Any, thread_id: str, app: Any) -> None: item = None try: qm = app.state.queue_manager + if not qm.peek(thread_id) or not app: + return + if not (hasattr(agent, "runtime") and agent.runtime.transition(AgentState.ACTIVE)): + return item = qm.dequeue(thread_id) - if item and app: - if hasattr(agent, "runtime") and agent.runtime.transition(AgentState.ACTIVE): - start_agent_run( - agent, - thread_id, - item.content, - app, - message_metadata={ - "source": item.source or "system", - "notification_type": item.notification_type, - "sender_name": item.sender_name, - "sender_avatar_url": item.sender_avatar_url, - "is_steer": getattr(item, "is_steer", False), - }, - ) + if item is None: + logger.warning("followup dequeue lost race for thread %s; reverting to IDLE", thread_id) + if hasattr(agent, "runtime"): + agent.runtime.transition(AgentState.IDLE) + return + start_agent_run( + agent, + thread_id, + item.content, + app, + message_metadata={ + "source": item.source or "system", + "notification_type": item.notification_type, + "sender_name": item.sender_name, + "sender_avatar_url": item.sender_avatar_url, + "is_steer": getattr(item, "is_steer", False), + }, + ) except Exception: logger.exception("Failed to consume followup queue for thread %s", thread_id) # Re-enqueue the message if it was already dequeued to prevent data loss @@ -1074,18 +1410,90 @@ def start_agent_run( app: Any, enable_trajectory: bool = False, message_metadata: dict[str, Any] | None = None, + input_messages: list[Any] | None = None, ) -> str: """Launch agent producer on the persistent ThreadEventBuffer. 
Returns run_id.""" thread_buf = get_or_create_thread_buffer(app, thread_id) run_id = str(_uuid.uuid4()) bg_task = asyncio.create_task( - _run_agent_to_buffer(agent, thread_id, message, app, enable_trajectory, thread_buf, run_id, message_metadata) + _run_agent_to_buffer( + agent, + thread_id, + message, + app, + enable_trajectory, + thread_buf, + run_id, + message_metadata, + input_messages, + ) ) # Store the background task so cancel_run can still cancel it app.state.thread_tasks[thread_id] = bg_task return run_id +async def run_child_thread_live( + agent: Any, + thread_id: str, + message: str, + app: Any, + *, + input_messages: list[Any], +) -> str: + """Run a spawned child agent through the normal web thread bridge.""" + from backend.web.services.agent_pool import resolve_thread_sandbox + from backend.web.utils.serializers import extract_text_content + + sandbox_type = resolve_thread_sandbox(app, thread_id) + app.state.agent_pool[f"{thread_id}:{sandbox_type}"] = agent + thread_buf = get_or_create_thread_buffer(app, thread_id) + error_cursor = thread_buf.total_count + _ensure_thread_handlers(agent, thread_id, app) + if not (hasattr(agent, "runtime") and agent.runtime.transition(AgentState.ACTIVE)): + raise RuntimeError(f"Child thread {thread_id} could not transition to active") + + start_agent_run( + agent, + thread_id, + message, + app, + input_messages=input_messages, + ) + task = app.state.thread_tasks[thread_id] + result = await task + recent_events, _ = await thread_buf.read_with_timeout(error_cursor, timeout=0.01) + if recent_events: + # @@@child-live-error-surfacing - child live runs can emit an error event + # and still return an empty string from _run_agent_to_buffer(); treat that + # as a real child failure instead of laundering it into fake completion. + for event in recent_events: + if event.get("event") != "error": + continue + try: + payload = json.loads(event.get("data", "{}")) + except (json.JSONDecodeError, TypeError): + payload = {} + error_text = payload.get("error") if isinstance(payload, dict) else None + raise RuntimeError(error_text or f"Child thread {thread_id} failed") + if isinstance(result, str) and result.strip(): + return result.strip() + + state = await agent.agent.aget_state({"configurable": {"thread_id": thread_id}}) + values = getattr(state, "values", {}) if state else {} + messages = values.get("messages", []) if isinstance(values, dict) else [] + visible_ai = [ + extract_text_content(getattr(msg, "content", "")).strip() + for msg in messages + if msg.__class__.__name__ == "AIMessage" and extract_text_content(getattr(msg, "content", "")).strip() + ] + runtime_status = agent.runtime.get_status_dict() if hasattr(agent, "runtime") and hasattr(agent.runtime, "get_status_dict") else {} + runtime_calls = runtime_status.get("calls") if isinstance(runtime_status, dict) else None + if not visible_ai and runtime_calls == 0: + raise RuntimeError(f"Child thread {thread_id} failed before first model call") + return "\n".join(visible_ai) if visible_ai else "(Agent completed with no text output)" + + # --------------------------------------------------------------------------- # Consumer: persistent thread event stream # --------------------------------------------------------------------------- @@ -1094,54 +1502,37 @@ def start_agent_run( async def observe_thread_events( thread_buf: ThreadEventBuffer, after: int = 0, -) -> AsyncGenerator[dict[str, str], None]: +) -> AsyncGenerator[SSEEvent, None]: """Consume events from a persistent ThreadEventBuffer. 
Yields SSE event dicts. Unlike observe_run_events, this never terminates on its own — the client disconnect (or server shutdown) closes the connection. run_done is a flow event, not a terminal signal. """ - yield {"retry": 5000} - # Always start from the beginning of the ring buffer. # For after=0 (new connection): replay all buffered events so we never miss # events emitted between postRun and SSE connect (race condition fix). # For after>0 (reconnect): start from ring start, filter by _seq below. - cursor = 0 - - while True: - events, cursor = await thread_buf.read_with_timeout(cursor, timeout=30) - if events is None: - yield {"comment": "keepalive"} - continue - if not events: - continue - for event in events: - parsed_data = None - try: - parsed_data = json.loads(event.get("data", "{}")) - except (json.JSONDecodeError, TypeError): - pass - - # @@@after-filter — skip events already seen on reconnect. - # Events without _seq (e.g. display_delta) are never filtered — - # they are ephemeral derivatives of persisted events. - if after > 0 and isinstance(parsed_data, dict) and "_seq" in parsed_data: - if parsed_data["_seq"] <= after: - continue - - seq_id = str(parsed_data["_seq"]) if isinstance(parsed_data, dict) and "_seq" in parsed_data else None - if seq_id: - yield {**event, "id": seq_id} - else: - yield event + async for event in _observe_sse_buffer(thread_buf, after=after, stop_on_finish=False): + yield event async def observe_run_events( buf: RunEventBuffer, after: int = 0, -) -> AsyncGenerator[dict[str, str], None]: +) -> AsyncGenerator[SSEEvent, None]: """Consume events from a RunEventBuffer (subagent streams only). Yields SSE event dicts.""" + async for event in _observe_sse_buffer(buf, after=after, stop_on_finish=True): + yield event + + +async def _observe_sse_buffer( + buf: ThreadEventBuffer | RunEventBuffer, + *, + after: int, + stop_on_finish: bool, +) -> AsyncGenerator[SSEEvent, None]: + """Shared SSE observer loop for thread and run buffers.""" yield {"retry": 5000} cursor = 0 @@ -1150,7 +1541,7 @@ async def observe_run_events( if events is None and not buf.finished.is_set(): yield {"comment": "keepalive"} continue - if not events and buf.finished.is_set(): + if stop_on_finish and not events and buf.finished.is_set(): break if not events: continue @@ -1162,8 +1553,8 @@ async def observe_run_events( pass # @@@after-filter — skip events already seen on reconnect. - # Events without _seq (e.g. display_delta) are never filtered — - # they are ephemeral derivatives of persisted events. + # display_delta now carries the source raw-event seq too, so stale + # derived deltas are filtered together with their persisted source. 
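The reconnect contract is small enough to pin down with a toy replay: any event whose JSON payload carries a `_seq` at or below the client's last-seen seq is skipped, events without `_seq` always pass, and because a derived `display_delta` now carries its source event's seq it is skipped together with that source. A sketch under those assumptions:

```python
import json


def replay(events: list[dict[str, str]], after: int) -> list[dict[str, str]]:
    kept = []
    for event in events:
        try:
            data = json.loads(event.get("data", "{}"))
        except (json.JSONDecodeError, TypeError):
            data = None
        # Events without _seq are never filtered; everything at or below the
        # client's last-seen seq is skipped on reconnect.
        if after > 0 and isinstance(data, dict) and data.get("_seq", after + 1) <= after:
            continue
        kept.append(event)
    return kept


events = [
    {"event": "text", "data": json.dumps({"_seq": 3, "content": "old"})},
    {"event": "display_delta", "data": json.dumps({"_seq": 3})},
    {"event": "text", "data": json.dumps({"_seq": 4, "content": "new"})},
]
assert [e["event"] for e in replay(events, after=3)] == ["text"]
```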
if after > 0 and isinstance(parsed_data, dict) and "_seq" in parsed_data: if parsed_data["_seq"] <= after: continue diff --git a/backend/web/services/task_service.py b/backend/web/services/task_service.py index 86197b584..3c7ae1b91 100644 --- a/backend/web/services/task_service.py +++ b/backend/web/services/task_service.py @@ -3,71 +3,114 @@ from typing import Any from backend.web.core.storage_factory import make_panel_task_repo +from storage.runtime import build_thread_repo def _repo() -> Any: return make_panel_task_repo() -def list_tasks() -> list[dict[str, Any]]: - repo = _repo() +def list_tasks(owner_user_id: str | None = None, repo: Any = None, thread_repo: Any = None) -> list[dict[str, Any]]: + own_repo = repo is None + repo = repo or _repo() try: - return repo.list_all() + return _enrich_task_thread_members(repo.list_all(owner_user_id=owner_user_id), thread_repo=thread_repo) finally: - repo.close() + if own_repo: + repo.close() -def get_task(task_id: str) -> dict[str, Any] | None: - repo = _repo() +def _enrich_task_thread_members(tasks: list[dict[str, Any]], thread_repo: Any = None) -> list[dict[str, Any]]: + thread_ids = [str(task.get("thread_id") or "").strip() for task in tasks] + thread_ids = [thread_id for thread_id in dict.fromkeys(thread_ids) if thread_id] + if not thread_ids: + return tasks + + # @@@task-thread-member-enrichment - panel tasks persist thread_id only, so enrich member_id + # from canonical thread metadata before frontend deep-links are rendered. + own_thread_repo = thread_repo is None + thread_repo = thread_repo or build_thread_repo() + try: + member_ids = {thread_id: (thread_repo.get_by_id(thread_id) or {}).get("member_id") for thread_id in thread_ids} + finally: + if own_thread_repo: + thread_repo.close() + + enriched: list[dict[str, Any]] = [] + for task in tasks: + thread_id = str(task.get("thread_id") or "").strip() + if thread_id and member_ids.get(thread_id): + enriched.append({**task, "member_id": member_ids[thread_id]}) + else: + enriched.append(task) + return enriched + + +def get_task(task_id: str, owner_user_id: str | None = None, repo: Any = None) -> dict[str, Any] | None: + own_repo = repo is None + repo = repo or _repo() try: - return repo.get(task_id) + return repo.get(task_id, owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() -def get_highest_priority_pending_task() -> dict[str, Any] | None: - repo = _repo() +def get_highest_priority_pending_task(owner_user_id: str | None = None, repo: Any = None) -> dict[str, Any] | None: + own_repo = repo is None + repo = repo or _repo() try: - return repo.get_highest_priority_pending() + return repo.get_highest_priority_pending(owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() -def create_task(**fields: Any) -> dict[str, Any]: - repo = _repo() +def create_task(repo: Any = None, **fields: Any) -> dict[str, Any]: + own_repo = repo is None + repo = repo or _repo() try: return repo.create(**fields) finally: - repo.close() + if own_repo: + repo.close() -def update_task(task_id: str, **fields: Any) -> dict[str, Any] | None: - repo = _repo() +def update_task(task_id: str, owner_user_id: str | None = None, repo: Any = None, **fields: Any) -> dict[str, Any] | None: + own_repo = repo is None + repo = repo or _repo() try: - return repo.update(task_id, **fields) + return repo.update(task_id, owner_user_id=owner_user_id, **fields) finally: - repo.close() + if own_repo: + repo.close() -def delete_task(task_id: str) -> bool: - repo = _repo() +def 
delete_task(task_id: str, owner_user_id: str | None = None, repo: Any = None) -> bool: + own_repo = repo is None + repo = repo or _repo() try: - return repo.delete(task_id) + return repo.delete(task_id, owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() -def bulk_delete_tasks(ids: list[str]) -> int: - repo = _repo() +def bulk_delete_tasks(ids: list[str], owner_user_id: str | None = None, repo: Any = None) -> int: + own_repo = repo is None + repo = repo or _repo() try: - return repo.bulk_delete(ids) + return repo.bulk_delete(ids, owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() -def bulk_update_task_status(ids: list[str], status: str) -> int: - repo = _repo() +def bulk_update_task_status(ids: list[str], status: str, owner_user_id: str | None = None, repo: Any = None) -> int: + own_repo = repo is None + repo = repo or _repo() try: - return repo.bulk_update_status(ids, status) + return repo.bulk_update_status(ids, status, owner_user_id=owner_user_id) finally: - repo.close() + if own_repo: + repo.close() diff --git a/backend/web/services/thread_launch_config_service.py b/backend/web/services/thread_launch_config_service.py index 00060e222..b9202c21c 100644 --- a/backend/web/services/thread_launch_config_service.py +++ b/backend/web/services/thread_launch_config_service.py @@ -6,7 +6,7 @@ from backend.web.services import sandbox_service from backend.web.services.library_service import list_library -from sandbox.recipes import provider_type_from_name +from sandbox.recipes import normalize_recipe_snapshot, provider_type_from_name def normalize_launch_config_payload(payload: dict[str, Any]) -> dict[str, Any]: @@ -20,22 +20,51 @@ def normalize_launch_config_payload(payload: dict[str, Any]) -> dict[str, Any]: } -def save_last_confirmed_config(app: Any, owner_user_id: str, member_id: str, payload: dict[str, Any]) -> None: - app.state.thread_launch_pref_repo.save_confirmed( - owner_user_id, - member_id, - normalize_launch_config_payload(payload), +def build_existing_launch_config( + *, + lease: dict[str, Any], + model: str | None, + workspace: str | None, +) -> dict[str, Any]: + return normalize_launch_config_payload( + { + "create_mode": "existing", + "provider_config": lease.get("provider_name"), + "recipe": lease.get("recipe"), + "lease_id": lease.get("lease_id"), + "model": model, + "workspace": workspace, + } ) -def save_last_successful_config(app: Any, owner_user_id: str, member_id: str, payload: dict[str, Any]) -> None: - app.state.thread_launch_pref_repo.save_successful( - owner_user_id, - member_id, - normalize_launch_config_payload(payload), +def build_new_launch_config( + *, + provider_config: str, + recipe: dict[str, Any] | None, + model: str | None, + workspace: str | None, +) -> dict[str, Any]: + return normalize_launch_config_payload( + { + "create_mode": "new", + "provider_config": provider_config, + "recipe": normalize_recipe_snapshot(provider_type_from_name(provider_config), recipe), + "lease_id": None, + "model": model, + "workspace": workspace, + } ) +def save_last_confirmed_config(app: Any, owner_user_id: str, member_id: str, payload: dict[str, Any]) -> None: + _save_launch_config(app.state.thread_launch_pref_repo.save_confirmed, owner_user_id, member_id, payload) + + +def save_last_successful_config(app: Any, owner_user_id: str, member_id: str, payload: dict[str, Any]) -> None: + _save_launch_config(app.state.thread_launch_pref_repo.save_successful, owner_user_id, member_id, payload) + + def 
resolve_default_config(app: Any, owner_user_id: str, member_id: str) -> dict[str, Any]: prefs = app.state.thread_launch_pref_repo.get(owner_user_id, member_id) or {} leases = sandbox_service.list_user_leases( @@ -119,6 +148,14 @@ def _validate_saved_config( } +def _save_launch_config(save_fn: Any, owner_user_id: str, member_id: str, payload: dict[str, Any]) -> None: + save_fn( + owner_user_id, + member_id, + normalize_launch_config_payload(payload), + ) + + def _derive_default_config( *, member_threads: list[dict[str, Any]], diff --git a/backend/web/services/thread_naming.py b/backend/web/services/thread_naming.py index ee65a9923..0e3fba68d 100644 --- a/backend/web/services/thread_naming.py +++ b/backend/web/services/thread_naming.py @@ -1,4 +1,4 @@ -"""Canonical thread/entity naming helpers.""" +"""Canonical thread naming helpers.""" from __future__ import annotations @@ -7,18 +7,11 @@ def validate_thread_identity(*, is_main: bool, branch_index: int) -> None: if branch_index < 0: raise ValueError(f"branch_index must be >= 0, got {branch_index}") if is_main and branch_index != 0: - raise ValueError(f"Main thread must have branch_index=0, got {branch_index}") + raise ValueError(f"Default thread must have branch_index=0, got {branch_index}") if not is_main and branch_index == 0: raise ValueError("Child thread must have branch_index>0") -def canonical_entity_name(member_name: str, *, is_main: bool, branch_index: int) -> str: - validate_thread_identity(is_main=is_main, branch_index=branch_index) - if is_main: - return member_name - return f"{member_name} · 分身{branch_index}" - - def sidebar_label(*, is_main: bool, branch_index: int) -> str | None: validate_thread_identity(is_main=is_main, branch_index=branch_index) if is_main: diff --git a/backend/web/services/thread_state_service.py b/backend/web/services/thread_state_service.py index 30e0186ec..b9acf4ae2 100644 --- a/backend/web/services/thread_state_service.py +++ b/backend/web/services/thread_state_service.py @@ -21,7 +21,14 @@ def _resolve_thread_sandbox_instance(mgr: Any, lease: Any) -> Any | None: def _display_sandbox_status(lease: Any, instance: Any) -> str: observed = getattr(lease, "observed_state", None) - return instance.status if observed in {None, "", "detached"} else observed + if observed in {None, "", "detached"}: + status = getattr(instance, "status", None) + if not isinstance(status, str) or not status: + raise RuntimeError("Sandbox instance missing status") + return status + if not isinstance(observed, str): + raise RuntimeError("Lease observed_state must be a string when present") + return observed def get_sandbox_info(agent: Any, thread_id: str, sandbox_type: str) -> dict[str, Any]: @@ -125,14 +132,14 @@ def _get_terminal(): } -async def get_lease_status(agent: Any, thread_id: str) -> dict[str, Any]: +async def get_lease_status(agent: Any, thread_id: str) -> dict[str, Any] | None: """Get SandboxLease status for a thread. 
Returns: Dict with lease_id, provider_name, states, instance info, timestamps
-    Raises:
-        ValueError: If no lease found for thread
+    Returns None if no lease is found for the thread.
     """

     def _get_lease():
@@ -147,7 +154,7 @@ def _get_lease():
     lease = await asyncio.to_thread(_get_lease)
     if not lease:
-        raise ValueError(f"No lease found for thread {thread_id}")
+        return None
     instance = lease.get_instance()
     created_at, updated_at = await asyncio.to_thread(get_lease_timestamps, lease.lease_id)
diff --git a/backend/web/services/wechat_service.py b/backend/web/services/wechat_service.py
deleted file mode 100644
index b19261d79..000000000
--- a/backend/web/services/wechat_service.py
+++ /dev/null
@@ -1,517 +0,0 @@
-"""WeChat connection service — ilink API client + connection lifecycle + background poll.
-
-Uses the official WeChat ClawBot ilink API at ilinkai.weixin.qq.com.
-Protocol: HTTP/JSON long-polling, modeled after Telegram Bot API.
-Auth: Bearer token obtained via QR code scan.
-
-@@@per-user — each human user_id gets its own WeChatConnection.
-user_id is the social identity in Leon's network (Supabase auth UUID for humans).
-Polling auto-starts at backend boot via lifespan.py for all users with saved credentials.
-
-@@@no-globals — WeChatConnectionRegistry lives on app.state, not module-level.
-"""
-
-import asyncio
-import json
-import logging
-import os
-import random
-import struct
-import time
-from base64 import b64encode
-from collections.abc import Awaitable, Callable
-from pathlib import Path
-from typing import Literal
-
-import httpx
-from pydantic import BaseModel
-
-from config.user_paths import user_home_path, user_home_read_candidates
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_BASE_URL = "https://ilinkai.weixin.qq.com"
-BOT_TYPE = "3"
-CHANNEL_VERSION = "0.1.0"
-LONG_POLL_TIMEOUT_S = 35
-SEND_TIMEOUT_S = 15
-
-MSG_TYPE_USER = 1
-MSG_TYPE_BOT = 2
-MSG_ITEM_TEXT = 1
-MSG_ITEM_VOICE = 3
-MSG_STATE_FINISH = 2
-
-CONNECTIONS_BASE = user_home_path("connections", "wechat")
-
-RoutingType = Literal["thread", "chat"]
-
-# @@@delivery-callback — injected at construction, avoids circular import of app
-DeliveryFn = Callable[["WeChatConnection", "WeChatMessage"], Awaitable[None]]
-
-
-# --- Pydantic models for API ---
-
-
-class WeChatCredentials(BaseModel):
-    token: str
-    base_url: str = DEFAULT_BASE_URL
-    account_id: str
-    user_id: str = ""
-    saved_at: str = ""
-
-
-class RoutingConfig(BaseModel):
-    type: RoutingType | None = None
-    id: str | None = None
-    label: str = ""
-
-
-class QrPollRequest(BaseModel):
-    qrcode: str
-
-
-class RoutingSetRequest(BaseModel):
-    type: RoutingType
-    id: str
-    label: str = ""
-
-
-class WeChatMessage(BaseModel):
-    from_user_id: str
-    text: str
-    context_token: str
-
-    class Config:
-        frozen = True
-
-
-class WeChatAPIError(Exception):
-    pass
-
-
-class SessionExpiredError(WeChatAPIError):
-    pass
-
-
-# --- ilink protocol helpers ---
-
-
-def _random_wechat_uin() -> str:
-    val = struct.unpack(">I", os.urandom(4))[0]
-    return b64encode(str(val).encode()).decode()
-
-
-def _build_headers(token: str | None = None, body: str | None = None) -> dict[str, str]:
-    headers: dict[str, str] = {
-        "Content-Type": "application/json",
-        "AuthorizationType": "ilink_bot_token",
-        "X-WECHAT-UIN": _random_wechat_uin(),
-    }
-    if body:
-        headers["Content-Length"] = str(len(body.encode()))
-    if token:
-        headers["Authorization"] = f"Bearer {token.strip()}"
-    return headers
-
-
-def _extract_text(msg: dict) -> str:
-    items = msg.get("item_list") or []
-    for item in items:
-        if
item.get("type") == MSG_ITEM_TEXT: - text = (item.get("text_item") or {}).get("text", "") - ref = item.get("ref_msg") - if ref and ref.get("title"): - return f"[引用: {ref['title']}]\n{text}" - return text - if item.get("type") == MSG_ITEM_VOICE: - return (item.get("voice_item") or {}).get("text", "") - return "" - - -# --- Per-user persistence (keyed by user_id) --- - - -def _user_dir(user_id: str) -> Path: - return CONNECTIONS_BASE / user_id - - -def _user_dir_candidates(user_id: str) -> tuple[Path, ...]: - return tuple(path / user_id for path in user_home_read_candidates("connections", "wechat")) - - -def _save_json(user_id: str, filename: str, data: dict) -> None: - d = _user_dir(user_id) - d.mkdir(parents=True, exist_ok=True) - path = d / filename - path.write_text(json.dumps(data, indent=2)) - if filename == "credentials.json": - path.chmod(0o600) - - -def _load_json(user_id: str, filename: str) -> dict | None: - for path in reversed(_user_dir_candidates(user_id)): - candidate = path / filename - if not candidate.exists(): - continue - try: - return json.loads(candidate.read_text()) - except (json.JSONDecodeError, KeyError) as e: - logger.error("Failed to load %s for %s: %s", filename, user_id[:12], e) - return None - - -def _delete_file(user_id: str, filename: str) -> None: - seen: set[Path] = set() - for user_dir in _user_dir_candidates(user_id): - path = user_dir / filename - if path in seen: - continue - seen.add(path) - if path.exists(): - path.unlink() - - -def migrate_entity_id_dirs() -> None: - """Startup migration: rename {user_id}-1/ → {user_id}/ for existing connections.""" - if not CONNECTIONS_BASE.exists(): - return - for user_dir in list(CONNECTIONS_BASE.iterdir()): - if not user_dir.is_dir(): - continue - name = user_dir.name - # Old entity_id format was "{user_id}-1" — strip the suffix - if name.endswith("-1"): - new_name = name[:-2] - new_dir = CONNECTIONS_BASE / new_name - if not new_dir.exists(): - try: - user_dir.rename(new_dir) - logger.info("Migrated WeChat dir: %s → %s", name, new_name) - except Exception as e: - logger.error("Failed to migrate WeChat dir %s: %s", name, e) - - -# --- WeChatConnection (one per human user) --- - - -class WeChatConnection: - """A single user's WeChat connection. Keyed by user_id.""" - - def __init__(self, user_id: str, delivery_fn: DeliveryFn | None = None) -> None: - self.user_id = user_id - self._delivery_fn = delivery_fn - self._credentials: WeChatCredentials | None = None - self._context_tokens: dict[str, str] = {} - self._sync_buf: str = "" - self._poll_task: asyncio.Task | None = None - self._routing = RoutingConfig() - # @@@no-proxy — trust_env=False prevents httpx from inheriting - # http_proxy/all_proxy which causes bimodal latency on long-poll. 
- self._http = httpx.AsyncClient( - timeout=httpx.Timeout(LONG_POLL_TIMEOUT_S + 5), - trust_env=False, - ) - - # Load persisted state - routing_data = _load_json(user_id, "routing.json") - if routing_data: - try: - self._routing = RoutingConfig(**routing_data) - except Exception: - pass - - ctx = _load_json(user_id, "context_tokens.json") - if ctx: - self._context_tokens = ctx - - creds_data = _load_json(user_id, "credentials.json") - if creds_data: - try: - self._credentials = WeChatCredentials(**creds_data) - logger.info("Loaded WeChat credentials for user=%s", user_id[:12]) - except Exception as e: - logger.error("Invalid WeChat credentials for %s: %s", user_id[:12], e) - - @property - def connected(self) -> bool: - return self._credentials is not None - - @property - def polling(self) -> bool: - return self._poll_task is not None and not self._poll_task.done() - - @property - def routing(self) -> RoutingConfig: - return self._routing - - def set_routing(self, config: RoutingConfig) -> None: - self._routing = config - _save_json(self.user_id, "routing.json", config.model_dump()) - - def get_state(self) -> dict: - if not self._credentials: - return {"connected": False, "routing": self._routing.model_dump()} - return { - "connected": True, - "polling": self.polling, - "account_id": self._credentials.account_id, - "user_id": self._credentials.user_id, - "contact_count": len(self._context_tokens), - "contacts": self.list_contacts(), - "routing": self._routing.model_dump(), - } - - def list_contacts(self) -> list[dict[str, str]]: - return [{"user_id": uid, "display_name": uid.split("@")[0] or uid} for uid in self._context_tokens] - - # --- QR Login --- - - async def get_qr_code(self) -> dict: - url = f"{DEFAULT_BASE_URL}/ilink/bot/get_bot_qrcode?bot_type={BOT_TYPE}" - resp = await self._http.get(url, timeout=10) - resp.raise_for_status() - data = resp.json() - return {"qrcode": data["qrcode"], "qrcode_img_url": data["qrcode_img_content"]} - - async def poll_qr_status(self, qrcode: str) -> dict: - url = f"{DEFAULT_BASE_URL}/ilink/bot/get_qrcode_status?qrcode={qrcode}" - try: - resp = await self._http.get( - url, - headers={"iLink-App-ClientVersion": "1"}, - timeout=LONG_POLL_TIMEOUT_S + 5, - ) - resp.raise_for_status() - data = resp.json() - except httpx.TimeoutException: - return {"status": "wait"} - - status = data.get("status", "wait") - if status == "confirmed": - bot_token = data.get("bot_token") - bot_id = data.get("ilink_bot_id") - if not bot_token or not bot_id: - return {"status": "error", "message": "Missing bot credentials in response"} - creds = WeChatCredentials( - token=bot_token, - base_url=data.get("baseurl") or DEFAULT_BASE_URL, - account_id=bot_id, - user_id=data.get("ilink_user_id", ""), - saved_at=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - ) - self._credentials = creds - _save_json(self.user_id, "credentials.json", creds.model_dump()) - logger.info("WeChat connected for user=%s account=%s", self.user_id[:12], creds.account_id) - self.start_polling() - return {"status": "confirmed", "account_id": creds.account_id} - return {"status": status} - - # --- Disconnect --- - - def disconnect(self) -> None: - self.stop_polling() - self._credentials = None - self._context_tokens.clear() - self._sync_buf = "" - _delete_file(self.user_id, "credentials.json") - _delete_file(self.user_id, "context_tokens.json") - logger.info("WeChat disconnected for user=%s", self.user_id[:12]) - - async def close(self) -> None: - """Shutdown: stop polling + close HTTP client.""" - 
self.stop_polling() - await self._http.aclose() - - # --- Polling --- - - def start_polling(self) -> None: - if self.polling: - return - if not self._credentials: - raise RuntimeError("Cannot start polling: not connected") - self._poll_task = asyncio.create_task(self._poll_loop()) - logger.info("WeChat polling started for user=%s", self.user_id[:12]) - - def stop_polling(self) -> None: - if self._poll_task and not self._poll_task.done(): - self._poll_task.cancel() - self._poll_task = None - - async def _deliver_message(self, msg: WeChatMessage) -> None: - """Deliver via injected callback. No circular imports.""" - if not self._delivery_fn: - logger.warning("No delivery function configured for user=%s", self.user_id[:12]) - return - if not self._routing.type or not self._routing.id: - logger.debug("WeChat message not delivered — no routing configured") - return - try: - await self._delivery_fn(self, msg) - except Exception: - logger.exception("Failed to deliver WeChat message") - - async def _poll_loop(self) -> None: - consecutive_failures = 0 - while True: - try: - messages = await self._get_updates() - consecutive_failures = 0 - for msg in messages: - logger.info("WeChat[%s] from=%s: %s", self.user_id[:8], msg.from_user_id[:20], msg.text[:60]) - asyncio.create_task(self._deliver_message(msg)) - except asyncio.CancelledError: - return - except SessionExpiredError: - logger.error("WeChat session expired for user=%s", self.user_id[:12]) - self._credentials = None - _delete_file(self.user_id, "credentials.json") - return - except Exception: - consecutive_failures += 1 - logger.exception("WeChat poll error #%d user=%s", consecutive_failures, self.user_id[:12]) - if consecutive_failures >= 3: - consecutive_failures = 0 - await asyncio.sleep(30) - else: - await asyncio.sleep(2) - - async def _get_updates(self) -> list[WeChatMessage]: - if not self._credentials: - raise RuntimeError("Not connected") - body = json.dumps( - { - "get_updates_buf": self._sync_buf, - "base_info": {"channel_version": CHANNEL_VERSION}, - } - ) - headers = _build_headers(self._credentials.token, body) - try: - resp = await self._http.post( - f"{self._credentials.base_url}/ilink/bot/getupdates", - content=body, - headers=headers, - timeout=LONG_POLL_TIMEOUT_S + 5, - ) - resp.raise_for_status() - data = resp.json() - except httpx.TimeoutException: - return [] - - if data.get("ret", 0) != 0 or data.get("errcode", 0) != 0: - errcode = data.get("errcode", 0) - errmsg = data.get("errmsg", "") - if errcode == -14: - raise SessionExpiredError("Session expired") - raise WeChatAPIError(f"getUpdates: errcode={errcode} {errmsg}") - - if data.get("get_updates_buf"): - self._sync_buf = data["get_updates_buf"] - - messages = [] - tokens_changed = False - for msg in data.get("msgs") or []: - if msg.get("message_type") != MSG_TYPE_USER: - continue - text = _extract_text(msg) - if not text: - continue - sender = msg.get("from_user_id", "unknown") - ctx_token = msg.get("context_token", "") - if ctx_token: - self._context_tokens[sender] = ctx_token - tokens_changed = True - messages.append( - WeChatMessage( - from_user_id=sender, - text=text, - context_token=ctx_token, - ) - ) - if tokens_changed: - await asyncio.to_thread(_save_json, self.user_id, "context_tokens.json", self._context_tokens) - return messages - - # --- Send --- - - async def send_message(self, to_user_id: str, text: str) -> str: - if not self._credentials: - raise RuntimeError("WeChat not connected") - context_token = self._context_tokens.get(to_user_id) - if not 
context_token: - raise RuntimeError(f"No context_token for {to_user_id}. The user needs to message the bot first.") - client_id = f"leon:{int(time.time())}-{random.randint(0, 0xFFFF):04x}" - body = json.dumps( - { - "msg": { - "from_user_id": "", - "to_user_id": to_user_id, - "client_id": client_id, - "message_type": MSG_TYPE_BOT, - "message_state": MSG_STATE_FINISH, - "item_list": [{"type": MSG_ITEM_TEXT, "text_item": {"text": text}}], - "context_token": context_token, - }, - "base_info": {"channel_version": CHANNEL_VERSION}, - } - ) - headers = _build_headers(self._credentials.token, body) - resp = await self._http.post( - f"{self._credentials.base_url}/ilink/bot/sendmessage", - content=body, - headers=headers, - timeout=SEND_TIMEOUT_S, - ) - resp.raise_for_status() - return client_id - - -# --- WeChatConnectionRegistry (lives on app.state) --- - - -class WeChatConnectionRegistry: - """Manages per-user WeChatConnections. Lives on app.state, not module-level.""" - - def __init__(self, delivery_fn: DeliveryFn | None = None) -> None: - self._connections: dict[str, WeChatConnection] = {} - self._delivery_fn = delivery_fn - - def get(self, user_id: str) -> WeChatConnection: - if user_id not in self._connections: - self._connections[user_id] = WeChatConnection(user_id, self._delivery_fn) - return self._connections[user_id] - - def auto_start_all(self) -> None: - """Resume polling for all users with saved credentials on disk.""" - if not CONNECTIONS_BASE.exists(): - return - for user_dir in CONNECTIONS_BASE.iterdir(): - if user_dir.is_dir() and (user_dir / "credentials.json").exists(): - conn = self.get(user_dir.name) - if conn.connected and not conn.polling: - conn.start_polling() - - def evict_duplicates(self, account_id: str, keep_user_id: str) -> None: - """@@@unique-wechat — one WeChat account → one Leon user. 
Last one wins.""" - for uid, conn in list(self._connections.items()): - if uid == keep_user_id: - continue - if conn._credentials and conn._credentials.account_id == account_id: - logger.info("Evicting WeChat: user=%s (same account=%s)", uid[:12], account_id[:12]) - conn.disconnect() - - if CONNECTIONS_BASE.exists(): - for user_dir in CONNECTIONS_BASE.iterdir(): - if not user_dir.is_dir() or user_dir.name == keep_user_id: - continue - data = _load_json(user_dir.name, "credentials.json") - if data and data.get("account_id") == account_id: - logger.info("Evicting persisted WeChat: user=%s", user_dir.name[:12]) - _delete_file(user_dir.name, "credentials.json") - _delete_file(user_dir.name, "context_tokens.json") - - async def shutdown(self) -> None: - """Close all connections gracefully.""" - for conn in self._connections.values(): - await conn.close() - self._connections.clear() diff --git a/backend/web/utils/helpers.py b/backend/web/utils/helpers.py index b652e04f1..436f42948 100644 --- a/backend/web/utils/helpers.py +++ b/backend/web/utils/helpers.py @@ -5,19 +5,16 @@ from fastapi import HTTPException -from backend.web.core.config import DB_PATH from sandbox.sync.state import SyncState from storage.container import StorageContainer from storage.providers.sqlite.chat_session_repo import SQLiteChatSessionRepo from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path from storage.providers.sqlite.terminal_repo import SQLiteTerminalRepo -from storage.runtime import build_storage_container +from storage.runtime import build_storage_container, build_thread_repo SANDBOX_DB_PATH = resolve_role_db_path(SQLiteDBRole.SANDBOX) -# @@@cached-container - reuse a single StorageContainer across helper calls to avoid per-call rebuild. _cached_container: StorageContainer | None = None -_cached_container_db_path: Path | None = None def is_virtual_thread_id(thread_id: str | None) -> bool: @@ -71,11 +68,10 @@ def extract_webhook_instance_id(payload: dict[str, Any]) -> str | None: def _get_container() -> StorageContainer: - global _cached_container, _cached_container_db_path - if _cached_container is not None and _cached_container_db_path == DB_PATH: + global _cached_container + if _cached_container is not None: return _cached_container - _cached_container = build_storage_container(main_db_path=DB_PATH) - _cached_container_db_path = DB_PATH + _cached_container = build_storage_container() return _cached_container @@ -89,34 +85,15 @@ def _get_thread_repo(thread_repo=None): global _cached_thread_repo if _cached_thread_repo is not None: return _cached_thread_repo - from storage.providers.sqlite.thread_repo import SQLiteThreadRepo - - _cached_thread_repo = SQLiteThreadRepo(DB_PATH) + _cached_thread_repo = build_thread_repo() return _cached_thread_repo -def save_thread_config(thread_id: str, thread_repo=None, **fields: Any) -> None: - """Update specific fields of thread config.""" - allowed = {"sandbox_type", "cwd", "model", "observation_provider"} - updates = {k: v for k, v in fields.items() if k in allowed} - if not updates: - return - _get_thread_repo(thread_repo).update(thread_id, **updates) - - def load_thread_config(thread_id: str, thread_repo=None) -> dict[str, Any] | None: """Load thread data. 
Returns dict or None.""" return _get_thread_repo(thread_repo).get_by_id(thread_id) -def get_active_observation_provider() -> str | None: - """Read global observation config and return the active provider name.""" - from config.observation_loader import ObservationLoader - - config = ObservationLoader().load() - return config.active if config.active else None - - def resolve_local_workspace_path( raw_path: str | None, thread_id: str | None = None, diff --git a/backend/web/utils/serializers.py b/backend/web/utils/serializers.py index 4c070f285..082f08b44 100644 --- a/backend/web/utils/serializers.py +++ b/backend/web/utils/serializers.py @@ -38,7 +38,15 @@ def extract_text_content(raw_content: Any) -> str: def serialize_message(msg: Any) -> dict[str, Any]: """Serialize a LangChain message to a JSON-compatible dict.""" content = getattr(msg, "content", "") - metadata = getattr(msg, "metadata", None) or {} + metadata = dict(getattr(msg, "metadata", None) or {}) + additional_kwargs = getattr(msg, "additional_kwargs", None) or {} + tool_result_meta = additional_kwargs.get("tool_result_meta") + # @@@tool-result-meta-bridge - LangChain ToolMessage keeps durable tool + # metadata in additional_kwargs, but Leon display rebuild consumes + # serialized metadata. Merge the exact structured tool_result_meta here so + # checkpoint rebuild can recover blocking subagent identity honestly. + if isinstance(tool_result_meta, dict): + metadata = {**tool_result_meta, **metadata} # Strip system tags from owner HumanMessages (context-shift hints). # External HumanMessages keep their so frontend can @@ -63,4 +71,6 @@ def serialize_message(msg: Any) -> dict[str, Any]: } if metadata: result["metadata"] = metadata + if metadata.get("source") == "internal": + result["display"] = {"showing": False} return result diff --git a/config/defaults/tool_catalog.py b/config/defaults/tool_catalog.py index 294293874..1c2e67d2e 100644 --- a/config/defaults/tool_catalog.py +++ b/config/defaults/tool_catalog.py @@ -21,7 +21,9 @@ class ToolGroup(StrEnum): COMMAND = "command" WEB = "web" AGENT = "agent" + CHAT = "chat" TODO = "todo" + CRON = "cron" SKILLS = "skills" SYSTEM = "system" TASKBOARD = "taskboard" @@ -62,16 +64,26 @@ class ToolDef(BaseModel): ToolDef(name="TaskOutput", desc="获取后台任务输出", group=ToolGroup.AGENT), ToolDef(name="TaskStop", desc="停止后台任务", group=ToolGroup.AGENT), ToolDef(name="Agent", desc="启动子 Agent 执行任务", group=ToolGroup.AGENT), - ToolDef(name="SendMessage", desc="向其他 Agent 发送消息", group=ToolGroup.AGENT), + ToolDef(name="SendMessage", desc="向运行中的 Agent 发送排队消息", group=ToolGroup.AGENT), + # chat + ToolDef(name="list_chats", desc="列出当前实体可访问的聊天会话", group=ToolGroup.CHAT), + ToolDef(name="read_messages", desc="读取聊天消息并标记为已读", group=ToolGroup.CHAT), + ToolDef(name="send_message", desc="向聊天对象发送消息", group=ToolGroup.CHAT), + ToolDef(name="search_messages", desc="搜索历史聊天消息", group=ToolGroup.CHAT), # todo ToolDef(name="TaskCreate", desc="创建待办任务", group=ToolGroup.TODO, mode=ToolMode.DEFERRED), ToolDef(name="TaskGet", desc="获取任务详情", group=ToolGroup.TODO, mode=ToolMode.DEFERRED), ToolDef(name="TaskList", desc="列出所有任务", group=ToolGroup.TODO, mode=ToolMode.DEFERRED), ToolDef(name="TaskUpdate", desc="更新任务状态", group=ToolGroup.TODO, mode=ToolMode.DEFERRED), + # cron — backed by existing cron_jobs substrate; off by default until explicitly enabled + ToolDef(name="CronCreate", desc="创建定时任务", group=ToolGroup.CRON, mode=ToolMode.DEFERRED, default=False), + ToolDef(name="CronDelete", desc="删除定时任务", group=ToolGroup.CRON, 
mode=ToolMode.DEFERRED, default=False), + ToolDef(name="CronList", desc="列出定时任务", group=ToolGroup.CRON, mode=ToolMode.DEFERRED, default=False), # skills ToolDef(name="load_skill", desc="加载 Skill", group=ToolGroup.SKILLS), # system ToolDef(name="tool_search", desc="搜索可用工具", group=ToolGroup.SYSTEM), + ToolDef(name="LSP", desc="Language Server Protocol 操作", group=ToolGroup.SYSTEM, mode=ToolMode.DEFERRED, default=False), # taskboard — all off by default; enable on dedicated scheduler members ToolDef(name="ListBoardTasks", desc="列出任务板上的任务", group=ToolGroup.TASKBOARD, default=False), ToolDef(name="ClaimTask", desc="认领一个任务板任务", group=ToolGroup.TASKBOARD, default=False), diff --git a/config/env_manager.py b/config/env_manager.py deleted file mode 100644 index a5f5a6cc6..000000000 --- a/config/env_manager.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -Leon 配置管理模块 -""" - -import os -from pathlib import Path - - -class ConfigManager: - """管理 Leon 的配置""" - - def __init__(self): - self.config_dir = Path.home() / ".leon" - self.config_file = self.config_dir / "config.env" - self.config_dir.mkdir(parents=True, exist_ok=True) - - def _parse_file(self) -> dict[str, str]: - if not self.config_file.exists(): - return {} - config = {} - for line in self.config_file.read_text().splitlines(): - line = line.strip() - if line and not line.startswith("#") and "=" in line: - k, v = line.split("=", 1) - config[k.strip()] = v.strip() - return config - - def get(self, key: str) -> str | None: - """获取配置值""" - return self._parse_file().get(key) - - def set(self, key: str, value: str): - """设置配置值""" - config = self._parse_file() - config[key] = value - with self.config_file.open("w") as f: - for k, v in config.items(): - f.write(f"{k}={v}\n") - - def list_all(self) -> dict[str, str]: - """列出所有配置""" - return self._parse_file() - - def load_to_env(self): - """加载配置到环境变量""" - for key, value in self.list_all().items(): - if key not in os.environ: - # 规范化 OPENAI_BASE_URL:确保包含 /v1 - if key == "OPENAI_BASE_URL" and value: - value = normalize_base_url(value) - os.environ[key] = value - - -def normalize_base_url(url: str) -> str: - """ - 规范化 OpenAI 兼容 API 的 base_url - - OpenAI SDK 会在 base_url 后直接拼接 /chat/completions, - 所以 base_url 必须以 /v1 结尾。 - - Examples: - https://api.openai.com -> https://api.openai.com/v1 - https://yunwu.ai -> https://yunwu.ai/v1 - https://yunwu.ai/v1 -> https://yunwu.ai/v1 (不变) - https://example.com/api/v1 -> https://example.com/api/v1 (不变) - """ - if not url: - return url - - url = url.rstrip("/") - - # 如果已经以 /v1 结尾,不处理 - if url.endswith("/v1"): - return url - - # 如果包含 /v1/ 在中间(如 /v1/engines),不处理 - if "/v1/" in url: - return url - - # 否则补全 /v1 - return f"{url}/v1" diff --git a/config/loader.py b/config/loader.py index 7b2f3190c..3931147ff 100644 --- a/config/loader.py +++ b/config/loader.py @@ -153,7 +153,7 @@ def _load_agents_from_members(self, members_dir: Path) -> None: continue config = self.parse_agent_file(agent_md) if config: - # source_dir is already set to member_dir by parse_agent_file + config.source_dir = member_dir.resolve() self._agents[config.name] = config @staticmethod @@ -184,7 +184,7 @@ def parse_agent_file(path: Path) -> AgentConfig | None: tools=fm.get("tools", ["*"]), system_prompt=parts[2].strip(), model=fm.get("model"), - source_dir=path.resolve().parent, + source_dir=None, ) def get_agent(self, name: str) -> AgentConfig | None: @@ -422,3 +422,74 @@ def load_config( ) -> LeonSettings: """Convenience function to load runtime configuration.""" return 
AgentLoader(workspace_root=workspace_root).load(cli_overrides=cli_overrides) + + +def load_bundle_from_repo(agent_config_repo: Any, member_id: str) -> AgentBundle | None: + """Load agent bundle from Supabase agent_config tables. Returns None if no config found.""" + config = agent_config_repo.get_config(member_id) + if not config: + return None + + # Parse agent identity from config + agent = AgentConfig( + name=config.get("name", ""), + description=config.get("description", ""), + tools=config.get("tools", ["*"]), + system_prompt=config.get("system_prompt", ""), + model=config.get("model"), + source_dir=None, + ) + + meta = { + "status": config.get("status", "draft"), + "version": config.get("version", "0.1.0"), + "created_at": config.get("created_at", 0), + "updated_at": config.get("updated_at", 0), + } + + # Runtime from config + runtime_data = config.get("runtime") or {} + runtime = {} + for rname, rcfg in runtime_data.items(): + if isinstance(rcfg, dict): + runtime[rname] = RuntimeResourceConfig(**rcfg) + + # Rules from agent_rules table + rule_rows = agent_config_repo.list_rules(member_id) + rules = [{"name": r.get("filename", "").replace(".md", ""), "content": r.get("content", "")} for r in rule_rows] + + # Sub-agents from agent_sub_agents table + sub_agent_rows = agent_config_repo.list_sub_agents(member_id) + agents = [] + for sa in sub_agent_rows: + agents.append( + AgentConfig( + name=sa.get("name", ""), + description=sa.get("description", ""), + tools=sa.get("tools", ["*"]), + system_prompt=sa.get("system_prompt", ""), + model=sa.get("model"), + source_dir=None, + ) + ) + + # Skills from agent_skills table + skill_rows = agent_config_repo.list_skills(member_id) + skills = [{"name": s.get("name", ""), "content": s.get("content", "")} for s in skill_rows] + + # MCP from config + mcp_data = config.get("mcp") or {} + mcp = {} + for mname, mcfg in mcp_data.items(): + if isinstance(mcfg, dict): + mcp[mname] = McpServerConfig(**{k: v for k, v in mcfg.items() if k in McpServerConfig.model_fields}) + + return AgentBundle( + agent=agent, + meta=meta, + runtime=runtime, + rules=rules, + agents=agents, + skills=skills, + mcp=mcp, + ) diff --git a/config/observation_schema.py b/config/observation_schema.py index eb01acd02..3d819cf78 100644 --- a/config/observation_schema.py +++ b/config/observation_schema.py @@ -3,6 +3,8 @@ Per-provider named fields, following sandbox/config.py pattern. """ +from typing import Annotated + from pydantic import BaseModel, Field @@ -11,7 +13,7 @@ class LangfuseConfig(BaseModel): secret_key: str | None = None public_key: str | None = None - host: str | None = Field(None, description="e.g. https://cloud.langfuse.com") + host: Annotated[str | None, Field(description="e.g. 
https://cloud.langfuse.com")] = None class LangSmithConfig(BaseModel): @@ -26,5 +28,5 @@ class ObservationConfig(BaseModel): """Observation configuration with per-provider named fields.""" active: str | None = Field(None, description="'langfuse' | 'langsmith' | None (disabled)") - langfuse: LangfuseConfig = Field(default_factory=LangfuseConfig) - langsmith: LangSmithConfig = Field(default_factory=LangSmithConfig) + langfuse: LangfuseConfig = Field(default_factory=lambda: LangfuseConfig()) + langsmith: LangSmithConfig = Field(default_factory=lambda: LangSmithConfig()) diff --git a/config/schema.py b/config/schema.py index 53a0cc8ea..8aff62bb7 100644 --- a/config/schema.py +++ b/config/schema.py @@ -11,7 +11,7 @@ from __future__ import annotations from pathlib import Path -from typing import Any +from typing import Annotated, Any from pydantic import BaseModel, Field, field_validator @@ -26,15 +26,17 @@ class RuntimeConfig(BaseModel): """Runtime behavior configuration (non-model identity).""" - temperature: float | None = Field(None, ge=0.0, le=2.0, description="Temperature") - max_tokens: int | None = Field(None, gt=0, description="Max tokens") - model_kwargs: dict[str, Any] = Field(default_factory=dict, description="Extra kwargs for init_chat_model") - context_limit: int = Field(0, ge=0, description="Context window limit in tokens (0 = auto-detect from model)") - enable_audit_log: bool = Field(True, description="Enable audit logging") - allowed_extensions: list[str] | None = Field(None, description="Allowed extensions (None = all)") - block_dangerous_commands: bool = Field(True, description="Block dangerous commands") - block_network_commands: bool = Field(False, description="Block network commands") - queue_mode: str = Field("steer", deprecated=True, description="Deprecated. Queue mode is now determined by message timing.") + temperature: Annotated[float | None, Field(ge=0.0, le=2.0, description="Temperature")] = None + max_tokens: Annotated[int | None, Field(gt=0, description="Max tokens")] = None + model_kwargs: Annotated[dict[str, Any], Field(default_factory=dict, description="Extra kwargs for init_chat_model")] = Field( + default_factory=dict + ) + context_limit: Annotated[int, Field(ge=0, description="Context window limit in tokens (0 = auto-detect from model)")] = 0 + enable_audit_log: Annotated[bool, Field(description="Enable audit logging")] = True + allowed_extensions: Annotated[list[str] | None, Field(description="Allowed extensions (None = all)")] = None + block_dangerous_commands: Annotated[bool, Field(description="Block dangerous commands")] = True + block_network_commands: Annotated[bool, Field(description="Block network commands")] = False + queue_mode: Annotated[str, Field(deprecated=True, description="Deprecated. Queue mode is now determined by message timing.")] = "steer" # ============================================================================ @@ -48,11 +50,11 @@ class PruningConfig(BaseModel): Field names match SessionPruner constructor for direct passthrough. 
""" - enabled: bool = Field(True, description="Enable message pruning") - soft_trim_chars: int = Field(3000, gt=0, description="Soft-trim tool results longer than this") - hard_clear_threshold: int = Field(10000, gt=0, description="Hard-clear tool results longer than this") - protect_recent: int = Field(3, gt=0, description="Keep last N tool messages untrimmed") - trim_tool_results: bool = Field(True, description="Trim large tool results") + enabled: Annotated[bool, Field(description="Enable message pruning")] = True + soft_trim_chars: Annotated[int, Field(gt=0, description="Soft-trim tool results longer than this")] = 3000 + hard_clear_threshold: Annotated[int, Field(gt=0, description="Hard-clear tool results longer than this")] = 10000 + protect_recent: Annotated[int, Field(gt=0, description="Keep last N tool messages untrimmed")] = 3 + trim_tool_results: Annotated[bool, Field(description="Trim large tool results")] = True class CompactionConfig(BaseModel): @@ -61,17 +63,17 @@ class CompactionConfig(BaseModel): Field names match ContextCompactor constructor for direct passthrough. """ - enabled: bool = Field(True, description="Enable context compaction") - reserve_tokens: int = Field(16384, gt=0, description="Reserve space for new messages") - keep_recent_tokens: int = Field(20000, gt=0, description="Keep recent messages verbatim") - min_messages: int = Field(20, gt=0, description="Minimum messages before compaction") + enabled: Annotated[bool, Field(description="Enable context compaction")] = True + reserve_tokens: Annotated[int, Field(gt=0, description="Reserve space for new messages")] = 16384 + keep_recent_tokens: Annotated[int, Field(gt=0, description="Keep recent messages verbatim")] = 20000 + min_messages: Annotated[int, Field(gt=0, description="Minimum messages before compaction")] = 20 class MemoryConfig(BaseModel): """Memory management configuration.""" - pruning: PruningConfig = Field(default_factory=PruningConfig) - compaction: CompactionConfig = Field(default_factory=CompactionConfig) + pruning: PruningConfig = Field(default_factory=lambda: PruningConfig()) + compaction: CompactionConfig = Field(default_factory=lambda: CompactionConfig()) # ============================================================================ @@ -83,13 +85,13 @@ class ReadFileConfig(BaseModel): """Configuration for read_file tool.""" enabled: bool = True - max_file_size: int = Field(10485760, gt=0, description="Max file size in bytes (10MB)") + max_file_size: Annotated[int, Field(gt=0, description="Max file size in bytes (10MB)")] = 10485760 class FileSystemToolsConfig(BaseModel): """Configuration for filesystem tools.""" - read_file: ReadFileConfig = Field(default_factory=ReadFileConfig) + read_file: ReadFileConfig = Field(default_factory=lambda: ReadFileConfig()) write_file: bool = True edit_file: bool = True list_dir: bool = True @@ -99,20 +101,20 @@ class FileSystemConfig(BaseModel): """Configuration for filesystem middleware.""" enabled: bool = True - tools: FileSystemToolsConfig = Field(default_factory=FileSystemToolsConfig) + tools: FileSystemToolsConfig = Field(default_factory=lambda: FileSystemToolsConfig()) class GrepConfig(BaseModel): """Configuration for Grep tool.""" enabled: bool = True - max_file_size: int = Field(10485760, gt=0, description="Max file size in bytes (10MB)") + max_file_size: Annotated[int, Field(gt=0, description="Max file size in bytes (10MB)")] = 10485760 class SearchToolsConfig(BaseModel): """Configuration for search tools.""" - grep: GrepConfig = 
Field(default_factory=GrepConfig) + grep: GrepConfig = Field(default_factory=lambda: GrepConfig()) glob: bool = True @@ -120,52 +122,52 @@ class SearchConfig(BaseModel): """Configuration for search middleware.""" enabled: bool = True - tools: SearchToolsConfig = Field(default_factory=SearchToolsConfig) + tools: SearchToolsConfig = Field(default_factory=lambda: SearchToolsConfig()) class WebSearchConfig(BaseModel): """Configuration for web_search tool.""" enabled: bool = True - max_results: int = Field(5, gt=0, description="Max search results") - tavily_api_key: str | None = Field(None, description="Tavily API key") - exa_api_key: str | None = Field(None, description="Exa API key") - firecrawl_api_key: str | None = Field(None, description="Firecrawl API key") + max_results: Annotated[int, Field(gt=0, description="Max search results")] = 5 + tavily_api_key: Annotated[str | None, Field(description="Tavily API key")] = None + exa_api_key: Annotated[str | None, Field(description="Exa API key")] = None + firecrawl_api_key: Annotated[str | None, Field(description="Firecrawl API key")] = None class FetchConfig(BaseModel): """Configuration for Fetch tool (AI extraction mode).""" enabled: bool = True - jina_api_key: str | None = Field(None, description="Jina AI API key") + jina_api_key: Annotated[str | None, Field(description="Jina AI API key")] = None class WebToolsConfig(BaseModel): """Configuration for web tools.""" - web_search: WebSearchConfig = Field(default_factory=WebSearchConfig) - fetch: FetchConfig = Field(default_factory=FetchConfig) + web_search: WebSearchConfig = Field(default_factory=lambda: WebSearchConfig()) + fetch: FetchConfig = Field(default_factory=lambda: FetchConfig()) class WebConfig(BaseModel): """Configuration for web middleware.""" enabled: bool = True - timeout: int = Field(15, gt=0, description="Request timeout in seconds") - tools: WebToolsConfig = Field(default_factory=WebToolsConfig) + timeout: Annotated[int, Field(gt=0, description="Request timeout in seconds")] = 15 + tools: WebToolsConfig = Field(default_factory=lambda: WebToolsConfig()) class RunCommandConfig(BaseModel): """Configuration for run_command tool.""" enabled: bool = True - default_timeout: int = Field(120, gt=0, description="Default timeout in seconds") + default_timeout: Annotated[int, Field(gt=0, description="Default timeout in seconds")] = 120 class CommandToolsConfig(BaseModel): """Configuration for command tools.""" - run_command: RunCommandConfig = Field(default_factory=RunCommandConfig) + run_command: RunCommandConfig = Field(default_factory=lambda: RunCommandConfig()) command_status: bool = True @@ -173,14 +175,14 @@ class CommandConfig(BaseModel): """Configuration for command middleware.""" enabled: bool = True - tools: CommandToolsConfig = Field(default_factory=CommandToolsConfig) + tools: CommandToolsConfig = Field(default_factory=lambda: CommandToolsConfig()) class SpillBufferConfig(BaseModel): """Configuration for SpillBuffer middleware.""" enabled: bool = True - default_threshold: int = Field(50_000, gt=0, description="Default spill threshold in bytes") + default_threshold: Annotated[int, Field(gt=0, description="Default spill threshold in bytes")] = 50_000 thresholds: dict[str, int] = Field( default_factory=lambda: { "Grep": 20_000, @@ -196,11 +198,11 @@ class SpillBufferConfig(BaseModel): class ToolsConfig(BaseModel): """Tools configuration.""" - filesystem: FileSystemConfig = Field(default_factory=FileSystemConfig) - search: SearchConfig = Field(default_factory=SearchConfig) - web: 
WebConfig = Field(default_factory=WebConfig) - command: CommandConfig = Field(default_factory=CommandConfig) - spill_buffer: SpillBufferConfig = Field(default_factory=SpillBufferConfig) + filesystem: FileSystemConfig = Field(default_factory=lambda: FileSystemConfig()) + search: SearchConfig = Field(default_factory=lambda: SearchConfig()) + web: WebConfig = Field(default_factory=lambda: WebConfig()) + command: CommandConfig = Field(default_factory=lambda: CommandConfig()) + spill_buffer: SpillBufferConfig = Field(default_factory=lambda: SpillBufferConfig()) tool_modes: dict[str, str] = Field( default_factory=dict, description="Per-tool mode overrides: tool_name -> 'inline' | 'deferred'", @@ -215,6 +217,10 @@ class ToolsConfig(BaseModel): class MCPServerConfig(BaseModel): """Configuration for a single MCP server.""" + transport: str | None = Field( + None, + description="MCP transport type: stdio | streamable_http | sse | websocket", + ) command: str | None = Field(None, description="Command to run the MCP server") args: list[str] = Field(default_factory=list, description="Command arguments") env: dict[str, str] = Field(default_factory=dict, description="Environment variables") @@ -271,13 +277,13 @@ class LeonSettings(BaseModel): """ # Runtime behavior (replaces APIConfig model-identity fields) - runtime: RuntimeConfig = Field(default_factory=RuntimeConfig, description="Runtime behavior config") + runtime: RuntimeConfig = Field(default_factory=lambda: RuntimeConfig(), description="Runtime behavior config") # Core configuration groups - memory: MemoryConfig = Field(default_factory=MemoryConfig, description="Memory management") - tools: ToolsConfig = Field(default_factory=ToolsConfig, description="Tools configuration") - mcp: MCPConfig = Field(default_factory=MCPConfig, description="MCP configuration") - skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration") + memory: MemoryConfig = Field(default_factory=lambda: MemoryConfig(), description="Memory management") + tools: ToolsConfig = Field(default_factory=lambda: ToolsConfig(), description="Tools configuration") + mcp: MCPConfig = Field(default_factory=lambda: MCPConfig(), description="MCP configuration") + skills: SkillsConfig = Field(default_factory=lambda: SkillsConfig(), description="Skills configuration") # Agent configuration system_prompt: str | None = Field(None, description="Custom system prompt") diff --git a/config/types.py b/config/types.py index 9731d5aff..0c49458fd 100644 --- a/config/types.py +++ b/config/types.py @@ -20,10 +20,12 @@ class AgentConfig(BaseModel): class McpServerConfig(BaseModel): """Single MCP server entry from .mcp.json.""" + transport: str | None = None command: str | None = None args: list[str] = Field(default_factory=list) env: dict[str, str] = Field(default_factory=dict) url: str | None = None + instructions: str | None = None allowed_tools: list[str] | None = None disabled: bool = False diff --git a/core/agents/communication/delivery.py b/core/agents/communication/delivery.py index c14ee6025..c79e4c121 100644 --- a/core/agents/communication/delivery.py +++ b/core/agents/communication/delivery.py @@ -1,22 +1,30 @@ """Chat delivery — enqueues lightweight notifications for agent threads. v3: no full message text injected. Agent must chat_read to see content. -ChatService._deliver_to_agents calls the delivery function for each -non-sender agent entity. +MessagingService._deliver_to_agents calls the delivery function for each +non-sender agent member. 
""" from __future__ import annotations +import functools import logging from typing import Any -from storage.contracts import EntityRow +from storage.contracts import MemberRow logger = logging.getLogger(__name__) +def _resolve_recipient_thread_id(app: Any, recipient_id: str) -> str | None: + thread = app.state.thread_repo.get_by_user_id(recipient_id) + if thread is None: + return None + return thread["id"] + + def make_chat_delivery_fn(app: Any): - """Create a delivery callback for ChatService. + """Create a delivery callback for MessagingService. Uses qm.enqueue() + wake_handler to route notifications. No more route_fn injection from backend layer. @@ -27,7 +35,8 @@ def make_chat_delivery_fn(app: Any): logger.info("[delivery] make_chat_delivery_fn: loop=%s", loop) def _deliver( - entity: EntityRow, + recipient_id: str, + member: MemberRow, content: str, sender_name: str, chat_id: str, @@ -35,27 +44,30 @@ def _deliver( sender_avatar_url: str | None = None, signal: str | None = None, ) -> None: - logger.info("[delivery] _deliver called: entity=%s, thread=%s", entity.id, entity.thread_id) + logger.info("[delivery] _deliver called: recipient=%s member=%s", recipient_id, member.id) future = asyncio.run_coroutine_threadsafe( - _async_deliver(app, entity, sender_name, chat_id, sender_id, sender_avatar_url, signal=signal), + _async_deliver(app, recipient_id, member, sender_name, chat_id, sender_id, sender_avatar_url, signal=signal), loop, ) - def _on_done(f): - exc = f.exception() - if exc: - logger.error("[delivery] async delivery failed for %s: %s", entity.id, exc, exc_info=exc) - else: - logger.info("[delivery] async delivery completed for %s", entity.id) - - future.add_done_callback(_on_done) + future.add_done_callback(functools.partial(_log_delivery_result, recipient_id)) return _deliver +def _log_delivery_result(member_id: str, f: Any) -> None: + """Done-callback for async delivery futures.""" + exc = f.exception() + if exc: + logger.error("[delivery] async delivery failed for %s: %s", member_id, exc, exc_info=exc) + else: + logger.info("[delivery] async delivery completed for %s", member_id) + + async def _async_deliver( app: Any, - entity: EntityRow, + recipient_id: str, + member: MemberRow, sender_name: str, chat_id: str, sender_id: str, @@ -64,25 +76,22 @@ async def _async_deliver( ) -> None: """Enqueue chat notification to an agent's brain thread. - @@@v3-notification-only — no message content. Agent calls chat_read to see it. + @@@v3-notification-only — no message content. Agent calls read_messages to see it. """ - # @@@context-isolation — clear inherited LangChain ContextVar so the recipient - # agent's astream doesn't inherit the sender's StreamMessagesHandler callbacks. from langchain_core.runnables.config import var_child_runnable_config var_child_runnable_config.set(None) - logger.info("[delivery] _async_deliver: entity=%s thread=%s from=%s", entity.id, entity.thread_id, sender_name) + # @@@thread-delivery-route - delivery target must come from the recipient social handle, + # never from the template default-thread shortcut. 
+ thread_id = _resolve_recipient_thread_id(app, recipient_id) + logger.info("[delivery] _async_deliver: recipient=%s member=%s thread=%s from=%s", recipient_id, member.id, thread_id, sender_name) from core.runtime.middleware.queue.formatters import format_chat_notification - if not entity.thread_id: - logger.warning("Entity %s has no thread_id, skipping delivery", entity.id) + if not thread_id: + logger.warning("Recipient %s has no thread, skipping delivery", recipient_id) return - thread_id = entity.thread_id - - # @@@cold-wake — ensure agent + wake_handler exist before enqueue. - # Without this, enqueue on an unvisited thread has no handler to wake the agent. from backend.web.services.agent_pool import get_or_create_agent, resolve_thread_sandbox from backend.web.services.streaming_service import _ensure_thread_handlers @@ -90,13 +99,11 @@ async def _async_deliver( agent = await get_or_create_agent(app, sandbox_type, thread_id=thread_id) _ensure_thread_handlers(agent, thread_id, app) - # @@@typing-lifecycle - start typing indicator typing_tracker = getattr(app.state, "typing_tracker", None) if typing_tracker is not None: - typing_tracker.start_chat(thread_id, chat_id, entity.id) + typing_tracker.start_chat(thread_id, chat_id, recipient_id) - # Unread count for this recipient - unread_count = app.state.chat_message_repo.count_unread(chat_id, entity.id) + unread_count = app.state.messaging_service.count_unread(chat_id, recipient_id) formatted = format_chat_notification(sender_name, chat_id, unread_count, signal=signal) diff --git a/core/agents/registry.py b/core/agents/registry.py index f74f4f4ec..d6f492f34 100644 --- a/core/agents/registry.py +++ b/core/agents/registry.py @@ -1,4 +1,4 @@ -"""Agent Registry — SQLite-backed agent_id -> thread_id mapping. +"""Agent Registry — Supabase-backed agent_id -> thread_id mapping. @@@id-based — all lookups use agent_id, never name. Name is stored for display only. @@ -8,9 +8,9 @@ import asyncio from dataclasses import dataclass -from pathlib import Path +from typing import Any -from backend.web.core.storage_factory import make_agent_registry_repo +from storage.runtime import build_agent_registry_repo @dataclass @@ -23,17 +23,42 @@ class AgentEntry: subagent_type: str | None = None -class AgentRegistry: - """SQLite-backed registry mapping agent_ids to thread IDs. 
+class _InMemoryAgentRegistryRepo: + """Noop in-memory fallback when Supabase is unavailable (tests/CLI).""" + + def __init__(self) -> None: + self._rows: dict[str, tuple] = {} + + def register( + self, *, agent_id: str, name: str, thread_id: str, status: str, parent_agent_id: str | None = None, subagent_type: str | None = None + ) -> None: + self._rows[agent_id] = (agent_id, name, thread_id, status, parent_agent_id, subagent_type) + + def get_by_id(self, agent_id: str) -> tuple | None: + return self._rows.get(agent_id) + + def list_running_by_name(self, name: str) -> list[tuple]: + return [r for r in self._rows.values() if r[1] == name and r[3] == "running"] + + def get_latest_by_name_and_parent(self, name: str, parent_agent_id: str | None) -> tuple | None: + matches = [r for r in self._rows.values() if r[1] == name and r[4] == parent_agent_id] + return matches[-1] if matches else None + + def update_status(self, agent_id: str, status: str) -> None: + if agent_id in self._rows: + old = self._rows[agent_id] + self._rows[agent_id] = (old[0], old[1], old[2], status, old[4], old[5]) - Persisted at ~/.leon/agent_registry.db - """ + def list_running(self) -> list[tuple]: + return [r for r in self._rows.values() if r[3] == "running"] - DEFAULT_DB_PATH = None # resolved by storage_factory - def __init__(self, db_path: Path | None = None): +class AgentRegistry: + """Supabase-backed registry mapping agent_ids to thread IDs.""" + + def __init__(self, repo: Any = None): self._lock = asyncio.Lock() - self._repo = make_agent_registry_repo() + self._repo = repo or build_agent_registry_repo() async def register(self, entry: AgentEntry) -> None: async with self._lock: @@ -59,6 +84,33 @@ async def get_by_id(self, agent_id: str) -> AgentEntry | None: subagent_type=row[5], ) + async def list_running_by_name(self, name: str) -> list[AgentEntry]: + rows = self._repo.list_running_by_name(name) + return [ + AgentEntry( + agent_id=row[0], + name=row[1], + thread_id=row[2], + status=row[3], + parent_agent_id=row[4], + subagent_type=row[5], + ) + for row in rows + ] + + async def get_latest_by_name_and_parent(self, name: str, parent_agent_id: str | None) -> AgentEntry | None: + row = self._repo.get_latest_by_name_and_parent(name, parent_agent_id) + if row is None: + return None + return AgentEntry( + agent_id=row[0], + name=row[1], + thread_id=row[2], + status=row[3], + parent_agent_id=row[4], + subagent_type=row[5], + ) + async def update_status(self, agent_id: str, status: str) -> None: async with self._lock: self._repo.update_status(agent_id, status) diff --git a/core/agents/service.py b/core/agents/service.py index e7baff89b..a35da5d37 100644 --- a/core/agents/service.py +++ b/core/agents/service.py @@ -11,89 +11,305 @@ import asyncio import json import logging +import os +import time import uuid +from collections.abc import Awaitable, Callable from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any, cast +from config.loader import AgentLoader from core.agents.registry import AgentEntry, AgentRegistry -from core.runtime.middleware.queue.formatters import format_background_notification -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry +from core.runtime.middleware.queue.formatters import ( + format_agent_message, + format_background_notification, + format_progress_notification, +) +from core.runtime.permissions import ToolPermissionContext +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema +from core.runtime.state import 
BootstrapConfig, ToolUseContext +from core.runtime.tool_result import tool_error, tool_permission_request, tool_success logger = logging.getLogger(__name__) +if TYPE_CHECKING: + from core.runtime.agent import LeonAgent -AGENT_SCHEMA = { - "name": "Agent", - "description": ( - "Launch a new agent to handle complex tasks autonomously. " - "Use subagent_type to select a specialized agent, or omit for default. " - "Agents run independently with their own tool stack." + +EventEmitter = Callable[[dict[str, Any]], Awaitable[None] | None] +ChildAgentFactory = Callable[..., "LeonAgent"] + + +def _resolve_default_child_agent_factory() -> ChildAgentFactory: + from core.runtime.agent import create_leon_agent + + return cast(ChildAgentFactory, create_leon_agent) + + +# ── Sub-agent tool filtering (CC alignment) ────────────────────────────────── +# Tools that sub-agents must never access (prevents controlling parent). +AGENT_DISALLOWED: set[str] = {"TaskOutput", "TaskStop", "Agent"} + +# Per-type allowed tool sets. Tools not in the set are blocked. +EXPLORE_ALLOWED: set[str] = {"Read", "Grep", "Glob", "list_dir", "WebSearch", "WebFetch", "tool_search"} +PLAN_ALLOWED: set[str] = EXPLORE_ALLOWED # plan agents are also read-only +BASH_ALLOWED: set[str] = {"Bash", "Read", "Grep", "Glob", "list_dir", "tool_search"} + + +def _get_tool_filters(subagent_type: str) -> tuple[set[str], set[str] | None]: + """Return (extra_blocked_tools, allowed_tools) for a sub-agent type. + + For explore/plan/bash: use allowed_tools whitelist (ToolRegistry skips unmatched). + For general: only block AGENT_DISALLOWED, no whitelist. + """ + agent_type = subagent_type.lower() + allowed_map: dict[str, set[str]] = { + "explore": EXPLORE_ALLOWED, + "plan": PLAN_ALLOWED, + "bash": BASH_ALLOWED, + } + + if agent_type in allowed_map: + return AGENT_DISALLOWED, allowed_map[agent_type] + + # general: only block parent-controlling tools, no whitelist + return AGENT_DISALLOWED, None + + +def _get_subagent_agent_name(subagent_type: str) -> str: + return subagent_type.lower() + + +def _resolve_subagent_model( + workspace_root: Path, + subagent_type: str, + requested_model: str | None, + inherited_model: str, + fallback_model: str | None = None, +) -> str: + def _is_inherit_marker(value: str | None) -> bool: + return value is None or value.lower() in {"default", "inherit"} + + env_model = os.getenv("CLAUDE_CODE_SUBAGENT_MODEL") + if env_model: + return env_model + if requested_model and not _is_inherit_marker(requested_model): + return requested_model + + agent_def = AgentLoader(workspace_root=workspace_root).load_all_agents().get(_get_subagent_agent_name(subagent_type)) + if agent_def and agent_def.model: + return agent_def.model + + if inherited_model and not _is_inherit_marker(inherited_model): + return inherited_model + if fallback_model and not _is_inherit_marker(fallback_model): + return fallback_model + return inherited_model + + +def _normalize_child_workspace_prompt(prompt: str, workspace_root: Path) -> str: + workspace_text = str(workspace_root) + for suffix in ("current working directory", "working directory"): + prompt = prompt.replace(f"{workspace_text}/{suffix}", workspace_text) + return prompt + + +def _filter_fork_messages(messages: list) -> list: + """Filter parent messages for forkContext sub-agent spawning. + + Equivalent to CC's yF0: removes assistant messages whose tool_use blocks + have no matching tool_result in a subsequent user message (orphan tool_use). + Orphan tool_use blocks cause Anthropic API validation errors. 
+ """ + # Collect all tool_use_ids that have a corresponding tool_result + answered: set[str] = set() + for msg in messages: + # ToolMessage or user message with tool_result content + tool_call_id = getattr(msg, "tool_call_id", None) + if tool_call_id: + answered.add(tool_call_id) + content = getattr(msg, "content", None) + if isinstance(content, list): + for block in content: + if isinstance(block, dict) and block.get("type") == "tool_result": + tid = block.get("tool_use_id") or block.get("tool_call_id") + if tid: + answered.add(tid) + + result = [] + for msg in messages: + content = getattr(msg, "content", None) + if isinstance(content, list): + tool_uses = [b for b in content if isinstance(b, dict) and b.get("type") == "tool_use"] + if tool_uses and any(b.get("id") not in answered for b in tool_uses): + continue # skip assistant message with unanswered tool_use + result.append(msg) + return result + + +AGENT_SCHEMA = make_tool_schema( + name="Agent", + description=( + "Launch a sub-agent for independent task execution. " + "Types: explore (read-only codebase search), plan (architecture design, read-only), " + "bash (shell commands only), general (broad tool access except Agent, TaskOutput, and TaskStop). " + "Use for: multi-step tasks, parallel work, tasks needing isolation. " + "Do NOT use for simple file reads or single grep searches — use the tools directly." ), - "parameters": { - "type": "object", - "properties": { - "subagent_type": { - "type": "string", - "description": "Type of agent to spawn (e.g. 'Explore', 'Coder'). Omit for general-purpose.", - }, - "prompt": { - "type": "string", - "description": "Task for the agent", - }, - "name": { - "type": "string", - "description": "Name for the agent (used for SendMessage routing)", - }, - "description": { - "type": "string", - "description": ( - "Short description of what agent will do. Required when run_in_background is true; " - "shown in the background task indicator." - ), - }, - "run_in_background": { - "type": "boolean", - "default": False, - "description": "Fire-and-forget: return immediately with task_id instead of waiting for completion", - }, - "max_turns": { - "type": "integer", - "description": "Maximum turns the agent can take", - }, + properties={ + "subagent_type": { + "type": "string", + "enum": ["explore", "plan", "general", "bash"], + "description": "Type of agent to spawn. Omit for general-purpose.", + }, + "prompt": { + "type": "string", + "description": "Task for the agent", + }, + "name": { + "type": "string", + "description": "Optional display name for the spawned agent", + }, + "description": { + "type": "string", + "description": ( + "Short description of what agent will do. Required when run_in_background is true; shown in the background task indicator." + ), + }, + "run_in_background": { + "type": "boolean", + "default": False, + "description": "Fire-and-forget: return immediately with task_id instead of waiting for completion", + }, + "model": { + "type": "string", + "description": "Optional sub-agent model override. Priority: env > this field > agent frontmatter > inherit.", + }, + "max_turns": { + "type": "integer", + "description": "Maximum turns the agent can take", + }, + "fork_context": { + "type": "boolean", + "default": False, + "description": ( + "Inherit parent conversation history as read-only context. " + "Use when the sub-agent needs background from the parent's work. 
" + "Adds a ### ENTERING SUB-AGENT ROUTINE ### marker so the sub-agent " + "knows which messages are context vs its actual task." + ), }, - "required": ["prompt"], }, -} - -TASK_OUTPUT_SCHEMA = { - "name": "TaskOutput", - "description": "Get the output of a background agent task by its task_id.", - "parameters": { - "type": "object", - "properties": { - "task_id": { - "type": "string", - "description": "The task ID returned when starting a background agent", - }, + required=["prompt", "description"], +) + +TASK_OUTPUT_SCHEMA = make_tool_schema( + name="TaskOutput", + description=( + "Get output of a background task (agent or bash). Blocks until task completes by default. Returns full text output or error." + ), + properties={ + "task_id": { + "type": "string", + "description": "The task ID returned when starting a background agent", + }, + "block": { + "type": "boolean", + "default": True, + "description": "Whether to wait for completion. Use false for a non-blocking status check.", + }, + "timeout": { + "type": "integer", + "default": 30000, + "minimum": 0, + "maximum": 600000, + "description": "Maximum wait time in milliseconds when block=true (default: 30000, max: 600000).", + }, + }, + required=["task_id"], +) + +TASK_STOP_SCHEMA = make_tool_schema( + name="TaskStop", + description="Cancel a running background task. Sends cancellation signal; task may take a moment to stop.", + properties={ + "task_id": { + "type": "string", + "description": "The task ID to stop", + }, + }, + required=["task_id"], +) + +SEND_MESSAGE_SCHEMA = make_tool_schema( + name="SendMessage", + description="Send a queued message to another running agent by name. Delivered before that agent's next model turn.", + properties={ + "target_name": { + "type": "string", + "description": "Display name of the running target agent", + }, + "message": { + "type": "string", + "description": "Message body to deliver", + }, + "sender_name": { + "type": "string", + "description": "Optional sender label for the delivered message", }, - "required": ["task_id"], }, -} - -TASK_STOP_SCHEMA = { - "name": "TaskStop", - "description": "Stop a running background agent task.", - "parameters": { - "type": "object", - "properties": { - "task_id": { - "type": "string", - "description": "The task ID to stop", + required=["target_name", "message"], +) + +ASK_USER_QUESTION_SCHEMA = make_tool_schema( + name="AskUserQuestion", + description=( + "Ask the user one or more structured questions when progress requires their choice or clarification. " + "Use for genuine ambiguity, preference selection, or approval that needs an explicit answer before continuing." 
+ ), + properties={ + "questions": { + "type": "array", + "description": "Questions to present to the user.", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "header": {"type": "string", "description": "Short UI label for the question."}, + "question": {"type": "string", "description": "Full question text shown to the user."}, + "multiSelect": { + "type": "boolean", + "default": False, + "description": "Whether the user may pick multiple options.", + }, + "options": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "label": {"type": "string"}, + "description": {"type": "string"}, + "preview": {"type": "string"}, + }, + "required": ["label", "description"], + }, + }, + }, + "required": ["header", "question", "options"], }, }, - "required": ["task_id"], + "annotations": { + "type": "object", + "description": "Optional structured annotations kept with the question request.", + }, + "metadata": { + "type": "object", + "description": "Optional metadata describing the source of the question request.", + }, }, -} + required=["questions"], +) class _RunningTask: @@ -150,6 +366,33 @@ def get_result(self) -> str | None: BackgroundRun = _RunningTask | _BashBackgroundRun +def _background_run_running_message(running: BackgroundRun) -> str: + return "Command is still running." if isinstance(running, _BashBackgroundRun) else "Agent is still running." + + +def _background_run_result_status(result: str | None) -> str: + return "error" if (result and result.startswith("")) else "completed" + + +async def _wait_for_background_run(running: BackgroundRun, timeout_ms: int) -> bool: + timeout_s = max(timeout_ms, 0) / 1000.0 + if isinstance(running, _RunningTask): + try: + await asyncio.wait_for(asyncio.shield(running.task), timeout=timeout_s) + return True + except TimeoutError: + return running.is_done + + loop = asyncio.get_running_loop() + deadline = loop.time() + timeout_s + while True: + if running.is_done: + return True + if loop.time() >= deadline: + return False + await asyncio.sleep(0.1) + + class AgentService: """Registers Agent, TaskOutput, TaskStop tools into ToolRegistry. @@ -170,11 +413,23 @@ def __init__( model_name: str, queue_manager: Any | None = None, shared_runs: dict[str, BackgroundRun] | None = None, + background_progress_interval_s: float = 30.0, + thread_repo: Any = None, + member_repo: Any = None, + web_app: Any = None, + child_agent_factory: ChildAgentFactory | None = None, ): self._agent_registry = agent_registry self._workspace_root = workspace_root self._model_name = model_name self._queue_manager = queue_manager + self._background_progress_interval_s = background_progress_interval_s + self._thread_repo = thread_repo + self._member_repo = member_repo + self._web_app = web_app + self._child_agent_factory = child_agent_factory or _resolve_default_child_agent_factory() + self._parent_bootstrap: BootstrapConfig | None = None + self._parent_tool_context: Any | None = None # Shared with CommandService so TaskOutput covers both bash and agent runs. 
self._tasks: dict[str, BackgroundRun] = shared_runs if shared_runs is not None else {} @@ -185,6 +440,7 @@ def __init__( schema=AGENT_SCHEMA, handler=self._handle_agent, source="AgentService", + search_hint="launch sub-agent spawn parallel task independent", ) ) tool_registry.register( @@ -194,6 +450,9 @@ def __init__( schema=TASK_OUTPUT_SCHEMA, handler=self._handle_task_output, source="AgentService", + search_hint="get background task output result poll", + is_read_only=True, + is_concurrency_safe=True, ) ) tool_registry.register( @@ -203,8 +462,74 @@ def __init__( schema=TASK_STOP_SCHEMA, handler=self._handle_task_stop, source="AgentService", + search_hint="stop cancel background task agent", + ) + ) + tool_registry.register( + ToolEntry( + name="SendMessage", + mode=ToolMode.INLINE, + schema=SEND_MESSAGE_SCHEMA, + handler=self._handle_send_message, + source="AgentService", + search_hint="send message running agent delivery queue", ) ) + tool_registry.register( + ToolEntry( + name="AskUserQuestion", + mode=ToolMode.INLINE, + schema=ASK_USER_QUESTION_SCHEMA, + handler=self._handle_ask_user_question, + source="AgentService", + search_hint="ask user question clarification choice preference", + is_read_only=True, + is_concurrency_safe=True, + ) + ) + + @staticmethod + def _normalize_child_sandbox(sandbox_type: str | None) -> str | None: + return None if not sandbox_type or sandbox_type == "local" else sandbox_type + + def _ensure_subagent_thread_metadata( + self, + *, + thread_id: str, + parent_thread_id: str | None, + agent_name: str, + model_name: str, + ) -> None: + if self._thread_repo is None or self._member_repo is None or not parent_thread_id: + return + existing_thread = self._thread_repo.get_by_id(thread_id) + if existing_thread is not None: + return + + parent_thread = self._thread_repo.get_by_id(parent_thread_id) + if parent_thread is None: + return + + member_id = parent_thread["member_id"] + member = self._member_repo.get_by_id(member_id) + if member is None: + return + + created_at = time.time() + branch_index = self._thread_repo.get_next_branch_index(member_id) + sandbox_type = parent_thread.get("sandbox_type") or "local" + cwd = parent_thread.get("cwd") + self._thread_repo.create( + thread_id=thread_id, + member_id=member_id, + user_id=thread_id, + sandbox_type=sandbox_type, + cwd=cwd, + created_at=created_at, + model=model_name or parent_thread.get("model"), + is_main=False, + branch_index=branch_index, + ) async def _handle_agent( self, @@ -213,15 +538,22 @@ async def _handle_agent( name: str | None = None, description: str | None = None, run_in_background: bool = False, + model: str | None = None, max_turns: int | None = None, - ) -> str: + fork_context: bool = False, + tool_context: ToolUseContext | None = None, + ) -> Any: """Spawn an independent LeonAgent and run it with the given prompt.""" from sandbox.thread_context import get_current_thread_id task_id = uuid.uuid4().hex[:8] agent_name = name or f"agent-{task_id}" - thread_id = f"subagent-{task_id}" parent_thread_id = get_current_thread_id() + existing_child = None + lookup_existing_child = getattr(self._agent_registry, "get_latest_by_name_and_parent", None) + if name and parent_thread_id and lookup_existing_child is not None: + existing_child = await lookup_existing_child(name, parent_thread_id) + thread_id = existing_child.thread_id if existing_child is not None and existing_child.status != "running" else f"subagent-{task_id}" # Register in AgentRegistry immediately entry = AgentEntry( @@ -233,6 +565,12 @@ async 
def _handle_agent( subagent_type=subagent_type, ) await self._agent_registry.register(entry) + self._ensure_subagent_thread_metadata( + thread_id=thread_id, + parent_thread_id=parent_thread_id, + agent_name=agent_name, + model_name=model or self._model_name, + ) # Create async task (independent LeonAgent runs inside) task = asyncio.create_task( @@ -243,33 +581,57 @@ async def _handle_agent( prompt, subagent_type, max_turns, + model=model, description=description or "", run_in_background=run_in_background, + fork_context=fork_context, + parent_tool_context=tool_context, ) ) if run_in_background: # True fire-and-forget: track in self._tasks for TaskOutput/TaskStop running = _RunningTask(task=task, agent_id=task_id, thread_id=thread_id, description=description or "") self._tasks[task_id] = running - return json.dumps( - { + return tool_success( + json.dumps( + { + "task_id": task_id, + "agent_name": agent_name, + "thread_id": thread_id, + "status": "running", + "message": "Agent started in background. Use TaskOutput to get result.", + }, + ensure_ascii=False, + ), + metadata={ "task_id": task_id, - "agent_name": agent_name, - "thread_id": thread_id, - "status": "running", - "message": "Agent started in background. Use TaskOutput to get result.", + "subagent_thread_id": thread_id, + "description": description or agent_name, }, - ensure_ascii=False, ) # Default: parent blocks until sub-agent completes (does not block frontend event loop) try: result = await task await self._agent_registry.update_status(task_id, "completed") - return result + return tool_success( + result, + metadata={ + "task_id": task_id, + "subagent_thread_id": thread_id, + "description": description or agent_name, + }, + ) except Exception as e: await self._agent_registry.update_status(task_id, "error") - return f"Agent failed: {e}" + return tool_error( + f"Agent failed: {e}", + metadata={ + "task_id": task_id, + "subagent_thread_id": thread_id, + "description": description or agent_name, + }, + ) async def _run_agent( self, @@ -279,8 +641,11 @@ async def _run_agent( prompt: str, subagent_type: str, max_turns: int | None, + model: str | None = None, description: str = "", run_in_background: bool = False, + fork_context: bool = False, + parent_tool_context: ToolUseContext | None = None, ) -> str: """Create and run an independent LeonAgent, collect its text output.""" # Isolate this sub-agent from the parent's LangChain callback chain. 
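
Before the next hunk, a simplified, self-contained sketch of the priority chain `_resolve_subagent_model` implements: env `CLAUDE_CODE_SUBAGENT_MODEL` first, then the explicit `model` argument, then agent frontmatter, then the inherited parent model. The frontmatter lookup is stubbed to a plain argument here; the real helper resolves it through `AgentLoader`:

```python
# Simplified sketch of the sub-agent model priority chain (frontmatter is a
# plain argument here instead of an AgentLoader lookup).
import os

def resolve_model_sketch(requested: str | None, frontmatter: str | None, inherited: str) -> str:
    def is_inherit(value: str | None) -> bool:
        # "default" / "inherit" markers mean: keep walking the chain.
        return value is None or value.lower() in {"default", "inherit"}

    env_model = os.getenv("CLAUDE_CODE_SUBAGENT_MODEL")
    if env_model:
        return env_model
    if not is_inherit(requested):
        return requested
    if frontmatter:
        return frontmatter
    return inherited

assert resolve_model_sketch(None, None, "parent-model") == "parent-model"
assert resolve_model_sketch("inherit", "haiku", "parent-model") == "haiku"
assert resolve_model_sketch("opus", "haiku", "parent-model") == "opus"
```
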
@@ -294,48 +659,164 @@ async def _run_agent( var_child_runnable_config.set(None) - # Lazy import avoids circular dependency (agent.py imports AgentService) - from core.runtime.agent import create_leon_agent from sandbox.thread_context import get_current_thread_id, set_current_thread_id parent_thread_id = get_current_thread_id() + self._ensure_subagent_thread_metadata( + thread_id=thread_id, + parent_thread_id=parent_thread_id, + agent_name=agent_name, + model_name=model or self._model_name, + ) # emit_fn is set if EventBus is available; used for task lifecycle SSE events - emit_fn = None + emit_fn: EventEmitter | None = None try: from backend.web.event_bus import get_event_bus - event_bus = get_event_bus() - emit_fn = event_bus.make_emitter( - thread_id=parent_thread_id, - agent_id=task_id, - agent_name=agent_name, - ) + if parent_thread_id: + event_bus = get_event_bus() + emit_fn = event_bus.make_emitter( + thread_id=parent_thread_id, + agent_id=task_id, + agent_name=agent_name, + ) except ImportError: pass # backend not available in standalone core usage - agent = None + agent: LeonAgent | None = None + progress_task: asyncio.Task | None = None + progress_stop: asyncio.Event | None = None + child_bootstrap_start_cost = 0.0 + child_bootstrap_start_tool_duration_ms = 0 try: - agent = create_leon_agent( - model_name=self._model_name, - workspace_root=self._workspace_root, - verbose=False, - ) + # Sub-agent context trimming: each spawn creates a fresh LeonAgent + # with its own _build_system_prompt(). No CLAUDE.md content or + # gitStatus is injected into the prompt pipeline (core/runtime/prompts + # has no such injection). Therefore explore/plan/bash sub-agents + # already run lightweight — no extra trimming is needed. + # + # Try to use context fork from parent agent's BootstrapConfig. + # Falls back to create_leon_agent when bootstrap is not available. + # Compute tool filtering for this sub-agent type + extra_blocked, allowed = _get_tool_filters(subagent_type) + agent_name_for_role = _get_subagent_agent_name(subagent_type) + + try: + from core.runtime.fork import create_subagent_context + from core.runtime.fork import fork_context as fork_bootstrap + + # Parent bootstrap is stored on the ToolUseContext or agent instance. + # AgentService stores workspace_root and model_name directly; use those + # to check if a richer bootstrap is available via a shared reference. + # _parent_bootstrap is injected by LeonAgent when building AgentService. 
+ parent_bootstrap = getattr(self, "_parent_bootstrap", None) + child_tool_context = None + if parent_tool_context is not None: + child_tool_context = create_subagent_context(parent_tool_context) + child_bootstrap = child_tool_context.bootstrap + elif parent_bootstrap is not None: + child_bootstrap = fork_bootstrap(parent_bootstrap) + selected_model = _resolve_subagent_model( + self._workspace_root, + subagent_type, + model, + child_bootstrap.model_name, + self._model_name, + ) + agent = self._child_agent_factory( + model_name=selected_model, + workspace_root=child_bootstrap.workspace_root, + sandbox=self._normalize_child_sandbox(getattr(child_bootstrap, "sandbox_type", None)), + agent=agent_name_for_role, + web_app=self._web_app, + extra_blocked_tools=extra_blocked, + allowed_tools=allowed, + verbose=False, + ) + else: + raise AttributeError("no parent bootstrap") + child_bootstrap_start_cost = float(getattr(child_bootstrap, "total_cost_usd", 0.0)) + child_bootstrap_start_tool_duration_ms = int(getattr(child_bootstrap, "total_tool_duration_ms", 0)) + if parent_tool_context is not None: + # @@@sa-05-subagent-policy-resolution + # Role-specific tool envelopes and model priority order must + # be resolved explicitly here instead of leaking through + # prompt text or whichever defaults happen to win later. + selected_model = _resolve_subagent_model( + self._workspace_root, + subagent_type, + model, + child_bootstrap.model_name, + self._model_name, + ) + agent = self._child_agent_factory( + model_name=selected_model, + workspace_root=child_bootstrap.workspace_root, + sandbox=self._normalize_child_sandbox(getattr(child_bootstrap, "sandbox_type", None)), + agent=agent_name_for_role, + web_app=self._web_app, + extra_blocked_tools=extra_blocked, + allowed_tools=allowed, + verbose=False, + ) + # @@@sa-04-child-bootstrap-wiring + # Keep the forked bootstrap/context handoff behind an explicit + # LeonAgent API so AgentService stops reaching into QueryLoop + # internals directly. + assert agent is not None + agent.apply_forked_child_context( + child_bootstrap, + tool_context=child_tool_context, + ) + except (AttributeError, ImportError): + inherited_model = getattr(parent_tool_context.bootstrap, "model_name", None) if parent_tool_context else None + selected_model = _resolve_subagent_model( + self._workspace_root, + subagent_type, + model, + inherited_model or self._model_name, + self._model_name, + ) + agent = self._child_agent_factory( + model_name=selected_model, + workspace_root=self._workspace_root, + sandbox=self._normalize_child_sandbox( + getattr(parent_tool_context.bootstrap, "sandbox_type", None) if parent_tool_context else None + ), + agent=agent_name_for_role, + web_app=self._web_app, + extra_blocked_tools=extra_blocked, + allowed_tools=allowed, + verbose=False, + ) # In async context LeonAgent defers checkpointer init; call ainit() to # ensure state is persisted (and loadable via GET /api/threads/{thread_id}). + assert agent is not None await agent.ainit() + # @@@subagent-prompt-path-sanitize - Parent models sometimes satisfy + # "use absolute paths" by appending natural-language cwd labels onto the + # real workspace path. Normalize the obvious fake suffix before dispatch. 
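
For illustration, the sanitizer's effect on a doctored path (the workspace path and prompt below are invented):

```python
# Example of the @@@subagent-prompt-path-sanitize behavior: the helper strips
# the natural-language cwd label some parent models append to the real
# workspace path before the prompt is dispatched to the child.
from pathlib import Path

root = Path("/work/leon")
prompt = "Review /work/leon/current working directory/README.md for typos"
assert _normalize_child_workspace_prompt(prompt, root) == "Review /work/leon/README.md for typos"
```
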
+ child_workspace_root = Path(getattr(agent, "workspace_root", self._workspace_root)) + prompt = _normalize_child_workspace_prompt(prompt, child_workspace_root) + + if parent_thread_id and parent_thread_id != thread_id: + from sandbox.manager import bind_thread_to_existing_thread_lease + + bind_thread_to_existing_thread_lease(thread_id, parent_thread_id) # Wire child agent events to the parent's EventBus subscription # so the parent SSE stream shows sub-agent activity. if emit_fn is not None: - if hasattr(agent, "runtime") and hasattr(agent.runtime, "bind_thread"): - agent.runtime.bind_thread(activity_sink=emit_fn) + runtime = getattr(agent, "runtime", None) + if runtime is not None and hasattr(runtime, "bind_thread"): + runtime.bind_thread(activity_sink=emit_fn) set_current_thread_id(thread_id) # Notify frontend: task started if emit_fn is not None: - await emit_fn( + emission = emit_fn( { "event": "task_start", "data": json.dumps( @@ -350,38 +831,95 @@ async def _run_agent( ), } ) + if asyncio.iscoroutine(emission): + await emission config = {"configurable": {"thread_id": thread_id}} output_parts: list[str] = [] + latest_progress = description or agent_name + + if run_in_background and self._queue_manager and parent_thread_id and self._background_progress_interval_s > 0: + progress_stop = asyncio.Event() + progress_task = asyncio.create_task( + self._emit_background_progress( + task_id=task_id, + agent_name=agent_name, + parent_thread_id=parent_thread_id, + latest_progress=lambda: latest_progress, + stop_event=progress_stop, + ) + ) - async for chunk in agent.agent.astream( - {"messages": [{"role": "user", "content": prompt}]}, - config=config, - stream_mode="updates", - ): - for _, node_update in chunk.items(): - if not isinstance(node_update, dict): - continue - msgs = node_update.get("messages", []) - if not isinstance(msgs, list): - msgs = [msgs] - for msg in msgs: - if msg.__class__.__name__ == "AIMessage": - content = getattr(msg, "content", "") - if isinstance(content, str) and content: - output_parts.append(content) - elif isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = block.get("text", "") - if text: - output_parts.append(text) + # Build initial input — with or without forked parent context + if fork_context: + from sandbox.thread_context import get_current_messages + + # @@@pt-04-fork-context-source + # The Agent tool already has an explicit parent ToolUseContext on + # the live ToolRunner path. Forked sub-agents must prefer that + # concrete message snapshot over ambient ContextVar state, or the + # direct runner path silently drops parent context. 
+ parent_msgs = list(parent_tool_context.messages) if parent_tool_context is not None else get_current_messages() + fork_marker = ( + "\n\n### ENTERING SUB-AGENT ROUTINE ###\n" + "Messages above are from the parent thread (read-only context).\n" + "Only complete the specific task assigned below.\n\n" + ) + initial_messages: list = [ + *_filter_fork_messages(parent_msgs), + {"role": "user", "content": fork_marker + prompt}, + ] + else: + initial_messages = [{"role": "user", "content": prompt}] + + if self._web_app is not None: + from backend.web.services.streaming_service import run_child_thread_live + + result = await run_child_thread_live( + agent, + thread_id, + prompt, + self._web_app, + input_messages=initial_messages, + ) + if result: + output_parts.append(result) + latest_progress = self._summarize_progress(result, description or agent_name) + else: + async for chunk in agent.agent.astream( + {"messages": initial_messages}, + config=config, + stream_mode="updates", + ): + for _, node_update in chunk.items(): + if not isinstance(node_update, dict): + continue + msgs = node_update.get("messages", []) + if not isinstance(msgs, list): + msgs = [msgs] + for msg in msgs: + if msg.__class__.__name__ == "AIMessage": + content = getattr(msg, "content", "") + if isinstance(content, str) and content: + output_parts.append(content) + latest_progress = self._summarize_progress(content, description or agent_name) + elif isinstance(content, list): + for block in content: + if isinstance(block, dict) and block.get("type") == "text": + text = block.get("text", "") + if text: + output_parts.append(text) + latest_progress = self._summarize_progress(text, description or agent_name) await self._agent_registry.update_status(task_id, "completed") result = "\n".join(output_parts) or "(Agent completed with no text output)" + if progress_stop is not None: + progress_stop.set() + if progress_task is not None: + await progress_task # Notify frontend: task done if emit_fn is not None: - await emit_fn( + emission = emit_fn( { "event": "task_done", "data": json.dumps( @@ -393,6 +931,8 @@ async def _run_agent( ), } ) + if asyncio.iscoroutine(emission): + await emission # Queue notification only for background runs — blocking callers already # received the result as the tool's return value; sending a notification # would trigger a spurious new parent turn. 
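
A small, self-contained check of the orphan-tool_use rule that `_filter_fork_messages` enforces before parent history is forked. `SimpleNamespace` stands in for LangChain message objects; real callers pass the parent's actual messages:

```python
# Illustrative check of _filter_fork_messages: an assistant message whose
# tool_use id never received a matching tool_result is dropped before the
# parent history is handed to the forked sub-agent.
from types import SimpleNamespace as Msg

answered = Msg(content=[{"type": "tool_use", "id": "call-a"}])
orphan = Msg(content=[{"type": "tool_use", "id": "call-b"}])
result_a = Msg(content=[{"type": "tool_result", "tool_use_id": "call-a"}])

kept = _filter_fork_messages([answered, orphan, result_a])
assert answered in kept and result_a in kept
assert orphan not in kept  # unanswered tool_use would fail API validation
```
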
@@ -402,18 +942,23 @@ async def _run_agent( task_id=task_id, status="completed", summary=label, + result=result, description=label, ) self._queue_manager.enqueue(notification, parent_thread_id, notification_type="agent") return result except Exception: + if progress_stop is not None: + progress_stop.set() + if progress_task is not None: + await progress_task logger.exception("[AgentService] Agent %s failed", agent_name) await self._agent_registry.update_status(task_id, "error") # Notify frontend: task error if emit_fn is not None: try: - await emit_fn( + emission = emit_fn( { "event": "task_error", "data": json.dumps( @@ -425,6 +970,8 @@ async def _run_agent( ), } ) + if asyncio.iscoroutine(emission): + await emission except Exception: pass if run_in_background and self._queue_manager and parent_thread_id: @@ -433,6 +980,7 @@ async def _run_agent( task_id=task_id, status="error", summary=label, + result="Agent failed", description=label, ) self._queue_manager.enqueue(notification, parent_thread_id, notification_type="agent") @@ -440,37 +988,252 @@ async def _run_agent( finally: if agent is not None: try: - agent.close() + self._merge_child_bootstrap_accumulators( + getattr(self, "_parent_bootstrap", None), + getattr(agent, "_bootstrap", None), + child_bootstrap_start_cost=child_bootstrap_start_cost, + child_bootstrap_start_tool_duration_ms=child_bootstrap_start_tool_duration_ms, + ) + if hasattr(agent, "_agent_service") and hasattr(agent._agent_service, "cleanup_background_runs"): + await agent._agent_service.cleanup_background_runs() + # @@@web-child-persistence - web child threads are user-visible + # thread surfaces. Closing the LeonAgent here marks runtime + # terminated and drops its live/checkpoint bridge right after + # completion, so the child tab collapses to an empty shell. + if self._web_app is None: + # @@@subagent-sandbox-close-skip - Child agents can share the + # parent's lease; closing the child sandbox here can pause the + # shared lease mid-owner-turn. + agent.close(cleanup_sandbox=False) except Exception: pass - async def _handle_task_output(self, task_id: str) -> str: + @staticmethod + def _merge_child_bootstrap_accumulators( + parent_bootstrap: Any, + child_bootstrap: Any, + *, + child_bootstrap_start_cost: float, + child_bootstrap_start_tool_duration_ms: int, + ) -> None: + if parent_bootstrap is None or child_bootstrap is None or parent_bootstrap is child_bootstrap: + return + # @@@sa-03-bootstrap-rollup + # Sub-agent loops start from a forked bootstrap snapshot. At join time we + # need to preserve both the parent's concurrent growth and the child's + # post-fork delta instead of letting one side overwrite the other. 
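
Worked numbers for the roll-up this comment describes (the implementation continues just below; all values are invented):

```python
# Join-time accumulator roll-up: the parent keeps growth that happened while
# the child ran, and the child contributes only its post-fork delta.
from types import SimpleNamespace

parent = SimpleNamespace(total_cost_usd=0.30, total_tool_duration_ms=4_000)
child = SimpleNamespace(total_cost_usd=0.25, total_tool_duration_ms=2_500)

# The child's forked snapshot started at $0.10 and 1_000 ms.
AgentService._merge_child_bootstrap_accumulators(
    parent,
    child,
    child_bootstrap_start_cost=0.10,
    child_bootstrap_start_tool_duration_ms=1_000,
)
assert abs(parent.total_cost_usd - 0.45) < 1e-9  # 0.30 + (0.25 - 0.10)
assert parent.total_tool_duration_ms == 5_500    # 4_000 + (2_500 - 1_000)
```
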
+ child_cost_delta = max( + 0.0, + float(getattr(child_bootstrap, "total_cost_usd", 0.0)) - child_bootstrap_start_cost, + ) + child_tool_duration_delta = max( + 0, + int(getattr(child_bootstrap, "total_tool_duration_ms", 0)) - child_bootstrap_start_tool_duration_ms, + ) + parent_bootstrap.total_cost_usd = float(getattr(parent_bootstrap, "total_cost_usd", 0.0)) + child_cost_delta + parent_bootstrap.total_tool_duration_ms = int(getattr(parent_bootstrap, "total_tool_duration_ms", 0)) + child_tool_duration_delta + + @staticmethod + def _summarize_progress(text: str, fallback: str) -> str: + collapsed = " ".join(text.split()).strip() + if not collapsed: + return fallback + return collapsed[:120] + + async def _emit_background_progress( + self, + *, + task_id: str, + agent_name: str, + parent_thread_id: str, + latest_progress: Any, + stop_event: asyncio.Event, + ) -> None: + # @@@sa-06-progress-loop - keep prompt-facing coordinator updates on the + # real thread delivery queue instead of inventing a detached parallel channel. + while True: + try: + await asyncio.wait_for(stop_event.wait(), timeout=self._background_progress_interval_s) + return + except TimeoutError: + pass + + if self._queue_manager is None: + return + + notification = format_progress_notification( + task_id, + latest_progress(), + step="running", + ) + self._queue_manager.enqueue( + notification, + parent_thread_id, + notification_type="agent", + source="system", + sender_name=agent_name, + ) + + async def _handle_task_output(self, task_id: str, block: bool = True, timeout: int = 30_000) -> str: """Get output of a background agent task.""" running = self._tasks.get(task_id) if not running: return f"Error: task '{task_id}' not found" + if not block: + if not running.is_done: + return json.dumps( + { + "task_id": task_id, + "status": "running", + "message": _background_run_running_message(running), + }, + ensure_ascii=False, + ) + + result = running.get_result() + return json.dumps( + { + "task_id": task_id, + "status": _background_run_result_status(result), + "result": result, + }, + ensure_ascii=False, + ) + + if not running.is_done: + completed = await _wait_for_background_run(running, min(timeout, 600_000)) + if not completed and not running.is_done: + return json.dumps( + { + "task_id": task_id, + "status": "timeout", + "message": _background_run_running_message(running), + }, + ensure_ascii=False, + ) + if not running.is_done: return json.dumps( { "task_id": task_id, "status": "running", - "message": "Agent is still running.", + "message": _background_run_running_message(running), }, ensure_ascii=False, ) result = running.get_result() - status = "error" if (result and result.startswith("")) else "completed" return json.dumps( { "task_id": task_id, - "status": status, + "status": _background_run_result_status(result), "result": result, }, ensure_ascii=False, ) + async def _handle_send_message( + self, + target_name: str, + message: str, + sender_name: str | None = None, + ) -> str: + if self._queue_manager is None: + return "SendMessage requires queue_manager" + + matches = await self._agent_registry.list_running_by_name(target_name) + if not matches: + return f"Running agent '{target_name}' not found" + if len(matches) > 1: + return ( + f"Running agent name '{target_name}' is ambiguous. " + "Use a unique name before calling SendMessage." 
+ ) + target = matches[0] + + delivered = format_agent_message(sender_name or "agent", message) + self._queue_manager.enqueue( + delivered, + target.thread_id, + notification_type="agent", + source="system", + sender_name=sender_name or "agent", + ) + return f"Message sent to {target.name}." + + async def _handle_ask_user_question( + self, + questions: list[dict[str, Any]], + annotations: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, + tool_context: ToolUseContext | None = None, + ) -> Any: + if tool_context is None or tool_context.request_permission is None: + return tool_error("AskUserQuestion requires an interactive owner resolver") + + payload: dict[str, Any] = {"questions": questions} + if annotations is not None: + payload["annotations"] = annotations + if metadata is not None: + payload["metadata"] = metadata + + request_result = tool_context.request_permission( + "AskUserQuestion", + payload, + ToolPermissionContext(is_read_only=True, is_destructive=False), + None, + "Please answer the following questions so Leon can continue.", + ) + request_id = request_result.get("request_id") if isinstance(request_result, dict) else request_result + if not isinstance(request_id, str) or not request_id: + return tool_error("AskUserQuestion could not create a user-facing request") + + return tool_permission_request( + "User input required to continue.", + metadata={ + "decision": "ask", + "request_id": request_id, + "request_kind": "ask_user_question", + }, + ) + + async def _stop_background_run(self, task_id: str, running: BackgroundRun) -> None: + if isinstance(running, _RunningTask): + was_running = not running.task.done() + if was_running: + running.task.cancel() + try: + await running.task + except asyncio.CancelledError: + pass + await self._agent_registry.update_status(running.agent_id, "error") + self._tasks.pop(task_id, None) + return + + if not running.is_done: + process = getattr(running._cmd, "process", None) + wait = getattr(process, "wait", None) if process is not None else None + terminate = getattr(process, "terminate", None) if process is not None else None + kill = getattr(process, "kill", None) if process is not None else None + + if callable(terminate): + terminate() + if callable(wait): + wait_fn = cast(Callable[[], Awaitable[Any]], wait) + try: + await asyncio.wait_for(wait_fn(), timeout=1.0) + except TimeoutError: + if callable(kill): + kill() + await wait_fn() + + self._tasks.pop(task_id, None) + + async def cleanup_background_runs(self) -> None: + for task_id, running in list(self._tasks.items()): + await self._stop_background_run(task_id, running) + async def _handle_task_stop(self, task_id: str) -> str: """Stop a running background agent task.""" running = self._tasks.get(task_id) @@ -480,6 +1243,5 @@ async def _handle_task_stop(self, task_id: str) -> str: if running.is_done: return f"Task {task_id} already completed" - running.task.cancel() - await self._agent_registry.update_status(running.agent_id, "error") + await self._stop_background_run(task_id, running) return f"Task {task_id} cancelled" diff --git a/core/operations.py b/core/operations.py index c0a471b33..768e49859 100644 --- a/core/operations.py +++ b/core/operations.py @@ -2,10 +2,8 @@ from contextvars import ContextVar from dataclasses import dataclass -from pathlib import Path from storage.models import FileOperationRow -from storage.providers.sqlite.file_operation_repo import SQLiteFileOperationRepo # Context variable for tracking current thread (TUI only; web uses 
sandbox.thread_context) current_thread_id: ContextVar[str] = ContextVar("current_thread_id", default="") @@ -31,16 +29,8 @@ class FileOperation: class FileOperationRecorder: """Records file operations for time travel rollback""" - def __init__(self, db_path: Path | str | None = None, repo=None): - # @@@repo-injection - web path injects Supabase repo; TUI falls back to SQLite via db_path. - if repo is not None: - self._repo = repo - return - if db_path is None: - db_path = Path.home() / ".leon" / "leon.db" - self.db_path = Path(db_path) - self.db_path.parent.mkdir(parents=True, exist_ok=True) - self._repo = SQLiteFileOperationRepo(self.db_path) + def __init__(self, repo=None): + self._repo = repo def record( self, @@ -52,7 +42,9 @@ def record( after_content: str, changes: list[dict] | None = None, ) -> str: - """Record a file operation""" + """Record a file operation. Noop if no repo configured.""" + if self._repo is None: + return "" return self._repo.record( thread_id=thread_id, checkpoint_id=checkpoint_id, @@ -64,35 +56,42 @@ def record( ) def get_operations_for_thread(self, thread_id: str, status: str = "applied") -> list[FileOperation]: - """Get all operations for a thread""" + if self._repo is None: + return [] rows = self._repo.get_operations_for_thread(thread_id, status=status) return [self._to_file_operation(row) for row in rows] def get_operations_after_checkpoint(self, thread_id: str, checkpoint_id: str) -> list[FileOperation]: - """Get operations after a specific checkpoint (for rollback)""" + if self._repo is None: + return [] rows = self._repo.get_operations_after_checkpoint(thread_id, checkpoint_id) return [self._to_file_operation(row) for row in rows] def get_operations_between_checkpoints(self, thread_id: str, from_checkpoint_id: str, to_checkpoint_id: str) -> list[FileOperation]: - """Get operations between two checkpoints (exclusive of from, inclusive of to)""" + if self._repo is None: + return [] rows = self._repo.get_operations_between_checkpoints(thread_id, from_checkpoint_id, to_checkpoint_id) return [self._to_file_operation(row) for row in rows] def get_operations_for_checkpoint(self, thread_id: str, checkpoint_id: str) -> list[FileOperation]: - """Get all operations for a specific checkpoint""" + if self._repo is None: + return [] rows = self._repo.get_operations_for_checkpoint(thread_id, checkpoint_id) return [self._to_file_operation(row) for row in rows] def count_operations_for_checkpoint(self, thread_id: str, checkpoint_id: str) -> int: - """Count operations for a specific checkpoint""" + if self._repo is None: + return 0 return self._repo.count_operations_for_checkpoint(thread_id, checkpoint_id) def mark_reverted(self, operation_ids: list[str]) -> None: - """Mark operations as reverted""" + if self._repo is None: + return self._repo.mark_reverted(operation_ids) def delete_thread_operations(self, thread_id: str) -> int: - """Delete all operations for a thread""" + if self._repo is None: + return 0 return self._repo.delete_thread_operations(thread_id) def _to_file_operation(self, row: FileOperationRow) -> FileOperation: diff --git a/core/runner.py b/core/runner.py index 6c3902e3c..fddd6b135 100644 --- a/core/runner.py +++ b/core/runner.py @@ -153,7 +153,7 @@ def _print_memory_stats(self, status: dict) -> None: def _process_chunk(self, chunk: dict, result: dict) -> None: """Process streaming chunk, extract tool calls and response""" - for node_name, node_update in chunk.items(): + for _node_name, node_update in chunk.items(): if not isinstance(node_update, dict): 
continue diff --git a/core/runtime/abort.py b/core/runtime/abort.py new file mode 100644 index 000000000..f95ca4e2f --- /dev/null +++ b/core/runtime/abort.py @@ -0,0 +1,48 @@ +"""Minimal abort controller tree for runtime lifecycle wiring.""" + +from __future__ import annotations + +from collections.abc import Callable + + +class AbortController: + def __init__(self) -> None: + self._aborted = False + self._listeners: dict[int, Callable[[], None]] = {} + self._next_listener_id = 0 + + def abort(self) -> None: + if self._aborted: + return + self._aborted = True + listeners = list(self._listeners.values()) + self._listeners.clear() + for listener in listeners: + listener() + + def is_aborted(self) -> bool: + return self._aborted + + def on_abort(self, listener: Callable[[], None]) -> Callable[[], None]: + if self._aborted: + listener() + return lambda: None + + listener_id = self._next_listener_id + self._next_listener_id += 1 + self._listeners[listener_id] = listener + + def unsubscribe() -> None: + self._listeners.pop(listener_id, None) + + return unsubscribe + + +def create_child_abort_controller(parent: AbortController | None) -> AbortController: + child = AbortController() + if parent is None: + return child + + unsubscribe = parent.on_abort(child.abort) + child.on_abort(unsubscribe) + return child diff --git a/core/runtime/agent.py b/core/runtime/agent.py index e4d7299c6..7c17ad2e9 100644 --- a/core/runtime/agent.py +++ b/core/runtime/agent.py @@ -18,17 +18,16 @@ All paths must be absolute. Full security mechanisms and audit logging. """ +import asyncio +import concurrent.futures +import inspect +import logging import os -import threading from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any -from langchain.agents import create_agent from langchain.chat_models import init_chat_model from langchain_core.messages import SystemMessage -from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver - -from config.schema import DEFAULT_MODEL # Load .env file _env_file = Path(__file__).parent / ".env" @@ -53,6 +52,11 @@ # Import file operation recorder for time travel from core.operations import get_recorder # noqa: E402 + +# New architecture: ToolRegistry + ToolRunner + Services +from core.runtime.cleanup import CleanupRegistry # noqa: E402 +from core.runtime.loop import QueryLoop # noqa: E402 +from core.runtime.middleware.mcp_instructions import McpInstructionsDeltaMiddleware # noqa: E402 from core.runtime.middleware.memory import MemoryMiddleware # noqa: E402 from core.runtime.middleware.monitor import MonitorMiddleware, apply_usage_patches # noqa: E402 from core.runtime.middleware.prompt_caching import PromptCachingMiddleware # noqa: E402 @@ -60,10 +64,9 @@ # Middleware imports (migrated paths) from core.runtime.middleware.spill_buffer import SpillBufferMiddleware # noqa: E402 - -# New architecture: ToolRegistry + ToolRunner + Services -from core.runtime.registry import ToolRegistry # noqa: E402 +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema # noqa: E402 from core.runtime.runner import ToolRunner # noqa: E402 +from core.runtime.state import AppState, BootstrapConfig # noqa: E402 from core.runtime.validator import ToolValidator # noqa: E402 # Hooks (used by Services) @@ -71,7 +74,9 @@ from core.tools.command.hooks.file_access_logger import FileAccessLoggerHook # noqa: E402 from core.tools.command.hooks.file_permission import FilePermissionHook # noqa: E402 from core.tools.command.service import CommandService # noqa: 
E402 +from core.tools.cron.service import CronToolService # noqa: E402 from core.tools.filesystem.service import FileSystemService # noqa: E402 +from core.tools.mcp_resources.service import McpResourceToolService # noqa: E402 from core.tools.search.service import SearchService # noqa: E402 from core.tools.skills.service import SkillsService # noqa: E402 from core.tools.task.service import TaskService # noqa: E402 @@ -82,10 +87,44 @@ from core.tools.web.service import WebService # noqa: E402 from storage.container import StorageContainer # noqa: E402 +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from sandbox import Sandbox + # @@@langchain-anthropic-streaming-usage-regression apply_usage_patches() +def _make_mcp_tool_entry(tool) -> ToolEntry: + schema_model = getattr(tool, "tool_call_schema", None) + if schema_model is not None and hasattr(schema_model, "model_json_schema"): + parameters = schema_model.model_json_schema() + else: + parameters = { + "type": "object", + "properties": getattr(tool, "args", {}) or {}, + } + + async def mcp_handler(**kwargs): + if hasattr(tool, "ainvoke"): + return await tool.ainvoke(kwargs) + return await asyncio.to_thread(tool.invoke, kwargs) + + return ToolEntry( + name=tool.name, + mode=ToolMode.INLINE, + schema=make_tool_schema( + name=tool.name, + description=getattr(tool, "description", "") or tool.name, + properties={}, + parameter_overrides=parameters, + ), + handler=mcp_handler, + source="mcp", + ) + + class LeonAgent: """ Leon Agent - AI Coding Assistant @@ -108,6 +147,7 @@ def __init__( workspace_root: str | Path | None = None, *, agent: str | None = None, + bundle_dir: str | Path | None = None, allowed_file_extensions: list[str] | None = None, block_dangerous_commands: bool | None = None, block_network_commands: bool | None = None, @@ -119,9 +159,15 @@ def __init__( jina_api_key: str | None = None, sandbox: Any = None, storage_container: StorageContainer | None = None, + thread_repo: Any = None, + member_repo: Any = None, queue_manager: MessageQueueManager | None = None, chat_repos: dict | None = None, + web_app: Any = None, extra_allowed_paths: list[str] | None = None, + extra_blocked_tools: set[str] | None = None, + allowed_tools: set[str] | None = None, + permission_resolver_scope: str = "none", verbose: bool = False, ): """ @@ -138,7 +184,10 @@ def __init__( enable_audit_log: Whether to enable audit logging enable_web_tools: Whether to enable web search and content fetching tools sandbox: Sandbox instance, name string, or None for local + thread_repo: Optional thread metadata repo for backend-integrated subagent registration + member_repo: Optional member repo for backend-integrated subagent registration queue_manager: Shared MessageQueueManager instance (created if not provided) + permission_resolver_scope: Permission request surface for this agent ("none" or "thread") verbose: Whether to output detailed logs (default False) """ self.agent_id: str | None = None @@ -146,11 +195,22 @@ def __init__( self.extra_allowed_paths = extra_allowed_paths self.queue_manager = queue_manager or MessageQueueManager() self._chat_repos: dict | None = chat_repos + self._thread_repo = thread_repo + self._member_repo = member_repo + self._web_app = web_app + self._session_started = False + self._session_ended = False + self._closing = False + self._closed = False + requested_sandbox_name = sandbox if isinstance(sandbox, str) else getattr(sandbox, "name", None) + self._explicit_model_name = model_name is not None # New config system mode self.config, 
self.models_config = self._load_config( agent_name=agent, + bundle_dir=bundle_dir, workspace_root=workspace_root, + sandbox_name=requested_sandbox_name, model_name=model_name, api_key=api_key, allowed_file_extensions=allowed_file_extensions, @@ -167,8 +227,9 @@ def __init__( from config.schema import DEFAULT_MODEL # noqa: E402 active_model = DEFAULT_MODEL - # Member model override: agent.md's model field takes precedence over global config - if hasattr(self, "_agent_override") and self._agent_override and self._agent_override.model: + # Agent frontmatter model applies only when the caller did not explicitly + # request a model at construction time. + if not self._explicit_model_name and hasattr(self, "_agent_override") and self._agent_override and self._agent_override.model: active_model = self._agent_override.model resolved_model, model_overrides = self.models_config.resolve_model(active_model) self.model_name = resolved_model @@ -177,6 +238,7 @@ def __init__( # Resolve API key (prefer resolved provider from mapping) provider_name = self._resolve_provider_name(resolved_model, model_overrides) p = self.models_config.get_provider(provider_name) if provider_name else None + self._explicit_api_key = api_key is not None self.api_key = api_key or (p.api_key if p else None) or self.models_config.get_api_key() if not self.api_key: @@ -213,63 +275,74 @@ def __init__( } # Initialize checkpointer and MCP tools - self._aiosqlite_conn, mcp_tools = self._init_async_components() + self.checkpointer = None + _conn, mcp_tools = self._init_async_components() # If in async context (running loop detected), _init_async_components # skips init and returns (None, []). Distinguish from Postgres path # which also returns conn=None but DID initialize successfully. - self._needs_async_init = self._aiosqlite_conn is None and self.checkpointer is None + self._needs_async_init = self.checkpointer is None # Set checkpointer to None if in async context (will be initialized later) if self._needs_async_init: self.checkpointer = None # Initialize ToolRegistry and Services (new architecture) - self._tool_registry = ToolRegistry(blocked_tools=self._get_member_blocked_tools()) + blocked = self._get_member_blocked_tools() + if extra_blocked_tools: + blocked = blocked | extra_blocked_tools + self._tool_registry = ToolRegistry( + blocked_tools=blocked, + allowed_tools=allowed_tools, + ) self._init_services() + self._register_mcp_tools(mcp_tools) # Build middleware stack middleware = self._build_middleware_stack() - # Ensure ToolNode is created (middleware tools need at least one BaseTool) + # Ensure the bound model still sees at least one BaseTool-compatible entry. 
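+        # The placeholder below covers the empty-tool edge: it is injected only when
+        # neither MCP nor middleware contributes a BaseTool, so the model bind never
+        # receives an empty tools list.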
if not mcp_tools and not self._has_middleware_tools(middleware): mcp_tools = [self._create_placeholder_tool()] - # Build system prompt - self.system_prompt = self._build_system_prompt() - custom_prompt = self.config.system_prompt - if custom_prompt: - self.system_prompt += f"\n\n**Custom Instructions:**\n{custom_prompt}" - - # @@@entity-identity — inject chat identity so agent knows who it is in the social layer - if self._chat_repos: - repos = self._chat_repos - uid = repos.get("user_id") - owner_uid = repos.get("owner_user_id", "") - if uid: - entity_repo = repos.get("entity_repo") - entity = entity_repo.get_by_id(uid) if entity_repo else None - member_repo = repos.get("member_repo") - owner_row = member_repo.get_by_id(owner_uid) if member_repo and owner_uid else None - name = entity.name if entity else uid - owner_name = owner_row.name if owner_row else "unknown" - self.system_prompt += ( - f"\n\n**Chat Identity:**\n" - f"- Your name: {name}\n" - f"- Your user_id: {uid}\n" - f"- Your owner: {owner_name} (user_id: {owner_uid})\n" - f"- When you receive a chat notification, READ the message with chat_read(), " - f"then REPLY with chat_send(). Your text output goes to your owner's thread, " - f"not to the chat — only chat_send() delivers to the other party.\n" - ) + self._system_prompt_section_cache: dict[str, str] = {} + self.system_prompt = self._compose_system_prompt() - # Create agent - self.agent = create_agent( + # Build BootstrapConfig for sub-agent forking + self._bootstrap = BootstrapConfig( + workspace_root=self.workspace_root, + original_cwd=Path.cwd(), + project_root=self.workspace_root, + cwd=self.workspace_root, + model_name=self.model_name, + api_key=self.api_key, + sandbox_type=self._sandbox.name, + permission_resolver_scope=permission_resolver_scope, + block_dangerous_commands=self.block_dangerous_commands, + block_network_commands=self.block_network_commands, + enable_audit_log=self.enable_audit_log, + enable_web_tools=self.enable_web_tools, + allowed_file_extensions=self.allowed_file_extensions, + extra_allowed_paths=self.extra_allowed_paths, + model_provider=self._current_model_config.get("model_provider"), + base_url=self._current_model_config.get("base_url"), + ) + self._app_state = AppState() + self.app_state = self._app_state + # Inject bootstrap into AgentService so sub-agents can fork from it + if hasattr(self, "_agent_service"): + self._agent_service._parent_bootstrap = self._bootstrap + + # Create agent via QueryLoop (replaces LangGraph create_agent) + self.agent = QueryLoop( model=self.model, - tools=mcp_tools, system_prompt=SystemMessage(content=[{"type": "text", "text": self.system_prompt}]), middleware=middleware, - checkpointer=self.checkpointer if not self._needs_async_init else None, + checkpointer=self.checkpointer, + registry=self._tool_registry, + app_state=self._app_state, + runtime=self._monitor_middleware.runtime, + bootstrap=self._bootstrap, ) # Get runtime from MonitorMiddleware @@ -286,13 +359,45 @@ def __init__( print("[LeonAgent] Initialized successfully") print(f"[LeonAgent] Workspace: {self.workspace_root}") print(f"[LeonAgent] Audit log: {self.enable_audit_log}") - if self._needs_async_init: + if self.checkpointer is None: print("[LeonAgent] Note: Async components need initialization via ainit()") - # Mark agent as ready (if not needing async init) - if not self._needs_async_init: + # Wire CleanupRegistry for priority-ordered resource teardown + self._cleanup_registry = CleanupRegistry() + 
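+        # Lower priority runs first: sandbox teardown (2) precedes monitor
+        # termination (3), MCP client close (4), and the SQLite no-op (5).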
self._cleanup_registry.register(self._cleanup_sandbox, priority=2) + self._cleanup_registry.register(self._mark_terminated, priority=3) + self._cleanup_registry.register(self._cleanup_mcp_client, priority=4) + self._cleanup_registry.register(self._cleanup_sqlite_connection, priority=5) + + # Mark agent as ready (checkpointer is None when async init still pending) + if self.checkpointer is not None: self._monitor_middleware.mark_ready() + @property + def sandbox(self) -> "Sandbox": + # @@@public-sandbox-surface - integration callers already drive fs/shell through + # agent.sandbox; make that contract explicit instead of relying on a private attr. + return self._sandbox + + def apply_forked_child_context( + self, + bootstrap: BootstrapConfig, + *, + tool_context: Any | None = None, + ) -> None: + # @@@subagent-fork-wiring + # AgentService should not reach through LeonAgent and mutate QueryLoop + # internals directly. Keep the child bootstrap + abort-controller wiring + # behind one explicit LeonAgent seam. + self._bootstrap = bootstrap + self.agent._bootstrap = bootstrap + if hasattr(self, "_agent_service"): + self._agent_service._parent_bootstrap = bootstrap + if tool_context is not None: + self._agent_service._parent_tool_context = tool_context + if tool_context is not None: + self.agent._tool_abort_controller = tool_context.abort_controller + async def ainit(self): """Complete async initialization (call this if initialized in async context). @@ -300,22 +405,28 @@ async def ainit(self): agent = LeonAgent(sandbox=sandbox) await agent.ainit() """ - if not self._needs_async_init: - return # Already initialized - - # Initialize async components - self._aiosqlite_conn = await self._init_checkpointer() - _mcp_tools = await self._init_mcp_tools() + if self.checkpointer is None: + # Initialize async components + await self._init_checkpointer() + _mcp_tools = await self._init_mcp_tools() + self._register_mcp_tools(_mcp_tools) + + # Update agent with checkpointer + self.agent.checkpointer = self.checkpointer + if hasattr(self, "_memory_middleware"): + # @@@late-checkpointer-fanout - async bringup creates the saver after + # middleware construction, so QueryLoop and MemoryMiddleware must be + # rewired together or rebuild/persistence surfaces drift apart. + self._memory_middleware.checkpointer = self.checkpointer - # Update agent with checkpointer - self.agent.checkpointer = self.checkpointer + self._monitor_middleware.mark_ready() - # Mark as initialized - self._needs_async_init = False - self._monitor_middleware.mark_ready() + if self.verbose: + print("[LeonAgent] Async initialization completed") - if self.verbose: - print("[LeonAgent] Async initialization completed") + if not self._session_started: + await self._run_session_hooks("SessionStart") + self._session_started = True def _init_async_components(self) -> tuple[Any, list]: """Initialize async components (checkpointer and MCP tools). 
@@ -339,24 +450,31 @@ def _init_async_components(self) -> tuple[Any, list]: self._event_loop = loop # Initialize components - conn = loop.run_until_complete(self._init_checkpointer()) + loop.run_until_complete(self._init_checkpointer()) mcp_tools = loop.run_until_complete(self._init_mcp_tools()) - # DON'T close the loop - let it persist for aiosqlite - # The loop will be cleaned up when Python exits - return conn, mcp_tools + return None, mcp_tools def _has_middleware_tools(self, middleware: list) -> bool: """Check if any middleware has BaseTool instances.""" return any(getattr(m, "tools", None) for m in middleware) + def _register_mcp_tools(self, mcp_tools: list) -> None: + if not mcp_tools: + return + for tool in mcp_tools: + try: + self._tool_registry.register(_make_mcp_tool_entry(tool)) + except Exception as exc: + logger.warning("[LeonAgent] Failed to register MCP tool %s: %s", getattr(tool, "name", ""), exc) + def _create_placeholder_tool(self): - """Create placeholder tool to ensure ToolNode is created.""" + """Create placeholder tool so the bound model still has a BaseTool.""" from langchain_core.tools import tool @tool def _placeholder() -> str: - """Internal placeholder - ensures ToolNode is created for middleware tools.""" + """Internal placeholder for the empty-tool edge.""" return "" return _placeholder @@ -391,10 +509,26 @@ def _get_member_blocked_tools(self) -> set[str]: return blocked + def _get_mcp_server_configs(self) -> dict[str, Any]: + if hasattr(self, "_agent_bundle") and self._agent_bundle and self._agent_bundle.mcp: + return {name: srv for name, srv in self._agent_bundle.mcp.items() if not srv.disabled} + return self.config.mcp.servers + + def _get_mcp_instruction_blocks(self) -> dict[str, str]: + blocks: dict[str, str] = {} + for name, cfg in self._get_mcp_server_configs().items(): + instructions = getattr(cfg, "instructions", None) + if not isinstance(instructions, str) or not instructions.strip(): + continue + blocks[name] = instructions.strip() + return blocks + def _load_config( self, agent_name: str | None, + bundle_dir: str | Path | None, workspace_root: str | Path | None, + sandbox_name: str | None, model_name: str | None, api_key: str | None, allowed_file_extensions: list[str] | None, @@ -410,8 +544,14 @@ def _load_config( """ # Build CLI overrides for runtime config cli_overrides: dict = {} - - if workspace_root is not None: + use_workspace_override = sandbox_name in (None, "", "local") + + if workspace_root is not None and use_workspace_override: + # @@@remote-sandbox-config-root + # Remote child agents may inherit a sandbox cwd like /home/daytona, + # which is valid inside the sandbox but not on the host. Feeding that + # path into LeonSettings makes config validation fail before sandbox + # init ever runs, so only local sandboxes pin workspace_root here. cli_overrides["workspace_root"] = str(workspace_root) # Runtime overrides go into "runtime" section @@ -441,8 +581,14 @@ def _load_config( models_loader = ModelsLoader(workspace_root=workspace_root) models_config = models_loader.load(cli_overrides=models_cli if models_cli else None) + # @@@bundle-dir-wins - member-backed top-level agents need their own bundle even when + # no explicit agent type name is passed through the thread runtime wiring. 
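+        # When bundle_dir and agent_name are both provided, bundle_dir wins and the
+        # named-agent lookup in the elif branch below is skipped entirely.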
+ if bundle_dir is not None: + bundle_path = Path(bundle_dir).expanduser().resolve() + self._agent_bundle = loader.load_bundle(bundle_path) + self._agent_override = self._agent_bundle.agent.model_copy(update={"source_dir": bundle_path}) # If agent specified, load agent definition to override system_prompt and tools - if agent_name: + elif agent_name: all_agents = loader.load_all_agents() agent_def = all_agents.get(agent_name) if not agent_def: @@ -609,7 +755,16 @@ def _build_model_kwargs(self) -> dict: # Get credentials from the resolved provider p = self.models_config.get_provider(provider) if provider else None - base_url = (p.base_url if p else None) or self.models_config.get_base_url() + env_base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL") + + # @@@explicit-api-key-base-url + # Real-model verification must not be silently redirected to a provider + # config endpoint when the caller explicitly injected credentials for a + # different OpenAI-compatible endpoint. + if self._explicit_api_key and env_base_url: + base_url = env_base_url + else: + base_url = (p.base_url if p else None) or self.models_config.get_base_url() if base_url: kwargs["base_url"] = self._normalize_base_url(base_url, provider) @@ -714,12 +869,71 @@ def update_observation(self, **overrides) -> None: if self.verbose: print(f"[LeonAgent] Observation updated: active={self._observation_config.active}") - def close(self): - """Clean up resources.""" - self._cleanup_sandbox() - self._mark_terminated() - self._cleanup_mcp_client() - self._cleanup_sqlite_connection() + def close(self, *, cleanup_sandbox: bool = True): + """Clean up resources via CleanupRegistry (priority-ordered). + + Falls back to direct cleanup if CleanupRegistry is not initialized. + """ + # @@@close-idempotent - child agents may explicitly skip sandbox cleanup + # and later still hit __del__ on GC; never let a second close silently + # re-enable default sandbox teardown on a shared lease. 
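+        # _closing also guards re-entry while close() is still running: __del__ can
+        # fire mid-teardown and would otherwise re-run the cleanup steps.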
+ if getattr(self, "_closed", False) or getattr(self, "_closing", False): + return + + self._closing = True + session_end_error: Exception | None = None + try: + if getattr(self, "_session_started", False) and not getattr(self, "_session_ended", False): + try: + self._run_async_cleanup(lambda: self._run_session_hooks("SessionEnd"), "SessionEnd hooks") + except Exception as exc: + session_end_error = exc + finally: + self._session_ended = True + + if hasattr(self, "_cleanup_registry") and cleanup_sandbox: + self._run_async_cleanup(self._cleanup_registry.run_cleanup, "CleanupRegistry") + else: + # Fallback for edge cases where __init__ did not complete fully + cleanup_steps = [ + ("monitor", self._mark_terminated), + ("MCP client", self._cleanup_mcp_client), + ("SQLite connection", self._cleanup_sqlite_connection), + ] + if cleanup_sandbox: + cleanup_steps.insert(0, ("sandbox", self._cleanup_sandbox)) + + for step_name, step_fn in cleanup_steps: + try: + step_fn() + except Exception as e: + print(f"[LeonAgent] {step_name} cleanup error: {e}") + + if session_end_error is not None: + raise session_end_error + finally: + self._closed = True + self._closing = False + + def _build_session_hook_payload(self, event: str) -> dict[str, Any]: + return { + "event": event, + "session_id": self._bootstrap.session_id, + "workspace_root": str(self.workspace_root), + "cwd": str(self._bootstrap.cwd or self.workspace_root), + "sandbox": self._sandbox.name, + } + + async def _run_session_hooks(self, event: str) -> None: + hooks = self._app_state.get_session_hooks(event) + if not hooks: + return + + payload = self._build_session_hook_payload(event) + for hook in hooks: + result = hook(payload) + if inspect.isawaitable(result): + await result def _cleanup_sandbox(self) -> None: """Clean up sandbox resources.""" @@ -734,32 +948,29 @@ def _mark_terminated(self) -> None: if hasattr(self, "_monitor_middleware"): self._monitor_middleware.mark_terminated() + _CLEANUP_TIMEOUT: float = 10.0 # seconds; prevents hanging on stuck I/O + @staticmethod def _run_async_cleanup(coro_factory, label: str) -> None: import asyncio try: - running_loop = asyncio.get_running_loop() + asyncio.get_running_loop() except RuntimeError: - running_loop = None - - if running_loop is None: asyncio.run(coro_factory()) return - error: list[Exception] = [] - - def _runner() -> None: + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + future = pool.submit(asyncio.run, coro_factory()) try: - asyncio.run(coro_factory()) + future.result(timeout=LeonAgent._CLEANUP_TIMEOUT) + except concurrent.futures.TimeoutError: + raise RuntimeError( + f"{label} cleanup timed out after {LeonAgent._CLEANUP_TIMEOUT}s — " + f"possible stuck I/O; resource abandoned to prevent hang" + ) except Exception as exc: - error.append(exc) - - thread = threading.Thread(target=_runner, daemon=True) - thread.start() - thread.join() - if error: - raise RuntimeError(f"{label} cleanup failed: {error[0]}") from error[0] + raise RuntimeError(f"{label} cleanup failed: {exc}") from exc def _cleanup_mcp_client(self) -> None: """Clean up MCP client.""" @@ -767,35 +978,15 @@ def _cleanup_mcp_client(self) -> None: return try: - self._run_async_cleanup(lambda: self._mcp_client.close(), "MCP client") + close_fn = getattr(self._mcp_client, "close", None) + if callable(close_fn): + self._run_async_cleanup(close_fn, "MCP client") except Exception as e: print(f"[LeonAgent] MCP cleanup error: {e}") self._mcp_client = None def _cleanup_sqlite_connection(self) -> None: - """Clean up 
SQLite connection. - - Properly closes aiosqlite connection using asyncio.run() to avoid - hanging on process exit. - """ - if not hasattr(self, "_aiosqlite_conn") or not self._aiosqlite_conn: - return - - try: - import asyncio - - # Close the connection asynchronously - async def _close(): - if self._aiosqlite_conn: - await self._aiosqlite_conn.close() - - # Use asyncio.run() to properly close the connection - asyncio.run(_close()) - except Exception: - # Ignore errors during cleanup - pass - finally: - self._aiosqlite_conn = None + """No-op: SQLite checkpointer removed; Postgres cleanup handled by _pg_saver_ctx.""" def __del__(self): self.close() @@ -830,11 +1021,19 @@ def _build_middleware_stack(self) -> list: if memory_enabled: self._add_memory_middleware(middleware) - # 4. Steering — injects queued messages before model call + # 4. MCP instructions delta — thread-scoped reminder when MCP guidance changes + middleware.append( + McpInstructionsDeltaMiddleware( + get_instruction_blocks=self._get_mcp_instruction_blocks, + get_app_state=lambda: self.app_state, + ) + ) + + # 5. Steering — injects queued messages before model call self._steering_middleware = SteeringMiddleware(queue_manager=self.queue_manager) middleware.append(self._steering_middleware) - # 5. ToolRunner (innermost — routes all ToolRegistry-registered tool calls) + # 6. ToolRunner (innermost — routes all ToolRegistry-registered tool calls) self._tool_runner = ToolRunner( registry=self._tool_registry, validator=ToolValidator(), @@ -843,7 +1042,7 @@ def _build_middleware_stack(self) -> list: # 0. SpillBuffer (outermost — catches oversized tool outputs) # Must be inserted at index 0 AFTER building the list: - # LangChain wraps middlewares as "first = outermost". + # QueryLoop composes middleware so the first entry remains outermost. 
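+        # Inserting at index 0 therefore makes SpillBuffer wrap every middleware
+        # appended above, ToolRunner included.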
if self.config.tools.spill_buffer.enabled: spill_cfg = self.config.tools.spill_buffer middleware.insert( @@ -993,6 +1192,17 @@ def _init_services(self) -> None: workspace_root=self.workspace_root, ) + # Cron tools (DEFERRED - backed by existing panel cron_jobs substrate) + self._cron_tool_service = CronToolService( + registry=self._tool_registry, + ) + + self._mcp_resource_tool_service = McpResourceToolService( + registry=self._tool_registry, + client_fn=lambda: getattr(self, "_mcp_client", None), + server_configs_fn=self._get_mcp_server_configs, + ) + # ToolSearch (INLINE - always available for discovering DEFERRED tools) self._tool_search_service = ToolSearchService( registry=self._tool_registry, @@ -1005,8 +1215,12 @@ def _init_services(self) -> None: agent_registry=self._agent_registry, workspace_root=self.workspace_root, model_name=self.model_name, + thread_repo=self._thread_repo, + member_repo=self._member_repo, queue_manager=self.queue_manager, shared_runs=self._background_runs, + web_app=self._web_app, + child_agent_factory=create_leon_agent, ) # Team coordination (TeamCreate/TeamDelete — deferred mode) @@ -1023,51 +1237,37 @@ def _init_services(self) -> None: except ImportError: self._taskboard_service = None - # @@@chat-tools - register chat tools for agents with user identity + # @@@chat-tools - register chat tools for agents with user identity (v2 messaging) if self._chat_repos: repos = self._chat_repos - user_id = repos.get("user_id") - owner_user_id = repos.get("owner_user_id", "") - if user_id: - from core.agents.communication.chat_tool_service import ChatToolService + chat_identity_id = repos.get("chat_identity_id") or repos.get("user_id") + owner_id = repos.get("owner_id", "") + if chat_identity_id: + from messaging.tools.chat_tool_service import ChatToolService - # @@@lazy-runtime — runtime isn't set yet at _init_services() time. - # Pass a callable that resolves runtime lazily at tool call time. 
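+                # v2 wiring injects the messaging repos directly; the lazy runtime_fn
+                # indirection removed above is no longer needed.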
self._chat_tool_service = ChatToolService( registry=self._tool_registry, - user_id=user_id, - owner_user_id=owner_user_id, - entity_repo=repos.get("entity_repo"), - chat_service=repos.get("chat_service"), - chat_entity_repo=repos.get("chat_entity_repo"), - chat_message_repo=repos.get("chat_message_repo"), + chat_identity_id=chat_identity_id, + owner_id=owner_id, + messaging_service=repos.get("messaging_service"), + chat_member_repo=repos.get("chat_member_repo"), + messages_repo=repos.get("messages_repo"), member_repo=repos.get("member_repo"), - chat_event_bus=repos.get("chat_event_bus"), - runtime_fn=lambda: getattr(self, "runtime", None), + thread_repo=self._thread_repo, + relationship_repo=repos.get("relationship_repo"), ) - # @@@wechat-tools — register WeChat tools via lazy connection lookup - owner_uid = self._chat_repos.get("owner_user_id", "") if self._chat_repos else "" - if owner_uid: - try: - from core.tools.wechat.service import WeChatToolService - - def _get_wechat_conn(uid=owner_uid): - """Lazy lookup — returns None if registry not on app.state yet.""" - try: - from backend.web.main import app - - registry = getattr(app.state, "wechat_registry", None) - return registry.get(uid) if registry else None - except Exception: - return None + # LSP tools — DEFERRED, always registered, multilspy checked at call time + self._lsp_service = None + try: + from core.tools.lsp.service import LSPService - self._wechat_tool_service = WeChatToolService( - registry=self._tool_registry, - connection_fn=_get_wechat_conn, - ) - except ImportError: - self._wechat_tool_service = None + self._lsp_service = LSPService( + registry=self._tool_registry, + workspace_root=self.workspace_root, + ) + except Exception as e: + logger.debug("[LeonAgent] LSPService init skipped: %s", e) if self.verbose: all_tools = self._tool_registry.list_all() @@ -1078,11 +1278,7 @@ def _get_wechat_conn(uid=owner_uid): async def _init_mcp_tools(self) -> list: mcp_enabled = self.config.mcp.enabled - # Use member bundle MCP config if available, else fall back to global config - if hasattr(self, "_agent_bundle") and self._agent_bundle and self._agent_bundle.mcp: - mcp_servers = {name: srv for name, srv in self._agent_bundle.mcp.items() if not srv.disabled} - else: - mcp_servers = self.config.mcp.servers + mcp_servers = self._get_mcp_server_configs() if not mcp_enabled or not mcp_servers: return [] @@ -1091,10 +1287,21 @@ async def _init_mcp_tools(self) -> list: configs = {} for name, cfg in mcp_servers.items(): + transport = getattr(cfg, "transport", None) if cfg.url: - config = {"transport": "streamable_http", "url": cfg.url} + # @@@mcp-transport-honesty - api-04 requires explicit transport + # config to survive loader -> runtime. URL-based MCP is not + # always streamable_http; websocket/sse must stay explicit. + config = { + "transport": transport or "streamable_http", + "url": cfg.url, + } else: - config = {"transport": "stdio", "command": cfg.command, "args": cfg.args} + config = { + "transport": transport or "stdio", + "command": cfg.command, + "args": cfg.args, + } if cfg.env: config["env"] = cfg.env configs[name] = config @@ -1129,31 +1336,20 @@ async def _init_mcp_tools(self) -> list: async def _init_checkpointer(self): """Initialize async checkpointer for conversation persistence. - Uses Postgres (via Supabase) when LEON_STORAGE_STRATEGY=supabase, - otherwise falls back to local SQLite. + Requires LEON_POSTGRES_URL to be set (Supabase Postgres). 
""" - strategy = os.getenv("LEON_STORAGE_STRATEGY", "sqlite") pg_url = os.getenv("LEON_POSTGRES_URL") + if not pg_url: + raise RuntimeError("LEON_POSTGRES_URL is required for checkpointer initialization") - if strategy == "supabase" and pg_url: - from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver - - # from_conn_string is an async context manager; enter it and keep - # the reference so the connection pool stays open for the agent's lifetime. - self._pg_saver_ctx = AsyncPostgresSaver.from_conn_string(pg_url) - self.checkpointer = await self._pg_saver_ctx.__aenter__() - await self.checkpointer.setup() - return None # no SQLite conn to track - else: - from storage.providers.sqlite.kernel import connect_sqlite_async + from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver - db_path = self.db_path - db_path.parent.mkdir(parents=True, exist_ok=True) - conn = await connect_sqlite_async(db_path) - self.checkpointer = AsyncSqliteSaver(conn) - await self.checkpointer.setup() - return conn - return conn + # from_conn_string is an async context manager; enter it and keep + # the reference so the connection pool stays open for the agent's lifetime. + self._pg_saver_ctx = AsyncPostgresSaver.from_conn_string(pg_url) + self.checkpointer = await self._pg_saver_ctx.__aenter__() + await self.checkpointer.setup() + return None # no SQLite conn to track def _is_tool_allowed(self, tool) -> bool: # Extract original tool name without mcp__ prefix @@ -1190,155 +1386,109 @@ def _build_system_prompt(self) -> str: return prompt - def _build_context_section(self) -> str: - """Build the context section based on sandbox mode.""" - if self._sandbox.name != "local": - env_label = self._sandbox.env_label - working_dir = self._sandbox.working_dir - if self._sandbox.name == "docker": - mode_label = "Sandbox (isolated local container)" - else: - mode_label = "Sandbox (isolated cloud environment)" - return f"""- Environment: {env_label} -- Working Directory: {working_dir} -- Mode: {mode_label}""" - else: - import platform - - os_name = platform.system() - if os_name == "Windows": - shell_name = "powershell" - else: - shell_name = os.environ.get("SHELL", "/bin/bash").split("/")[-1] - return f"""- Workspace: `{self.workspace_root}` -- OS: {os_name} -- Shell: {shell_name} -- Mode: Local""" + def _compose_system_prompt(self) -> str: + prompt = self._build_system_prompt() - def _build_rules_section(self) -> str: - """Build shared rules section for all modes.""" - is_sandbox = self._sandbox.name != "local" - working_dir = self._sandbox.working_dir if is_sandbox else self.workspace_root + custom_prompt = self.config.system_prompt + if custom_prompt: + prompt += f"\n\n**Custom Instructions:**\n{custom_prompt}" - rules = [] + # @@@chat-identity — inject chat identity so agent knows who it is in the social layer + if self._chat_repos: + repos = self._chat_repos + uid = repos.get("chat_identity_id") or repos.get("user_id") + owner_uid = repos.get("owner_id", "") + if uid: + member_repo = repos.get("member_repo") + self_member = member_repo.get_by_id(uid) if member_repo else None + if self_member is None and member_repo and self._thread_repo is not None: + thread = self._thread_repo.get_by_user_id(uid) + member_id = thread.get("member_id") if thread else None + if member_id: + self_member = member_repo.get_by_id(member_id) + owner_row = member_repo.get_by_id(owner_uid) if member_repo and owner_uid else None + name = self_member.name if self_member else uid + owner_name = owner_row.name if owner_row else "unknown" + 
prompt += ( + f"\n\n**Chat Identity:**\n" + f"- Your name: {name}\n" + f"- Your chat identity id: {uid}\n" + f"- The chat tools still use the parameter name user_id for legacy reasons.\n" + f"- Your owner: {owner_name} (human user_id: {owner_uid})\n" + f"- When you receive a chat notification, you MUST read it with chat_read() before deciding what to do.\n" + f"- If that notification already gives you a chat_id, prefer using that exact chat_id directly.\n" + f"- If you reply to the other party, you MUST call chat_send(). Never claim you replied unless chat_send() succeeded.\n" + f"- Your normal text output goes to your owner's thread, not to the chat — only chat_send() delivers to the other party.\n" + ) + return prompt - # Rule 1: Environment-specific - if is_sandbox: - if self._sandbox.name == "docker": - location_rule = "All file and command operations run in a local Docker container, NOT on the user's host filesystem." - else: - location_rule = "All file and command operations run in a remote sandbox, NOT on the user's local machine." - rules.append(f"1. **Sandbox Environment**: {location_rule} The sandbox is an isolated Linux environment.") - else: - rules.append("1. **Workspace**: File operations are restricted to: " + str(self.workspace_root)) + def _invalidate_system_prompt_cache(self) -> None: + self._system_prompt_section_cache.clear() - # Rule 2: Absolute paths - rules.append(f"""2. **Absolute Paths**: All file paths must be absolute paths. - - ✅ Correct: `{working_dir}/project/test.py` - - ❌ Wrong: `test.py` or `./test.py`""") + def _get_cached_prompt_section(self, key: str, builder) -> str: + cached = self._system_prompt_section_cache.get(key) + if cached is not None: + return cached + value = builder() + self._system_prompt_section_cache[key] = value + return value - # Rule 3: Security - if is_sandbox: - rules.append("3. **Security**: The sandbox is isolated. You can install packages, run any commands, and modify files freely.") - else: - rules.append("3. **Security**: Dangerous commands are blocked. All operations are logged.") + def _build_context_section(self) -> str: + from core.runtime.prompts import build_context_section + + def _build() -> str: + is_sandbox = self._sandbox.name != "local" + if is_sandbox: + return build_context_section( + sandbox_name=self._sandbox.name, + sandbox_env_label=self._sandbox.env_label, + sandbox_working_dir=self._sandbox.working_dir, + ) + import platform - # Rule 4: Tool priority - rules.append( - """4. **Tool Priority**: When a built-in tool and an MCP tool (`mcp__*`) have the same functionality, use the built-in tool.""" - ) + os_name = platform.system() + shell_name = "powershell" if os_name == "Windows" else os.environ.get("SHELL", "/bin/bash").split("/")[-1] + return build_context_section( + sandbox_name="local", + workspace_root=str(self.workspace_root), + os_name=os_name, + shell_name=shell_name, + ) - # Rule 5: Dedicated tools over shell - rules.append("""5. **Use Dedicated Tools Instead of Shell Commands**: Do NOT use `Bash` for tasks that have dedicated tools: - - File search → use `Grep` (NOT `rg`, `grep`, or `find` via Bash) - - File listing → use `Glob` (NOT `find` or `ls` via Bash) - - File reading → use `Read` (NOT `cat`, `head`, `tail` via Bash) - - File editing → use `Edit` (NOT `sed` or `awk` via Bash) - - Reserve `Bash` for: git, package managers, build tools, tests, and other system operations.""") + return self._get_cached_prompt_section("context", _build) - # Rule 6: Background task description - rules.append("""6. 
**Background Task Description**: When using `Bash` or `Agent` with `run_in_background: true`, always include a clear `description` parameter. # noqa: E501 - - The description is shown to the user in the background task indicator. - - Keep it concise (5–10 words), action-oriented, e.g. "Run test suite", "Analyze API codebase". - - Without a description, the raw command or agent name is shown, which is hard to read.""") + def _build_rules_section(self) -> str: + from core.runtime.prompts import build_rules_section + + def _build() -> str: + is_sandbox = self._sandbox.name != "local" + working_dir = self._sandbox.working_dir if is_sandbox else str(self.workspace_root) + return build_rules_section( + is_sandbox=is_sandbox, + sandbox_name=self._sandbox.name, + working_dir=working_dir, + workspace_root=str(self.workspace_root), + spill_buffer_enabled=self.config.tools.spill_buffer.enabled, + spill_keep_recent=self.config.memory.pruning.protect_recent, + ) - return "\n\n".join(rules) + return self._get_cached_prompt_section("rules", _build) def _build_base_prompt(self) -> str: - """Build the base system prompt (context + rules), shared by all modes.""" - context = self._build_context_section() - rules = self._build_rules_section() - - return f"""You are a highly capable AI assistant with access to file and system tools. - -**Context:** -{context} + from core.runtime.prompts import build_base_prompt -**Important Rules:** - -{rules} -""" + return self._get_cached_prompt_section( + "base_prompt", + lambda: build_base_prompt(self._build_context_section(), self._build_rules_section()), + ) def _build_common_prompt_sections(self) -> str: - """Build common prompt sections for both sandbox and local modes.""" - prompt = """ -**Agent Tool (Sub-agent Orchestration):** - -Use the Agent tool to launch specialized sub-agents for complex tasks: -- `explore`: Read-only codebase exploration. Use for: finding files, searching code, understanding implementations. -- `plan`: Design implementation plans. Use for: architecture decisions, multi-step planning. -- `bash`: Execute shell commands. Use for: git operations, running tests, system commands. -- `general`: Full tool access. Use for: independent multi-step tasks requiring file modifications. 
- -When to use Agent: -- Open-ended searches that may require multiple rounds of exploration -- Tasks that can run independently while you continue other work -- Complex operations that benefit from specialized focus - -When NOT to use Agent: -- Simple file reads (use Read directly) -- Specific searches with known patterns (use Grep directly) -- Quick operations that don't need isolation - -**Todo Tools (Task Management):** - -Use Todo tools to track progress on complex, multi-step tasks: -- `TaskCreate`: Create a new task with subject, description, and activeForm (present continuous for spinner) -- `TaskList`: View all tasks and their status -- `TaskGet`: Get full details of a specific task -- `TaskUpdate`: Update task status (pending → in_progress → completed) or details - -When to use Todo: -- Complex tasks with 3+ distinct steps -- When the user provides multiple tasks to complete -- To show progress on non-trivial work - -When NOT to use Todo: -- Single, straightforward tasks -- Trivial operations that don't need tracking -""" - - # Add Skills section if skills are enabled - skills_enabled = self.config.skills.enabled and self.config.skills.paths - - if skills_enabled: - prompt += """ -**Skills (Specialized Knowledge):** + from core.runtime.prompts import build_common_sections -Use the `load_skill` tool to access specialized domain knowledge and workflows: -- Skills provide focused instructions for specific tasks (e.g., TDD, debugging, git workflows) -- Call `load_skill(skill_name)` to load a skill's content into context -- Available skills are listed in the load_skill tool description - -When to use load_skill: -- When you need specialized guidance for a specific workflow -- To access domain-specific best practices -- When the user mentions a skill by name (e.g., "use TDD skill") - -Progressive disclosure: Skills are loaded on-demand to save tokens. -""" - - return prompt + return self._get_cached_prompt_section( + "common_sections", + lambda: build_common_sections(bool(self.config.skills.enabled and self.config.skills.paths)), + ) def invoke(self, message: str, thread_id: str = "default") -> dict: """Invoke agent with a message (sync version). 
@@ -1388,6 +1538,174 @@ async def ainvoke(self, message: str, thread_id: str = "default") -> dict: self._monitor_middleware.mark_error(e) raise + async def astream( + self, + message: str, + thread_id: str = "default", + stream_mode: str | list[str] = "updates", + max_budget_usd: float | None = None, + ): + """Stream agent output through a caller-owned LeonAgent surface.""" + try: + async for chunk in self.agent.astream( + {"messages": [{"role": "user", "content": message}]}, + config={"configurable": {"thread_id": thread_id}}, + stream_mode=stream_mode, + ): + yield chunk + if max_budget_usd is not None and self.runtime.cost > max_budget_usd: + raise RuntimeError(f"max_budget_usd exceeded: cost={self.runtime.cost:.6f} budget={max_budget_usd:.6f}") + except Exception as e: + self._monitor_middleware.mark_error(e) + raise + + async def aclear_thread(self, thread_id: str = "default") -> None: + """Clear turn-scoped state for a thread while preserving session accumulators.""" + try: + await self.agent.aclear(thread_id) + self._invalidate_system_prompt_cache() + self.system_prompt = self._compose_system_prompt() + self.agent.system_prompt = SystemMessage(content=[{"type": "text", "text": self.system_prompt}]) + except Exception as e: + self._monitor_middleware.mark_error(e) + raise + + def clear_thread(self, thread_id: str = "default") -> None: + """Sync wrapper for aclear_thread().""" + import asyncio + + async def _aclear(): + await self.aclear_thread(thread_id) + + try: + if hasattr(self, "_event_loop") and self._event_loop: + self._event_loop.run_until_complete(_aclear()) + else: + asyncio.run(_aclear()) + except Exception as e: + self._monitor_middleware.mark_error(e) + raise + + def get_pending_permission_requests(self, thread_id: str | None = None) -> list[dict]: + requests = list(self._app_state.pending_permission_requests.values()) + if thread_id is not None: + requests = [item for item in requests if item.get("thread_id") == thread_id] + return requests + + def get_thread_permission_rules(self, thread_id: str | None = None) -> dict[str, Any]: + state = self._app_state.tool_permission_context + return { + "thread_id": thread_id, + "scope": "session", + "managed_only": state.allowManagedPermissionRulesOnly, + "rules": { + "allow": list(state.alwaysAllowRules.get("session", [])), + "deny": list(state.alwaysDenyRules.get("session", [])), + "ask": list(state.alwaysAskRules.get("session", [])), + }, + } + + def add_thread_permission_rule(self, thread_id: str, *, behavior: str, tool_name: str) -> bool: + if self._app_state.tool_permission_context.allowManagedPermissionRulesOnly: + return False + + def _update(state: AppState) -> AppState: + permission_state = state.tool_permission_context.model_copy(deep=True) + for bucket in ( + permission_state.alwaysAllowRules.setdefault("session", []), + permission_state.alwaysDenyRules.setdefault("session", []), + permission_state.alwaysAskRules.setdefault("session", []), + ): + while tool_name in bucket: + bucket.remove(tool_name) + target_bucket = { + "allow": permission_state.alwaysAllowRules.setdefault("session", []), + "deny": permission_state.alwaysDenyRules.setdefault("session", []), + "ask": permission_state.alwaysAskRules.setdefault("session", []), + }[behavior] + if tool_name not in target_bucket: + target_bucket.append(tool_name) + return state.model_copy(update={"tool_permission_context": permission_state}) + + self._app_state.set_state(_update) + return True + + def remove_thread_permission_rule(self, thread_id: str, *, behavior: str, 
tool_name: str) -> bool: + removed = False + + def _update(state: AppState) -> AppState: + nonlocal removed + permission_state = state.tool_permission_context.model_copy(deep=True) + bucket = { + "allow": permission_state.alwaysAllowRules.setdefault("session", []), + "deny": permission_state.alwaysDenyRules.setdefault("session", []), + "ask": permission_state.alwaysAskRules.setdefault("session", []), + }[behavior] + if tool_name in bucket: + bucket.remove(tool_name) + removed = True + return state.model_copy(update={"tool_permission_context": permission_state}) + + self._app_state.set_state(_update) + return removed + + def resolve_permission_request( + self, + request_id: str, + *, + decision: str, + message: str | None = None, + answers: list[dict[str, Any]] | None = None, + annotations: dict[str, Any] | None = None, + ) -> bool: + pending = self._app_state.pending_permission_requests.get(request_id) + if pending is None: + return False + + resolved = dict(self._app_state.resolved_permission_requests) + payload = { + **pending, + "decision": decision, + "message": message or pending.get("message"), + } + if answers is not None: + payload["answers"] = answers + if annotations is not None: + payload["annotations"] = annotations + resolved[request_id] = payload + still_pending = dict(self._app_state.pending_permission_requests) + still_pending.pop(request_id, None) + self._app_state.set_state( + lambda prev: prev.model_copy( + update={ + "pending_permission_requests": still_pending, + "resolved_permission_requests": resolved, + } + ) + ) + return True + + def drop_permission_request(self, request_id: str) -> bool: + had_pending = request_id in self._app_state.pending_permission_requests + had_resolved = request_id in self._app_state.resolved_permission_requests + if not had_pending and not had_resolved: + return False + + def _drop(state: AppState) -> AppState: + pending = dict(state.pending_permission_requests) + resolved = dict(state.resolved_permission_requests) + pending.pop(request_id, None) + resolved.pop(request_id, None) + return state.model_copy( + update={ + "pending_permission_requests": pending, + "resolved_permission_requests": resolved, + } + ) + + self._app_state.set_state(_drop) + return True + def get_response(self, message: str, thread_id: str = "default", **kwargs) -> str: """Get agent's text response. @@ -1411,7 +1729,7 @@ def cleanup(self): def create_leon_agent( - model_name: str = DEFAULT_MODEL, + model_name: str | None = None, api_key: str | None = None, workspace_root: str | Path | None = None, sandbox: Any = None, @@ -1421,7 +1739,7 @@ def create_leon_agent( """Create Leon Agent. Args: - model_name: Model name + model_name: Model name. None means "let LeonAgent resolve defaults". 
api_key: API key workspace_root: Workspace directory sandbox: Sandbox instance, name string, or None for local diff --git a/core/runtime/checkpoint_store.py b/core/runtime/checkpoint_store.py new file mode 100644 index 000000000..1a27ada07 --- /dev/null +++ b/core/runtime/checkpoint_store.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Protocol + + +@dataclass(frozen=True) +class ThreadCheckpointState: + messages: list + tool_permission_context: dict[str, Any] + pending_permission_requests: dict[str, dict[str, Any]] + resolved_permission_requests: dict[str, dict[str, Any]] + memory_compaction_state: dict[str, Any] + mcp_instruction_state: dict[str, Any] + + +class CheckpointStore(Protocol): + async def load(self, thread_id: str) -> ThreadCheckpointState | None: ... + + async def save(self, thread_id: str, state: ThreadCheckpointState) -> None: ... diff --git a/core/runtime/cleanup.py b/core/runtime/cleanup.py new file mode 100644 index 000000000..d55600684 --- /dev/null +++ b/core/runtime/cleanup.py @@ -0,0 +1,116 @@ +"""CleanupRegistry — priority-ordered async cleanup for LeonAgent lifecycle. + +Aligned with CC Pattern 5: Lifecycle & Cleanup. +Priority numbers: lower = runs first. +""" + +from __future__ import annotations + +import asyncio +import logging +import signal +from collections.abc import Awaitable, Callable +from itertools import groupby + +logger = logging.getLogger(__name__) + + +class CleanupRegistry: + """Registry of async cleanup functions executed in priority order on shutdown. + + Usage: + registry = CleanupRegistry() + registry.register(close_db, priority=1) + registry.register(close_sandbox, priority=2) + await registry.run_cleanup() + """ + + def __init__(self): + # List of (priority, fn) — not a dict because same priority can have multiple fns + self._entries: list[tuple[int, Callable[[], Awaitable[None] | None]]] = [] + self._timeout_s = 2.0 + self._cleanup_task: asyncio.Task[None] | None = None + self._shutdown_in_progress = False + self._signal_loop: asyncio.AbstractEventLoop | None = None + self._setup_signal_handlers() + + def register(self, fn: Callable[[], Awaitable[None] | None], priority: int = 5) -> Callable[[], None]: + """Register a cleanup function. + + Args: + fn: Sync or async callable that releases resources. + priority: Execution order — lower number runs first (1 before 2). + """ + entry = (priority, fn) + self._entries.append(entry) + + def unregister() -> None: + try: + self._entries.remove(entry) + except ValueError: + return + + return unregister + + async def run_cleanup(self) -> None: + """Execute all registered cleanup functions in priority order. + + Different priority tiers run in order. Entries inside the same priority + tier run concurrently so one slow cleanup does not serialize its peers. 
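+
+        Illustrative ordering (hypothetical fns): after register(close_db, 1),
+        register(flush_logs, 1) and register(close_sandbox, 2), the two
+        priority-1 entries are gathered concurrently, and close_sandbox only
+        starts once both have settled or timed out.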
+ """ + if self._cleanup_task is not None: + await asyncio.shield(self._cleanup_task) + return + + async def _run_all() -> None: + sorted_entries = sorted(self._entries, key=lambda x: x[0]) + for priority, grouped_entries in groupby(sorted_entries, key=lambda x: x[0]): + await asyncio.gather( + *(self._run_entry(priority, fn) for _, fn in grouped_entries), + return_exceptions=True, + ) + + self._shutdown_in_progress = True + self._cleanup_task = asyncio.create_task(_run_all()) + await asyncio.shield(self._cleanup_task) + + def is_shutting_down(self) -> bool: + return self._shutdown_in_progress + + async def _run_entry(self, priority: int, fn: Callable[[], Awaitable[None] | None]) -> None: + try: + result = fn() + if asyncio.iscoroutine(result): + await asyncio.wait_for(result, timeout=self._timeout_s) + except TimeoutError: + logger.warning("CleanupRegistry: cleanup fn %s timed out after %.2fs", fn, self._timeout_s) + except Exception: + logger.exception("CleanupRegistry: error in cleanup fn %s (priority=%d)", fn, priority) + + def _setup_signal_handlers(self) -> None: + """Register SIGINT/SIGTERM handlers to trigger async cleanup.""" + try: + loop = asyncio.get_event_loop() + except RuntimeError: + return # No running loop yet — signal handlers set up later + self._signal_loop = loop + + signals = [signal.SIGINT, signal.SIGTERM] + if hasattr(signal, "SIGHUP"): + signals.append(signal.SIGHUP) + + for sig in signals: + try: + loop.add_signal_handler(sig, self._handle_signal) + except (NotImplementedError, RuntimeError): + # Windows or non-main thread — skip signal handler setup + pass + + def _handle_signal(self) -> None: + loop = self._signal_loop + if loop is None: + return + if loop.is_running(): + loop.create_task(self.run_cleanup()) + return + loop.run_until_complete(self.run_cleanup()) diff --git a/core/runtime/errors.py b/core/runtime/errors.py index 74ffbfc1e..591ff3090 100644 --- a/core/runtime/errors.py +++ b/core/runtime/errors.py @@ -1,4 +1,13 @@ class InputValidationError(Exception): """Tool parameter validation failed.""" - pass + def __init__( + self, + message: str, + *, + error_code: str | None = None, + details: list[dict[str, object]] | None = None, + ) -> None: + super().__init__(message) + self.error_code = error_code + self.details = [] if details is None else details diff --git a/core/runtime/fork.py b/core/runtime/fork.py new file mode 100644 index 000000000..c3992cf74 --- /dev/null +++ b/core/runtime/fork.py @@ -0,0 +1,91 @@ +"""Context fork for sub-agent spawning. + +When a sub-agent is spawned, it inherits workspace/model/permission configuration +from the parent but gets its own isolated messages and session identity. + +Aligned with CC createSubagentContext() field-by-field fork table. +""" + +from __future__ import annotations + +import copy +import uuid + +from .abort import create_child_abort_controller +from .state import BootstrapConfig, ToolUseContext + + +def fork_context(parent: BootstrapConfig) -> BootstrapConfig: + """Create a child BootstrapConfig for a sub-agent. + + Inherits all workspace identity, model settings, and security flags + from parent. Generates a fresh session_id and sets parent_session_id. + Messages, cost, and turn_count live in AppState — not here. 
+ """ + return BootstrapConfig( + workspace_root=parent.workspace_root, + original_cwd=parent.original_cwd, + project_root=parent.project_root, + cwd=parent.cwd, + model_name=parent.model_name, + api_key=parent.api_key, + sandbox_type=parent.sandbox_type, + block_dangerous_commands=parent.block_dangerous_commands, + block_network_commands=parent.block_network_commands, + enable_audit_log=parent.enable_audit_log, + enable_web_tools=parent.enable_web_tools, + allowed_file_extensions=parent.allowed_file_extensions, + extra_allowed_paths=parent.extra_allowed_paths, + max_turns=parent.max_turns, + # Fresh session identity + session_id=uuid.uuid4().hex, + parent_session_id=parent.session_id, + total_cost_usd=parent.total_cost_usd, + total_tool_duration_ms=parent.total_tool_duration_ms, + # Model settings + model_provider=parent.model_provider, + base_url=parent.base_url, + context_limit=parent.context_limit, + ) + + +def create_subagent_context( + parent: ToolUseContext, + *, + share_set_app_state: bool = False, +) -> ToolUseContext: + """Create a minimally isolated ToolUseContext for sub-agents. + + Default contract: + - bootstrap: fresh fork + - set_app_state: NO-OP + - set_app_state_for_tasks: always reaches the root/session store + - turn-local refs: fresh + - file cache/messages: cloned snapshots + """ + read_file_state = parent.read_file_state + if hasattr(read_file_state, "clone") and callable(read_file_state.clone): + cloned_read_file_state = read_file_state.clone() + else: + # @@@sa-04-read-file-state-clone + # Subagent fork boundaries must isolate nested file cache state too; + # a shallow dict copy leaks child edits back into the parent cache. + cloned_read_file_state = copy.deepcopy(read_file_state) + return ToolUseContext( + bootstrap=fork_context(parent.bootstrap), + get_app_state=parent.get_app_state, + set_app_state=parent.set_app_state if share_set_app_state else (lambda updater: None), + set_app_state_for_tasks=parent.set_app_state_for_tasks or parent.set_app_state, + refresh_tools=parent.refresh_tools, + can_use_tool=parent.can_use_tool, + request_permission=parent.request_permission, + consume_permission_resolution=parent.consume_permission_resolution, + read_file_state=cloned_read_file_state, + loaded_nested_memory_paths=set(), + discovered_skill_names=set(), + discovered_tool_names=set(), + nested_memory_attachment_triggers=set(), + abort_controller=create_child_abort_controller(getattr(parent, "abort_controller", None)), + messages=list(parent.messages), + thread_id=parent.thread_id, + ) diff --git a/core/runtime/langgraph_checkpoint_store.py b/core/runtime/langgraph_checkpoint_store.py new file mode 100644 index 000000000..7e4c1e210 --- /dev/null +++ b/core/runtime/langgraph_checkpoint_store.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import inspect +from typing import Any, cast + +from .checkpoint_store import ThreadCheckpointState + + +class LangGraphCheckpointStore: + def __init__(self, saver: Any): + self._saver = saver + + async def load(self, thread_id: str) -> ThreadCheckpointState | None: + checkpoint = await self._aget_checkpoint(thread_id) + if checkpoint is None: + return None + channel_values = dict(checkpoint.get("channel_values", {}) or {}) + return ThreadCheckpointState( + messages=list(channel_values.get("messages", [])), + tool_permission_context=dict(channel_values.get("tool_permission_context", {}) or {}), + pending_permission_requests=dict(channel_values.get("pending_permission_requests", {}) or {}), + 
resolved_permission_requests=dict(channel_values.get("resolved_permission_requests", {}) or {}), + memory_compaction_state=dict(channel_values.get("memory_compaction_state", {}) or {}), + mcp_instruction_state=dict(channel_values.get("mcp_instruction_state", {}) or {}), + ) + + async def save(self, thread_id: str, state: ThreadCheckpointState) -> None: + from langgraph.checkpoint.base import CheckpointMetadata, create_checkpoint, empty_checkpoint + + existing_checkpoint = await self._aget_checkpoint(thread_id) + checkpoint = create_checkpoint( + self._normalize_checkpoint_for_write(existing_checkpoint, empty_checkpoint), + None, + len(state.messages), + ) + checkpoint["channel_values"] = { + "messages": state.messages, + "tool_permission_context": state.tool_permission_context, + "pending_permission_requests": state.pending_permission_requests, + "resolved_permission_requests": state.resolved_permission_requests, + "memory_compaction_state": state.memory_compaction_state, + "mcp_instruction_state": state.mcp_instruction_state, + } + new_versions: dict[str, Any] = {} + get_next_version = getattr(self._saver, "get_next_version", None) + if callable(get_next_version): + current_versions = dict(checkpoint.get("channel_versions", {}) or {}) + for channel_name in checkpoint["channel_values"]: + new_versions[channel_name] = get_next_version(current_versions.get(channel_name), None) + checkpoint["channel_versions"] = {**current_versions, **new_versions} + checkpoint["updated_channels"] = list(new_versions) + metadata: CheckpointMetadata = { + "source": "loop", + "step": len(state.messages), + } + await self._saver.aput(self._checkpoint_config(thread_id), checkpoint, metadata, new_versions) + + async def _aget_checkpoint(self, thread_id: str) -> dict[str, Any] | None: + cfg = self._checkpoint_config(thread_id) + aget_tuple = getattr(self._saver, "aget_tuple", None) + if callable(aget_tuple): + checkpoint_tuple_result = aget_tuple(cfg) + checkpoint_tuple = await checkpoint_tuple_result if inspect.isawaitable(checkpoint_tuple_result) else checkpoint_tuple_result + checkpoint_value = getattr(checkpoint_tuple, "checkpoint", None) + if isinstance(checkpoint_value, dict): + return checkpoint_value + aget = getattr(self._saver, "aget", None) + if callable(aget): + checkpoint_result = aget(cfg) + checkpoint_value = await checkpoint_result if inspect.isawaitable(checkpoint_result) else checkpoint_result + if isinstance(checkpoint_value, dict): + return cast(dict[str, Any], checkpoint_value) + return None + + @staticmethod + def _normalize_checkpoint_for_write(raw_checkpoint: Any, empty_checkpoint_factory: Any) -> Any: + checkpoint = empty_checkpoint_factory() + if not isinstance(raw_checkpoint, dict): + return checkpoint + # @@@checkpoint-shape-normalization - local/simple savers often persist only + # channel_values, while LangGraph savers expect the full checkpoint shape. + # Normalize both into one writable base contract before versioning. 
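+        # e.g. a minimal saver that persisted only {"channel_values": {...}}
+        # is merged into empty_checkpoint()'s full skeleton below: missing
+        # fields keep their defaults, dict/list fields are shallow-copied.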
+ for key, default_value in checkpoint.items(): + if key not in raw_checkpoint: + continue + value = raw_checkpoint[key] + if isinstance(default_value, dict): + checkpoint[key] = dict(value or {}) + elif isinstance(default_value, list): + checkpoint[key] = list(value or []) + else: + checkpoint[key] = value + return checkpoint + + @staticmethod + def _checkpoint_config(thread_id: str) -> dict[str, Any]: + return {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}} diff --git a/core/runtime/loop.py b/core/runtime/loop.py new file mode 100644 index 000000000..be8136735 --- /dev/null +++ b/core/runtime/loop.py @@ -0,0 +1,2268 @@ +"""QueryLoop — self-managing agentic tool loop replacing LangGraph create_agent. + +Implements CC Pattern 1: Agentic Tool Loop (queryLoop). + +Design: +- AsyncGenerator that alternates LLM sampling and tool execution. +- Exposes the same .astream(input, config, stream_mode) interface as CompiledStateGraph. +- Middleware chain (SpillBuffer/Monitor/PromptCaching/Memory/Steering/ToolRunner) is + preserved exactly — awrap_model_call and awrap_tool_call pass through in order. +- is_concurrency_safe tools execute in parallel; others execute serially. +- Checkpointer (AsyncSqliteSaver) stores/restores message history across calls. +""" + +from __future__ import annotations + +import asyncio +import copy +import inspect +import json +import logging +import re +import uuid +from collections.abc import AsyncGenerator, Awaitable, Callable +from dataclasses import dataclass +from enum import StrEnum +from types import SimpleNamespace +from typing import Any + +from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage, RemoveMessage, SystemMessage, ToolMessage + +from core.runtime.middleware import ( + AgentMiddleware, + ModelRequest, + ModelResponse, + ToolCallRequest, +) + +from .abort import AbortController +from .checkpoint_store import CheckpointStore, ThreadCheckpointState +from .langgraph_checkpoint_store import LangGraphCheckpointStore +from .permissions import ToolPermissionContext, evaluate_permission_rules +from .registry import ToolMode, ToolRegistry +from .state import AppState, BootstrapConfig, ToolPermissionState, ToolUseContext +from .validator import _required_sets_match + +logger = logging.getLogger(__name__) + +_NOOP_HANDLER: Any = None # placeholder for innermost "handler" in middleware chain +_ESCALATED_MAX_OUTPUT_TOKENS = 64000 +_FLOOR_OUTPUT_TOKENS = 3000 +_CONTEXT_OVERFLOW_SAFETY_BUFFER = 1000 +_TRANSIENT_API_MAX_RETRIES = 3 +_TRANSIENT_API_BASE_DELAY_SECONDS = 0.5 +_PROMPT_TOO_LONG_NOTICE_TEXT = "Prompt is too long. Automatic recovery exhausted. Clear the thread or start a new one." 
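+# Recovery-budget sketch (constants above; the exact delay curve lives in
+# _retry_delay_seconds): a transient API error is retried up to
+# _TRANSIENT_API_MAX_RETRIES times, with delays derived from
+# _TRANSIENT_API_BASE_DELAY_SECONDS, before the run ends as model_error.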
+ + +class TerminalReason(StrEnum): + completed = "completed" + aborted_streaming = "aborted_streaming" + aborted_tools = "aborted_tools" + model_error = "model_error" + max_turns = "max_turns" + prompt_too_long = "prompt_too_long" + blocking_limit = "blocking_limit" + image_error = "image_error" + hook_stopped = "hook_stopped" + stop_hook_prevented = "stop_hook_prevented" + + +class ContinueReason(StrEnum): + next_turn = "next_turn" + api_retry = "api_retry" + collapse_drain_retry = "collapse_drain_retry" + reactive_compact_retry = "reactive_compact_retry" + max_output_tokens_escalate = "max_output_tokens_escalate" + max_output_tokens_recovery = "max_output_tokens_recovery" + stop_hook_blocking = "stop_hook_blocking" + token_budget_continuation = "token_budget_continuation" + + +@dataclass(frozen=True) +class TerminalState: + reason: TerminalReason + turn_count: int + error: str | None = None + + +@dataclass(frozen=True) +class ContinueState: + reason: ContinueReason + + +@dataclass(frozen=True) +class _ModelErrorRecoveryResult: + messages: list + transition: ContinueState | None + max_output_tokens_recovery_count: int + has_attempted_reactive_compact: bool + max_output_tokens_override: int | None + transient_api_retry_count: int + terminal: TerminalState | None + + +@dataclass(frozen=True) +class _ModelErrorContext: + exc: Exception + error_text: str + thread_id: str + messages: list + turn: int + transition: ContinueState | None + max_output_tokens_recovery_count: int + has_attempted_reactive_compact: bool + max_output_tokens_override: int | None + transient_api_retry_count: int + + +@dataclass +class _TrackedTool: + order: int + tool_call: dict[str, Any] + is_concurrency_safe: bool + status: str = "queued" + task: asyncio.Task[None] | None = None + result: ToolMessage | None = None + + +class StreamingToolExecutor: + def __init__( + self, + *, + execute_tool: Callable[[dict[str, Any], ToolUseContext | None], Awaitable[ToolMessage]], + is_concurrency_safe: Callable[[dict[str, Any]], bool], + lookup_tool: Callable[[str], Any | None], + tool_context: ToolUseContext | None, + ): + self._execute_tool = execute_tool + self._is_concurrency_safe = is_concurrency_safe + self._lookup_tool = lookup_tool + self._tool_context = tool_context + self._tracked: list[_TrackedTool] = [] + self._discarded = False + + def _tool_name(self, tool_call: dict[str, Any]) -> str: + return tool_call.get("name") or tool_call.get("function", {}).get("name", "") + + async def add_tool(self, tool_call: dict[str, Any]) -> None: + if self._discarded: + return + name = self._tool_name(tool_call) + if self._lookup_tool(name) is None: + self._tracked.append( + _TrackedTool( + order=len(self._tracked), + tool_call=tool_call, + is_concurrency_safe=False, + status="completed", + result=self._tool_error(tool_call, f"Tool '{name}' not found"), + ) + ) + return + tracked = _TrackedTool( + order=len(self._tracked), + tool_call=tool_call, + is_concurrency_safe=self._is_concurrency_safe(tool_call), + ) + self._tracked.append(tracked) + self._process_queue() + + async def get_completed_results(self) -> list[ToolMessage]: + await asyncio.sleep(0) + self._process_queue() + ready: list[ToolMessage] = [] + for tracked in self._tracked: + if tracked.status == "yielded": + continue + if tracked.status == "completed" and tracked.result is not None: + tracked.status = "yielded" + ready.append(tracked.result) + continue + break + return ready + + async def drain_remaining(self) -> list[ToolMessage]: + while True: + self._process_queue() + 
running = [tracked.task for tracked in self._tracked if tracked.status == "executing" and tracked.task is not None]
+            if not running:
+                break
+            await asyncio.wait(running, return_when=asyncio.FIRST_COMPLETED)
+            self._process_queue()
+        remaining: list[ToolMessage] = []
+        for tracked in self._tracked:
+            if tracked.status == "yielded":
+                continue
+            if tracked.status == "completed" and tracked.result is not None:
+                tracked.status = "yielded"
+                remaining.append(tracked.result)
+        return remaining
+
+    async def discard(self, reason: str) -> list[ToolMessage]:
+        # @@@streaming-tool-discard
+        # ql-05 must not leave orphaned tool tasks behind when streaming exits
+        # early. Synthetic error emission is still a later hardening pass, but
+        # task cleanup itself must happen now.
+        self._discarded = True
+        running: list[asyncio.Task[None]] = []
+        for tracked in self._tracked:
+            if tracked.status == "queued":
+                tracked.status = "completed"
+                tracked.result = self._synthetic_error(tracked.tool_call, reason)
+                continue
+            if tracked.status == "executing" and tracked.task is not None:
+                tracked.task.cancel()
+                running.append(tracked.task)
+        if running:
+            await asyncio.gather(*running, return_exceptions=True)
+        for tracked in self._tracked:
+            if tracked.status == "executing":
+                tracked.status = "completed"
+                tracked.result = self._synthetic_error(tracked.tool_call, reason)
+        return await self.drain_remaining()
+
+    def _process_queue(self) -> None:
+        if self._discarded:
+            return
+        for tracked in self._tracked:
+            if tracked.status != "queued":
+                continue
+            if not self._can_execute(tracked):
+                break
+            tracked.status = "executing"
+            tracked.task = asyncio.create_task(self._run_tool(tracked))
+
+    def _can_execute(self, tracked: _TrackedTool) -> bool:
+        executing = [item for item in self._tracked if item.status == "executing"]
+        if not executing:
+            return True
+        if not tracked.is_concurrency_safe:
+            return False
+        return all(item.is_concurrency_safe for item in executing)
+
+    async def _run_tool(self, tracked: _TrackedTool) -> None:
+        # @@@streaming-tool-task-exit
+        # ql-05 cannot let middleware-level exceptions disappear into a dead
+        # task. Every tool_use must resolve to a ToolMessage, and queue
+        # progression must re-run immediately when a task exits.
+        try:
+            tracked.result = await self._execute_tool(tracked.tool_call, self._tool_context)
+            tracked.status = "completed"
+        except asyncio.CancelledError:
+            raise
+        except Exception as exc:
+            tracked.result = self._tool_error(tracked.tool_call, str(exc))
+            tracked.status = "completed"
+        finally:
+            if self._should_abort_siblings(tracked):
+                await self._abort_siblings(
+                    excluding=tracked,
+                    reason="sibling aborted after bash error",
+                )
+            if not self._discarded:
+                self._process_queue()
+
+    def _should_abort_siblings(self, tracked: _TrackedTool) -> bool:
+        if tracked.result is None:
+            return False
+        return self._tool_name(tracked.tool_call).lower() == "bash" and "<tool_use_error>" in tracked.result.content
+
+    async def _abort_siblings(self, *, excluding: _TrackedTool, reason: str) -> None:
+        # @@@bash-sibling-abort
+        # Claude Code only fans out this abort for bash failures. Keep it
+        # local to the current executor iteration so the parent loop survives
+        # and later turns can continue with explicit tool errors.
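+        # Hypothetical trace: [bash (failed), grep (executing), read (queued)]
+        # ends with grep's task cancelled, read resolved to a _tool_error, and
+        # only the failing bash keeping its original result.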
+        self._discarded = True
+        running: list[asyncio.Task[None]] = []
+        for tracked in self._tracked:
+            if tracked is excluding or tracked.status in {"completed", "yielded"}:
+                continue
+            if tracked.status == "queued":
+                tracked.status = "completed"
+                tracked.result = self._tool_error(tracked.tool_call, reason)
+                continue
+            if tracked.status == "executing" and tracked.task is not None:
+                tracked.task.cancel()
+                running.append(tracked.task)
+        if running:
+            await asyncio.gather(*running, return_exceptions=True)
+        for tracked in self._tracked:
+            if tracked is excluding or tracked.status != "executing":
+                continue
+            tracked.status = "completed"
+            tracked.result = self._tool_error(tracked.tool_call, reason)
+
+    def _synthetic_error(self, tool_call: dict[str, Any], reason: str) -> ToolMessage:
+        return self._tool_error(
+            tool_call,
+            f"streaming discarded: {reason}",
+        )
+
+    def _tool_error(self, tool_call: dict[str, Any], error_text: str) -> ToolMessage:
+        # Wrap errors in <tool_use_error> tags so _should_abort_siblings (and
+        # other consumers) can detect tool failures in-band.
+        return ToolMessage(
+            content=f"<tool_use_error>{error_text}</tool_use_error>",
+            tool_call_id=tool_call.get("id", ""),
+            name=self._tool_name(tool_call),
+        )
+
+
+class QueryLoop:
+    """Self-managing query loop replacing create_agent.
+
+    The .astream() method is an AsyncGenerator that yields dicts compatible
+    with LangGraph's stream_mode="updates":
+        {"agent": {"messages": [AIMessage(...)]}}
+        {"tools": {"messages": [ToolMessage(...), ...]}}
+
+    The checkpointer attribute is set post-construction (mirrors create_agent pattern).
+    """
+
+    @property
+    def checkpointer(self) -> Any:
+        return self._checkpointer
+
+    @checkpointer.setter
+    def checkpointer(self, value: Any) -> None:
+        self._checkpointer = value
+        self._checkpoint_store = LangGraphCheckpointStore(value) if value is not None else None
+
+    def __init__(
+        self,
+        model: Any,
+        system_prompt: SystemMessage,
+        middleware: list[AgentMiddleware],
+        checkpointer: Any,
+        registry: ToolRegistry,
+        app_state: AppState | None = None,
+        runtime: Any = None,
+        bootstrap: BootstrapConfig | None = None,
+        refresh_tools: Any = None,
+        max_turns: int = 100,
+    ):
+        self.model = model
+        self.system_prompt = system_prompt
+        self.middleware = middleware
+        self.checkpointer = checkpointer
+        self._checkpoint_store: CheckpointStore | None
+        self._registry = registry
+        self._app_state = app_state
+        self._runtime = runtime
+        self._bootstrap = bootstrap
+        self._refresh_tools = refresh_tools
+        self._memory_middleware = next(
+            (mw for mw in middleware if hasattr(mw, "compact_boundary_index")),
+            None,
+        )
+        # @@@sa-02-session-tool-refs
+        # These refs must survive across turns within the same loop/session,
+        # while turn-local attachment triggers stay ephemeral per ToolUseContext.
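+        # e.g. a file snapshot cached by a read tool in turn 1 stays visible
+        # to an edit in turn 5, while nested_memory_attachment_triggers is
+        # rebuilt as a fresh set() in every _build_tool_use_context call.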
+ self._tool_read_file_state: dict[str, Any] = {} + self._tool_loaded_nested_memory_paths: set[str] = set() + self._tool_discovered_skill_names: set[str] = set() + self._tool_discovered_tool_names_by_thread: dict[str, set[str]] = {} + self._tool_abort_controller = AbortController() + self.max_turns = max_turns + self.last_terminal: TerminalState | None = None + self.last_continue: ContinueState | None = None + + # ------------------------------------------------------------------------- + # Public streaming interface (LangGraph-compatible) + # ------------------------------------------------------------------------- + + async def query( + self, + input: dict, + config: dict | None = None, + ) -> AsyncGenerator[dict[str, Any], None]: + """Raw loop generator with an explicit final terminal event.""" + config = config or {} + thread_id = config.get("configurable", {}).get("thread_id", "default") + + # Set thread context so MemoryMiddleware can find thread_id via ContextVar + from sandbox.thread_context import set_current_thread_id + + set_current_thread_id(thread_id) + + # Load message history and thread-scoped runtime state from checkpointer + persisted = await self._hydrate_thread_state_from_checkpoint(thread_id) + messages = list(persisted["messages"]) + self._restore_discovered_tool_names_from_messages(thread_id, messages) + + # Parse and append new input messages + new_msgs = self._parse_input(input) + messages.extend(new_msgs) + self._sync_app_state(messages=messages, turn_count=0) + + terminal: TerminalState | None = None + transition: ContinueState | None = None + pending_system_notices: list[HumanMessage] = [] + max_output_tokens_recovery_count = 0 + has_attempted_reactive_compact = False + max_output_tokens_override: int | None = None + transient_api_retry_count = 0 + + turn = 0 + try: + while turn < self.max_turns: + turn += 1 + tool_context = self._build_tool_use_context(messages, thread_id=thread_id) + + messages_for_query, injected_messages = await self._build_query_messages(messages, config) + if injected_messages: + # @@@steer-persist - queue/steer messages accepted before the + # next model call must become durable conversation state, not + # request-only hints, or later replay/history lies about what + # the user actually said mid-run. 
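+                    # e.g. a steering message accepted while turn 2 is still
+                    # streaming joins `messages` here, so checkpoint saves and
+                    # later history replays include it.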
+ messages.extend(injected_messages) + self._sync_app_state(messages=messages, turn_count=turn) + self._sync_tool_context_messages(tool_context, messages_for_query) + + # --- Call model through middleware chain --- + streamed_tool_results: list[ToolMessage] = [] + pending_tool_results: list[ToolMessage] = [] + used_streaming_overlap = False + response: ModelResponse | None = None + ai_msg: AIMessage | None = None + tool_calls: list[dict[str, Any]] = [] + try: + if self._can_stream_tools(): + used_streaming_overlap = True + async for stream_event in self._stream_model_with_tool_overlap( + messages_for_query, + config, + thread_id=thread_id, + tool_context=tool_context, + max_output_tokens_override=max_output_tokens_override, + ): + if stream_event["type"] == "message_chunk": + yield {"message_chunk": stream_event["chunk"]} + continue + if stream_event["type"] == "tools": + chunk_messages = stream_event["messages"] + streamed_tool_results.extend(chunk_messages) + yield {"tools": {"messages": chunk_messages}} + continue + response = stream_event["response"] + ai_msg = stream_event["ai_message"] + tool_calls = stream_event["tool_calls"] + pending_tool_results = stream_event["remaining_tool_results"] + else: + response = await self._invoke_model( + messages_for_query, + config, + thread_id=thread_id, + max_output_tokens_override=max_output_tokens_override, + ) + except Exception as exc: + self._collect_memory_system_notices(pending_system_notices) + handled = await self._handle_model_error_recovery( + exc=exc, + thread_id=thread_id, + messages=messages, + turn=turn, + transition=transition, + max_output_tokens_recovery_count=max_output_tokens_recovery_count, + has_attempted_reactive_compact=has_attempted_reactive_compact, + max_output_tokens_override=max_output_tokens_override, + transient_api_retry_count=transient_api_retry_count, + ) + if handled is not None: + messages = handled.messages + transition = handled.transition + max_output_tokens_recovery_count = handled.max_output_tokens_recovery_count + has_attempted_reactive_compact = handled.has_attempted_reactive_compact + max_output_tokens_override = handled.max_output_tokens_override + transient_api_retry_count = handled.transient_api_retry_count + if handled.terminal is not None: + terminal = handled.terminal + break + self._sync_app_state(messages=messages, turn_count=turn) + continue + terminal = TerminalState( + reason=TerminalReason.model_error, + turn_count=turn, + error=str(exc), + ) + break + + if response is None or ai_msg is None: + ai_messages = [m for m in (response.result if response else []) if isinstance(m, AIMessage)] + if not ai_messages: + # No AI message — unexpected; treat as terminal + terminal = TerminalState( + reason=TerminalReason.model_error, + turn_count=turn, + error="model returned no AIMessage", + ) + break + ai_msg = ai_messages[0] + self._collect_memory_system_notices(pending_system_notices) + self._sync_tool_context_messages( + tool_context, + response.request_messages or messages_for_query, + ) + + truncated = self._handle_truncated_response_recovery( + ai_msg=ai_msg, + messages=messages, + turn=turn, + max_output_tokens_recovery_count=max_output_tokens_recovery_count, + max_output_tokens_override=max_output_tokens_override, + ) + if truncated is not None: + messages = truncated["messages"] + transition = truncated["transition"] + max_output_tokens_recovery_count = truncated["max_output_tokens_recovery_count"] + max_output_tokens_override = truncated["max_output_tokens_override"] + 
self._sync_app_state(messages=messages, turn_count=turn) + if truncated["yield_ai"]: + yield {"agent": {"messages": [ai_msg]}} + if truncated["terminal"] is not None: + terminal = truncated["terminal"] + break + continue + + self._sync_app_state(messages=messages, turn_count=turn) + + if not tool_calls: + tool_calls = getattr(ai_msg, "tool_calls", None) or [] + if not tool_calls: + # Also check additional_kwargs for older message formats + tool_calls = ai_msg.additional_kwargs.get("tool_calls", []) + + if not tool_calls and not self._ai_message_has_visible_content(ai_msg): + terminal_followthrough_notice = self._get_terminal_followthrough_notice(messages) + if terminal_followthrough_notice is not None: + ai_msg = self._build_terminal_followthrough_fallback(terminal_followthrough_notice) + else: + chat_followthrough_notice = self._get_chat_followthrough_notice(messages) + if chat_followthrough_notice is not None: + ai_msg = self._build_chat_followthrough_fallback(chat_followthrough_notice) + + # Yield agent update (stream_mode="updates" format) + yield {"agent": {"messages": [ai_msg]}} + + if not tool_calls: + # No tool calls → agent is done + if self._ai_message_has_visible_content(ai_msg): + messages.append(ai_msg) + terminal = TerminalState( + reason=TerminalReason.completed, + turn_count=turn, + ) + break + + # Expose current messages for forkContext sub-agent spawning + from sandbox.thread_context import set_current_messages + + set_current_messages(messages + [ai_msg]) + + if used_streaming_overlap: + if pending_tool_results: + yield {"tools": {"messages": pending_tool_results}} + tool_results = streamed_tool_results + pending_tool_results + else: + # --- Execute tools through middleware chain --- + try: + tool_results = await self._execute_tools(tool_calls, response, tool_context) + except Exception as exc: + terminal = TerminalState( + reason=TerminalReason.aborted_tools, + turn_count=turn, + error=str(exc), + ) + break + + # Yield tools update + yield {"tools": {"messages": tool_results}} + + # Advance message history for next turn + messages.append(ai_msg) + messages.extend(tool_results) + if self._tool_results_include_permission_request(tool_results): + terminal = TerminalState( + reason=TerminalReason.completed, + turn_count=turn, + ) + self._sync_app_state(messages=messages, turn_count=turn) + break + await self._refresh_tools_between_turns(tool_context) + transition = ContinueState(reason=ContinueReason.next_turn) + max_output_tokens_recovery_count = 0 + has_attempted_reactive_compact = False + max_output_tokens_override = None + transient_api_retry_count = 0 + self._sync_app_state(messages=messages, turn_count=turn) + except asyncio.CancelledError: + # @@@cancel-persists-live-state - accepted user input from the + # current run must not evaporate just because the run is cancelled + # before the next terminal save. 
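+            # Sketch: a Ctrl-C mid-run lands here; messages, including this
+            # run's accepted input, are saved before the cancellation
+            # propagates, so the next query() on this thread_id rehydrates them.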
+ messages = self._append_system_notices(messages, pending_system_notices) + await self._save_messages(thread_id, messages) + self._sync_app_state(messages=messages, turn_count=turn) + raise + + if terminal is None: + terminal = TerminalState( + reason=TerminalReason.max_turns, + turn_count=turn, + ) + + # Persist message history + self._collect_memory_system_notices(pending_system_notices) + visible_terminal_error = self._build_visible_terminal_error_message(terminal, messages) + if visible_terminal_error is not None: + messages.append(visible_terminal_error) + terminal_notice = self._build_terminal_notice(terminal) + if terminal_notice is not None: + pending_system_notices.append(terminal_notice) + messages = self._append_system_notices(messages, pending_system_notices) + await self._save_messages(thread_id, messages) + self._sync_app_state(messages=messages, turn_count=turn) + self.last_terminal = terminal + self.last_continue = transition + yield {"terminal": terminal, "transition": transition} + + def _make_streaming_tool_executor(self, *, tool_context: ToolUseContext | None) -> StreamingToolExecutor: + return StreamingToolExecutor( + execute_tool=self._execute_single_tool, + is_concurrency_safe=self._tool_is_concurrency_safe, + lookup_tool=self._registry.get, + tool_context=tool_context, + ) + + async def astream( + self, + input: dict, + config: dict | None = None, + stream_mode: str | list[str] = "updates", + ) -> AsyncGenerator[Any, None]: + """Stream agent execution chunks compatible with LangGraph stream modes.""" + requested_modes = [stream_mode] if isinstance(stream_mode, str) else list(stream_mode) + emitted_live_agent_chunks = False + async for event in self.query(input, config=config): + if "terminal" in event: + terminal = event["terminal"] + if terminal is not None and terminal.reason is not TerminalReason.completed: + # @@@astream-terminal-loud-fail + # query() always emits a terminal event, but caller-facing + # astream() must not turn runtime failures into a silent empty + # iterator. Propagate non-completed terminals back to the caller. + raise RuntimeError(self._terminal_error_text(terminal)) + continue + if isinstance(stream_mode, str): + if "message_chunk" in event: + continue + yield event + continue + + if "message_chunk" in event: + if "messages" in requested_modes: + yield ( + "messages", + ( + event["message_chunk"], + {"langgraph_node": "agent"}, + ), + ) + emitted_live_agent_chunks = True + continue + + if "messages" in requested_modes and "agent" in event: + if not emitted_live_agent_chunks: + for msg in event["agent"].get("messages", []): + if not isinstance(msg, AIMessage): + continue + yield ( + "messages", + ( + AIMessageChunk(**msg.model_dump(exclude={"type"})), + {"langgraph_node": "agent"}, + ), + ) + emitted_live_agent_chunks = False + + if "updates" in requested_modes: + yield ("updates", event) + + async def ainvoke( + self, + input: dict, + config: dict | None = None, + stream_mode: str = "updates", + ) -> dict[str, Any]: + """Drain query and return messages plus explicit terminal state.""" + drained_messages: list[Any] = [] + terminal: TerminalState | None = None + transition: ContinueState | None = None + + # @@@ainvoke-drains-astream + # QueryLoop is generator-first. ainvoke exists only as a compatibility + # adapter for callers like LeonAgent.invoke/ainvoke and must not invent + # a separate execution path. 
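+        # e.g. LeonAgent.invoke()/ainvoke() funnel through here: agent/tools
+        # events are flattened into one messages list, and the terminal and
+        # transition values are returned verbatim rather than re-derived.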
+ async for event in self.query(input, config=config): + if "terminal" in event: + terminal = event["terminal"] + transition = event.get("transition") + continue + for section in ("agent", "tools"): + drained_messages.extend(event.get(section, {}).get("messages", [])) + + return { + "messages": drained_messages, + "reason": terminal.reason.value if terminal else TerminalReason.completed.value, + "terminal": terminal, + "transition": transition, + } + + async def aget_state(self, config: dict | None = None) -> Any: + """Minimal graph-state bridge for backend/web callers.""" + config = config or {} + thread_id = config.get("configurable", {}).get("thread_id", "default") + if self._is_runtime_active(): + # @@@active-state-no-clobber - caller surfaces like /permissions and + # /history can poll during an active run. Rehydrating from stale + # checkpoint here would erase live thread-scoped permission state. + values = self._snapshot_live_thread_state(thread_id) + return SimpleNamespace(values=values) + values = await self._hydrate_thread_state_from_checkpoint(thread_id) + return SimpleNamespace(values=values) + + async def aupdate_state( + self, + config: dict | None, + input_data: dict[str, Any] | None, + as_node: str | None = None, + ) -> Any: + """Minimal graph-state update bridge for resumed-thread callers.""" + config = config or {} + input_data = input_data or {} + thread_id = config.get("configurable", {}).get("thread_id", "default") + messages = await self._load_messages(thread_id) + raw_updates = input_data.get("messages", []) + + # @@@ql-06-state-bridge - backend/web still speaks the old graph-state + # contract. Only the live caller shapes are supported here: append + # resumed start messages, or apply RemoveMessage-based repairs before + # appending replacement messages. 
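+        # Hypothetical repair call:
+        #   await loop.aupdate_state(cfg, {"messages": [RemoveMessage(id=bad_id), fixed_msg]})
+        # drops bad_id from history, then appends fixed_msg.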
+ if as_node == "__start__": + messages.extend(self._parse_input({"messages": raw_updates})) + else: + updates = raw_updates if isinstance(raw_updates, list) else [raw_updates] + remove_ids = {update.id for update in updates if isinstance(update, RemoveMessage) and getattr(update, "id", None)} + if remove_ids: + messages = [message for message in messages if getattr(message, "id", None) not in remove_ids] + messages.extend(update for update in updates if not isinstance(update, RemoveMessage)) + + await self._save_messages(thread_id, messages) + current_turn_count = self._app_state.turn_count if self._app_state is not None else 0 + self._sync_app_state(messages=messages, turn_count=current_turn_count) + self._restore_discovered_tool_names_from_messages(thread_id, messages) + return await self.aget_state(config) + + async def apersist_state(self, thread_id: str) -> None: + """Persist the current thread-scoped loop/app state to the checkpointer.""" + messages = list(self._app_state.messages) if self._app_state is not None else await self._load_messages(thread_id) + await self._save_messages(thread_id, messages) + + # ------------------------------------------------------------------------- + # Model invocation through middleware chain + # ------------------------------------------------------------------------- + + async def _invoke_model( + self, + messages: list, + config: dict, + *, + thread_id: str = "default", + max_output_tokens_override: int | None = None, + ) -> ModelResponse: + """Call model through the full middleware chain (awrap_model_call).""" + + async def innermost_handler(request: ModelRequest) -> ModelResponse: + """Actual model call — innermost of the chain.""" + tools = request.tools or [] + model = request.model + + # Bind tools to model if any + if tools: + try: + bound = model.bind_tools(tools) + except Exception: + bound = model + else: + bound = model + + if max_output_tokens_override is not None and hasattr(bound, "bind"): + try: + bound = bound.bind(max_tokens=max_output_tokens_override) + except Exception: + pass + + # Build message list: system + conversation + call_messages = [] + if request.system_message: + call_messages.append(request.system_message) + call_messages.extend(request.messages) + + result = await bound.ainvoke(call_messages) + if not isinstance(result, list): + result = [result] + return ModelResponse(result=result, request_messages=list(request.messages)) + + # Build ModelRequest + inline_schemas = self._registry.get_inline_schemas(self._get_discovered_tool_names(thread_id)) + request = ModelRequest( + model=self.model, + messages=messages, + system_message=self.system_prompt, + tools=inline_schemas, + ) + + # Walk middleware chain outside-in: each wraps the next. + # Only include middleware that actually overrides awrap_model_call OR wrap_model_call + # (not just inherits the base-class NotImplementedError stub). 
+ handler = innermost_handler + for mw in reversed(self.middleware): + if _mw_overrides_model_call(mw): + handler = _make_model_wrapper(mw, handler) + + return await handler(request) + + def _bind_model( + self, + model: Any, + tools: list | None, + *, + max_output_tokens_override: int | None = None, + ) -> Any: + if tools: + try: + bound = model.bind_tools(tools) + except Exception: + bound = model + else: + bound = model + + if max_output_tokens_override is not None and hasattr(bound, "bind"): + try: + bound = bound.bind(max_tokens=max_output_tokens_override) + except Exception: + pass + return bound + + def _can_stream_tools(self) -> bool: + stream_fn = getattr(self.model, "astream", None) + if not callable(stream_fn): + return False + return type(self.model).__module__ != "unittest.mock" + + async def _prepare_streaming_request( + self, + messages: list, + *, + thread_id: str, + ) -> ModelRequest: + inline_schemas = self._registry.get_inline_schemas(self._get_discovered_tool_names(thread_id)) + request = ModelRequest( + model=self.model, + messages=messages, + system_message=self.system_prompt, + tools=inline_schemas, + ) + + async def prepare_handler(request: ModelRequest) -> ModelResponse: + return ModelResponse( + result=[], + request_messages=list(request.messages), + prepared_request=request, + ) + + handler = prepare_handler + for mw in reversed(self.middleware): + if _mw_overrides_model_call(mw): + handler = _make_model_wrapper(mw, handler) + + response = await handler(request) + return response.prepared_request or request + + async def _stream_model_with_tool_overlap( + self, + messages: list, + config: dict, + *, + thread_id: str, + tool_context: ToolUseContext | None, + max_output_tokens_override: int | None, + ) -> AsyncGenerator[dict[str, Any], None]: + prepared_request = await self._prepare_streaming_request(messages, thread_id=thread_id) + bound = self._bind_model( + prepared_request.model, + prepared_request.tools, + max_output_tokens_override=max_output_tokens_override, + ) + + call_messages = [] + if prepared_request.system_message: + call_messages.append(prepared_request.system_message) + call_messages.extend(prepared_request.messages) + + executor = self._make_streaming_tool_executor(tool_context=tool_context) + aggregate: AIMessageChunk | None = None + seen_tool_ids: set[str] = set() + streamed_tool_calls: list[dict[str, Any]] = [] + + try: + async for chunk in bound.astream(call_messages): + if isinstance(chunk, AIMessage): + chunk = AIMessageChunk(**chunk.model_dump(exclude={"type"})) + elif not isinstance(chunk, AIMessageChunk): + continue + + # @@@stream-chunk-snapshot + # Some providers reuse and mutate the same chunk object across + # yields. Snapshot before yielding/aggregating so the final + # AIMessage cannot collapse to the last empty chunk. 
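+                # e.g. a provider that mutates one shared chunk object across
+                # yields ("Hel", then "lo", then "") would otherwise leave
+                # `aggregate` reflecting only the final mutation; the copy
+                # freezes each yield as delivered.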
+ chunk = AIMessageChunk(**chunk.model_dump(exclude={"type"})) + if ( + aggregate is not None + and getattr(chunk, "chunk_position", None) == "last" + and not chunk.content + and not getattr(chunk, "tool_calls", None) + and not getattr(chunk, "invalid_tool_calls", None) + and not getattr(chunk, "tool_call_chunks", None) + and getattr(chunk, "usage_metadata", None) == getattr(aggregate, "usage_metadata", None) + ): + chunk = chunk.model_copy(update={"usage_metadata": None}) + aggregate = chunk if aggregate is None else aggregate + chunk + + yield {"type": "message_chunk", "chunk": chunk} + + tool_call_chunks = getattr(aggregate, "tool_call_chunks", None) or [] + for tool_call in getattr(aggregate, "tool_calls", None) or []: + ready_tool_call = self._normalize_stream_tool_call(tool_call, tool_call_chunks) + if ready_tool_call is None: + continue + call_id = ready_tool_call.get("id") + if not call_id or call_id in seen_tool_ids: + continue + seen_tool_ids.add(call_id) + streamed_tool_calls.append(ready_tool_call) + await executor.add_tool(ready_tool_call) + + completed = await executor.get_completed_results() + if completed: + yield {"type": "tools", "messages": completed} + except Exception: + discarded = await executor.discard(reason="streaming_error") + if discarded: + yield {"type": "tools", "messages": discarded} + raise + + if aggregate is None: + raise RuntimeError("streaming model returned no AIMessageChunk") + + ai_message = AIMessage(**aggregate.model_dump(exclude={"type"})) + self._notify_stream_response(prepared_request, ai_message) + remaining = await executor.drain_remaining() + yield { + "type": "done", + "response": ModelResponse(result=[ai_message], request_messages=list(prepared_request.messages)), + "ai_message": ai_message, + "tool_calls": list(streamed_tool_calls), + "remaining_tool_results": remaining, + } + + def _notify_stream_response(self, request: ModelRequest, ai_message: AIMessage) -> None: + req_dict = {"messages": request.messages} + resp_dict = {"messages": [ai_message]} + for mw in self.middleware: + dispatch = getattr(mw, "_dispatch_monitors", None) + if callable(dispatch): + dispatch("on_response", req_dict, resp_dict) + + async def _build_query_messages(self, messages: list, config: dict) -> tuple[list, list]: + return await self._apply_before_model(list(messages), config) + + async def _apply_before_model(self, messages: list, config: dict) -> tuple[list, list]: + """Run middleware before_model/abefore_model hooks on the live path.""" + current_messages = list(messages) + injected_messages: list[Any] = [] + state = {"messages": current_messages} + + for mw in self.middleware: + update: dict[str, Any] | None = None + abefore = getattr(mw, "abefore_model", None) + before = getattr(mw, "before_model", None) + + if callable(abefore): + maybe_update = abefore(state=state, runtime=None, config=config) + if inspect.isawaitable(maybe_update): + maybe_update = await maybe_update + update = maybe_update if isinstance(maybe_update, dict) else None + elif callable(before): + maybe_update = before(state=state, runtime=None, config=config) + update = maybe_update if isinstance(maybe_update, dict) else None + + if not update: + continue + + new_messages = update.get("messages") + if new_messages: + if not isinstance(new_messages, list): + new_messages = [new_messages] + current_messages.extend(new_messages) + injected_messages.extend(new_messages) + state["messages"] = current_messages + + return current_messages, injected_messages + + def _sync_app_state(self, messages: 
list, turn_count: int) -> None: + """Keep runtime AppState aligned with the loop's live state.""" + if self._app_state is None: + return + + snapshot = list(messages) + current_cost = self._read_runtime_cost() + bootstrap_cost = self._bootstrap.total_cost_usd if self._bootstrap is not None else 0.0 + cumulative_cost = max(current_cost, self._app_state.total_cost, bootstrap_cost) + compact_boundary_index = self._read_compact_boundary_index() + + # @@@sa-03-cost-accumulator-monotonic + # /clear must preserve session accumulators, so loop sync cannot let a + # lower per-run observation overwrite the accumulated session total. + if self._bootstrap is not None: + self._bootstrap.total_cost_usd = cumulative_cost + + # @@@app-state-sync + # ql-02 needs the loop's local lifecycle to write back into AppState, + # but we still do not have compaction yet. Clamp the boundary so the + # store stays coherent without pretending compaction exists. + def _update(state: AppState) -> AppState: + return state.model_copy( + update={ + "messages": snapshot, + "turn_count": turn_count, + "total_cost": cumulative_cost, + "compact_boundary_index": compact_boundary_index, + } + ) + + self._app_state.set_state(_update) + + def _read_runtime_cost(self) -> float: + if self._runtime is None: + return self._app_state.total_cost if self._app_state is not None else 0.0 + try: + return float(self._runtime.cost) + except Exception: + return self._app_state.total_cost if self._app_state is not None else 0.0 + + def _read_compact_boundary_index(self) -> int: + if self._memory_middleware is None: + return 0 + try: + boundary = int(getattr(self._memory_middleware, "compact_boundary_index", 0)) + except Exception: + return 0 + return max(boundary, 0) + + def _get_discovered_tool_names(self, thread_id: str) -> set[str]: + # @@@dt-03-thread-scoped-deferred-tools - deferred discovery must stay + # isolated per thread_id, or one thread's tool_search silently changes + # another thread's inline schema surface on the next turn. 
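+        # e.g. (hypothetical tools) thread "a" discovering pdf_reader via
+        # tool_search exposes it inline for thread "a" only; thread "b" keeps
+        # its own, smaller inline schema surface.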
+ return self._tool_discovered_tool_names_by_thread.setdefault(thread_id, set()) + + def _restore_discovered_tool_names_from_messages( + self, + thread_id: str, + messages: list, + ) -> None: + discovered: set[str] = set() + for message in messages: + if not isinstance(message, ToolMessage) or getattr(message, "name", None) != "tool_search": + continue + content = getattr(message, "content", None) + if not isinstance(content, str): + continue + try: + payload = json.loads(content) + except Exception: + continue + if not isinstance(payload, list): + continue + for item in payload: + if not isinstance(item, dict): + continue + name = item.get("name") + if not isinstance(name, str): + continue + entry = self._registry.get(name) + if entry is not None and entry.mode == ToolMode.DEFERRED: + discovered.add(name) + self._tool_discovered_tool_names_by_thread[thread_id] = discovered + + def _build_tool_use_context(self, messages: list, *, thread_id: str = "default") -> ToolUseContext | None: + if self._bootstrap is None or self._app_state is None: + return None + has_permission_resolver = self._bootstrap.permission_resolver_scope != "none" + return ToolUseContext( + bootstrap=self._bootstrap, + get_app_state=self._app_state.get_state, + set_app_state=self._app_state.set_state, + refresh_tools=self._refresh_tools, + can_use_tool=lambda name, args, permission_context, request: self._default_can_use_tool( + name=name, + permission_context=permission_context, + ), + request_permission=( + lambda name, args, context, request, message: self._request_permission( + thread_id=thread_id, + name=name, + args=args, + message=message, + ) + ) + if has_permission_resolver + else None, + consume_permission_resolution=lambda name, args, context, request: self._consume_permission_resolution( + thread_id=thread_id, + name=name, + args=args, + ), + read_file_state=self._tool_read_file_state, + loaded_nested_memory_paths=self._tool_loaded_nested_memory_paths, + discovered_skill_names=self._tool_discovered_skill_names, + discovered_tool_names=self._get_discovered_tool_names(thread_id), + nested_memory_attachment_triggers=set(), + abort_controller=self._tool_abort_controller, + messages=list(messages), + thread_id=thread_id, + ) + + def _default_can_use_tool( + self, + *, + name: str, + permission_context: ToolPermissionContext, + ) -> dict[str, Any] | None: + if self._app_state is None: + return None + permission_state = self._app_state.tool_permission_context + merged_context = ToolPermissionContext( + is_read_only=permission_context.is_read_only, + is_destructive=permission_context.is_destructive, + alwaysAllowRules=permission_state.alwaysAllowRules, + alwaysDenyRules=permission_state.alwaysDenyRules, + alwaysAskRules=permission_state.alwaysAskRules, + allowManagedPermissionRulesOnly=permission_state.allowManagedPermissionRulesOnly, + ) + decision = evaluate_permission_rules(name, merged_context) + if ( + decision is not None + and decision.get("decision") == "ask" + and self._bootstrap is not None + and self._bootstrap.permission_resolver_scope == "none" + ): + # @@@permission-headless-fail-loud - ask is only a real product mode + # when this run has an owner-facing resolver. Otherwise fail loudly + # instead of creating a dead-end pending request in hidden state. + return { + "decision": "deny", + "message": f"{decision.get('message')}. 
No interactive permission resolver is available for this run.", + } + return decision + + def _request_permission( + self, + *, + thread_id: str, + name: str, + args: dict[str, Any], + message: str | None, + ) -> str | None: + if self._app_state is None: + return None + + request_id = uuid.uuid4().hex[:8] + payload = { + "request_id": request_id, + "thread_id": thread_id, + "tool_name": name, + "args": copy.deepcopy(args), + "message": message, + } + + def _store(state: AppState) -> AppState: + pending = dict(state.pending_permission_requests) + pending[request_id] = payload + return state.model_copy(update={"pending_permission_requests": pending}) + + self._app_state.set_state(_store) + return request_id + + def _consume_permission_resolution( + self, + *, + thread_id: str, + name: str, + args: dict[str, Any], + ) -> dict[str, Any] | None: + if self._app_state is None: + return None + + resolved_items = list(self._app_state.resolved_permission_requests.items()) + matched_id: str | None = None + matched_payload: dict[str, Any] | None = None + for request_id, payload in resolved_items: + if payload.get("thread_id") != thread_id: + continue + if payload.get("tool_name") != name: + continue + if payload.get("args") != args: + continue + matched_id = request_id + matched_payload = payload + break + + if matched_id is None or matched_payload is None: + return None + + def _consume(state: AppState) -> AppState: + resolved = dict(state.resolved_permission_requests) + resolved.pop(matched_id, None) + return state.model_copy(update={"resolved_permission_requests": resolved}) + + self._app_state.set_state(_consume) + return { + "decision": matched_payload.get("decision"), + "message": matched_payload.get("message"), + } + + def _sync_tool_context_messages( + self, + tool_context: ToolUseContext | None, + messages: list, + ) -> None: + if tool_context is None: + return + tool_context.messages = list(messages) + + async def _refresh_tools_between_turns(self, tool_context: ToolUseContext | None) -> None: + refresh = self._refresh_tools + if refresh is None and tool_context is not None: + refresh = tool_context.refresh_tools + if refresh is None: + return + result = refresh() + if inspect.isawaitable(result): + await result + + async def _handle_model_error_recovery( + self, + *, + exc: Exception, + thread_id: str, + messages: list, + turn: int, + transition: ContinueState | None, + max_output_tokens_recovery_count: int, + has_attempted_reactive_compact: bool, + max_output_tokens_override: int | None, + transient_api_retry_count: int, + ) -> _ModelErrorRecoveryResult | None: + ctx = _ModelErrorContext( + exc=exc, + error_text=str(exc).lower(), + thread_id=thread_id, + messages=messages, + turn=turn, + transition=transition, + max_output_tokens_recovery_count=max_output_tokens_recovery_count, + has_attempted_reactive_compact=has_attempted_reactive_compact, + max_output_tokens_override=max_output_tokens_override, + transient_api_retry_count=transient_api_retry_count, + ) + for strategy in self._model_error_recovery_strategies(): + result = await strategy(ctx) + if result is not None: + return result + return None + + def _model_error_recovery_strategies(self) -> tuple[Callable[[_ModelErrorContext], Awaitable[_ModelErrorRecoveryResult | None]], ...]: + return ( + self._try_context_overflow_escalate, + self._try_transient_api_retry, + self._try_max_output_tokens_recovery, + self._try_prompt_too_long_collapse_drain, + self._try_prompt_too_long_reactive_compact, + self._try_prompt_too_long_terminal, + ) + + 
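+    # Strategy contract sketch: each _try_* coroutine returns None to defer
+    # to the next strategy, or a _ModelErrorRecoveryResult; terminal=None
+    # means "retry the turn with the returned knobs", a set terminal means
+    # "stop the run with that reason".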
async def _try_context_overflow_escalate(self, ctx: _ModelErrorContext) -> _ModelErrorRecoveryResult | None: + parsed_overflow = self._parse_context_overflow_override(str(ctx.exc)) + if parsed_overflow is None: + return None + return _ModelErrorRecoveryResult( + messages=ctx.messages, + transition=ContinueState(reason=ContinueReason.max_output_tokens_escalate), + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count, + has_attempted_reactive_compact=ctx.has_attempted_reactive_compact, + max_output_tokens_override=parsed_overflow, + transient_api_retry_count=ctx.transient_api_retry_count, + terminal=None, + ) + + async def _try_transient_api_retry(self, ctx: _ModelErrorContext) -> _ModelErrorRecoveryResult | None: + if not self._is_transient_api_error(ctx.exc, ctx.error_text): + return None + if ctx.transient_api_retry_count >= _TRANSIENT_API_MAX_RETRIES: + return None + delay_seconds = self._retry_delay_seconds(ctx.exc, ctx.transient_api_retry_count) + if delay_seconds > 0: + await asyncio.sleep(delay_seconds) + return _ModelErrorRecoveryResult( + messages=ctx.messages, + transition=ContinueState(reason=ContinueReason.api_retry), + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count, + has_attempted_reactive_compact=ctx.has_attempted_reactive_compact, + max_output_tokens_override=ctx.max_output_tokens_override, + transient_api_retry_count=ctx.transient_api_retry_count + 1, + terminal=None, + ) + + async def _try_max_output_tokens_recovery(self, ctx: _ModelErrorContext) -> _ModelErrorRecoveryResult | None: + if "max_output_tokens" not in ctx.error_text: + return None + if ctx.max_output_tokens_override is None: + return _ModelErrorRecoveryResult( + messages=ctx.messages, + transition=ContinueState(reason=ContinueReason.max_output_tokens_escalate), + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count, + has_attempted_reactive_compact=ctx.has_attempted_reactive_compact, + max_output_tokens_override=_ESCALATED_MAX_OUTPUT_TOKENS, + transient_api_retry_count=ctx.transient_api_retry_count, + terminal=None, + ) + if ctx.max_output_tokens_recovery_count < 3: + recovered_messages = list(ctx.messages) + recovered_messages.append( + HumanMessage( + content="Output token limit hit. 
Resume directly with no apology or recap.", + ) + ) + return _ModelErrorRecoveryResult( + messages=recovered_messages, + transition=ContinueState(reason=ContinueReason.max_output_tokens_recovery), + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count + 1, + has_attempted_reactive_compact=ctx.has_attempted_reactive_compact, + max_output_tokens_override=ctx.max_output_tokens_override, + transient_api_retry_count=ctx.transient_api_retry_count, + terminal=None, + ) + return _ModelErrorRecoveryResult( + messages=ctx.messages, + transition=ContinueState(reason=ContinueReason.max_output_tokens_recovery), + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count, + has_attempted_reactive_compact=ctx.has_attempted_reactive_compact, + max_output_tokens_override=ctx.max_output_tokens_override, + transient_api_retry_count=ctx.transient_api_retry_count, + terminal=TerminalState( + reason=TerminalReason.model_error, + turn_count=ctx.turn, + error=str(ctx.exc), + ), + ) + + async def _try_prompt_too_long_collapse_drain(self, ctx: _ModelErrorContext) -> _ModelErrorRecoveryResult | None: + if not self._is_prompt_too_long_error(ctx.error_text): + return None + if ctx.transition is not None and ctx.transition.reason is ContinueReason.collapse_drain_retry: + return None + drained = await self._recover_from_overflow(ctx.messages) + if drained is None or drained["committed"] <= 0: + return None + return _ModelErrorRecoveryResult( + messages=drained["messages"], + transition=ContinueState(reason=ContinueReason.collapse_drain_retry), + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count, + has_attempted_reactive_compact=ctx.has_attempted_reactive_compact, + max_output_tokens_override=ctx.max_output_tokens_override, + transient_api_retry_count=ctx.transient_api_retry_count, + terminal=None, + ) + + async def _try_prompt_too_long_reactive_compact(self, ctx: _ModelErrorContext) -> _ModelErrorRecoveryResult | None: + if not self._is_prompt_too_long_error(ctx.error_text): + return None + if ctx.has_attempted_reactive_compact: + return None + compacted = await self._force_reactive_compact(ctx.messages, thread_id=ctx.thread_id) + if compacted is None: + return None + return _ModelErrorRecoveryResult( + messages=compacted, + transition=ContinueState(reason=ContinueReason.reactive_compact_retry), + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count, + has_attempted_reactive_compact=True, + max_output_tokens_override=ctx.max_output_tokens_override, + transient_api_retry_count=ctx.transient_api_retry_count, + terminal=None, + ) + + async def _try_prompt_too_long_terminal(self, ctx: _ModelErrorContext) -> _ModelErrorRecoveryResult | None: + if not self._is_prompt_too_long_error(ctx.error_text): + return None + return _ModelErrorRecoveryResult( + messages=ctx.messages, + transition=ctx.transition, + max_output_tokens_recovery_count=ctx.max_output_tokens_recovery_count, + has_attempted_reactive_compact=ctx.has_attempted_reactive_compact, + max_output_tokens_override=ctx.max_output_tokens_override, + transient_api_retry_count=ctx.transient_api_retry_count, + terminal=TerminalState( + reason=TerminalReason.prompt_too_long, + turn_count=ctx.turn, + error=str(ctx.exc), + ), + ) + + @staticmethod + def _parse_context_overflow_override(error_message: str) -> int | None: + match = re.search( + r"input length and `max_tokens` exceed context limit: (\d+) \+ (\d+) > (\d+)", + error_message, + ) + if match is None: + return None + input_tokens = int(match.group(1)) + 
context_limit = int(match.group(3)) + available_context = max(0, context_limit - input_tokens - _CONTEXT_OVERFLOW_SAFETY_BUFFER) + if available_context < _FLOOR_OUTPUT_TOKENS: + return None + return max(_FLOOR_OUTPUT_TOKENS, available_context) + + @staticmethod + def _is_transient_api_error(exc: Exception, error_text: str) -> bool: + status = getattr(exc, "status", None) + return status in {429, 529} or '"type":"overloaded_error"' in error_text + + @staticmethod + def _retry_delay_seconds(exc: Exception, transient_api_retry_count: int) -> float: + headers = getattr(exc, "headers", None) or {} + # @@@retry-after-shape + # Test doubles use plain dict headers while SDK errors expose a Headers-like + # object. Keep this probe shape-tolerant so the loop can honor retry-after + # without forcing a specific exception class. + if hasattr(headers, "get"): + retry_after = headers.get("retry-after") + else: + retry_after = None + try: + if retry_after is not None: + return max(0.0, float(retry_after)) + except (TypeError, ValueError): + pass + return _TRANSIENT_API_BASE_DELAY_SECONDS * (2**transient_api_retry_count) + + def _handle_truncated_response_recovery( + self, + *, + ai_msg: AIMessage, + messages: list, + turn: int, + max_output_tokens_recovery_count: int, + max_output_tokens_override: int | None, + ) -> dict[str, Any] | None: + if not self._is_max_output_truncated(ai_msg): + return None + + if max_output_tokens_override is None: + return { + "messages": messages, + "transition": ContinueState(reason=ContinueReason.max_output_tokens_escalate), + "max_output_tokens_recovery_count": max_output_tokens_recovery_count, + "max_output_tokens_override": _ESCALATED_MAX_OUTPUT_TOKENS, + "yield_ai": False, + "terminal": None, + } + + if max_output_tokens_recovery_count < 3: + recovered_messages = list(messages) + recovered_messages.append(ai_msg) + recovered_messages.append( + HumanMessage( + content="Output token limit hit. 
Resume directly with no apology or recap.", + ) + ) + return { + "messages": recovered_messages, + "transition": ContinueState(reason=ContinueReason.max_output_tokens_recovery), + "max_output_tokens_recovery_count": max_output_tokens_recovery_count + 1, + "max_output_tokens_override": max_output_tokens_override, + "yield_ai": False, + "terminal": None, + } + + surfaced_messages = list(messages) + surfaced_messages.append(ai_msg) + return { + "messages": surfaced_messages, + "transition": ContinueState(reason=ContinueReason.max_output_tokens_recovery), + "max_output_tokens_recovery_count": max_output_tokens_recovery_count, + "max_output_tokens_override": max_output_tokens_override, + "yield_ai": True, + "terminal": TerminalState( + reason=TerminalReason.model_error, + turn_count=turn, + error="max_output_tokens", + ), + } + + async def _force_reactive_compact(self, messages: list, *, thread_id: str) -> list | None: + if self._memory_middleware is None: + return None + compact = getattr(self._memory_middleware, "compact_messages_for_recovery", None) + if not callable(compact): + return None + signature = inspect.signature(compact) + if "thread_id" in signature.parameters: + compacted = compact(messages, thread_id=thread_id) + else: + compacted = compact(messages) + if not inspect.isawaitable(compacted): + raise TypeError("compact_messages_for_recovery must return an awaitable") + return await compacted + + async def _recover_from_overflow(self, messages: list) -> dict[str, Any] | None: + # @@@collapse-drain-single-shot + # ql-04 needs collapse-drain and reactive-compact to stay as separate + # phases. The drain hook is optional, but if present it only gets one + # chance before prompt-too-long falls through to reactive compaction. + for middleware in self.middleware: + recover = getattr(middleware, "recover_from_overflow", None) + if not callable(recover): + continue + drained = recover(messages) + if inspect.isawaitable(drained): + drained = await drained + if drained is None: + return None + committed = int(getattr(drained, "get", lambda *_: 0)("committed", 0)) + updated_messages = getattr(drained, "get", lambda *_: None)("messages") + if committed <= 0 or not isinstance(updated_messages, list): + return None + return {"committed": committed, "messages": list(updated_messages)} + return None + + @staticmethod + def _is_prompt_too_long_error(error_text: str) -> bool: + return ( + "prompt is too long" in error_text + or "prompt too long" in error_text + or "context length" in error_text + or "maximum context length" in error_text + ) + + @staticmethod + def _is_max_output_truncated(message: AIMessage) -> bool: + response_metadata = getattr(message, "response_metadata", None) or {} + additional_kwargs = getattr(message, "additional_kwargs", None) or {} + finish_reason = ( + response_metadata.get("finish_reason") + or response_metadata.get("stop_reason") + or additional_kwargs.get("finish_reason") + or additional_kwargs.get("stop_reason") + ) + return finish_reason in {"length", "max_tokens", "max_output_tokens"} + + # ------------------------------------------------------------------------- + # Tool execution through middleware chain + # ------------------------------------------------------------------------- + + async def _execute_tools( + self, + tool_calls: list, + model_response: ModelResponse, + tool_context: ToolUseContext | None, + ) -> list[ToolMessage]: + """Execute tool calls respecting concurrency safety, via middleware chain.""" + results: dict[int, ToolMessage] = {} + + async 
def execute_batch(batch: list[tuple[int, dict]]) -> None: + if not batch: + return + batch_results = await asyncio.gather( + *[self._execute_single_tool(tool_call, tool_context) for _, tool_call in batch], + return_exceptions=True, + ) + for (idx, tool_call), result in zip(batch, batch_results): + if isinstance(result, BaseException): + results[idx] = ToolMessage( + content=f"{result}", + tool_call_id=tool_call.get("id", ""), + name=tool_call.get("name", ""), + ) + continue + if not isinstance(result, ToolMessage): + raise TypeError(f"Tool executor returned unexpected result type: {type(result)!r}") + results[idx] = result + + safe_batch: list[tuple[int, dict]] = [] + for idx, tool_call in enumerate(tool_calls): + # @@@tool-order-boundary + # te-01 needs the non-streaming path to keep the same queue barrier + # semantics as the streaming executor: contiguous safe tools may fan + # out together, but any unsafe tool flushes the batch and blocks the + # next safe tool until it finishes. + if self._tool_is_concurrency_safe(tool_call): + safe_batch.append((idx, tool_call)) + continue + + await execute_batch(safe_batch) + safe_batch = [] + try: + results[idx] = await self._execute_single_tool(tool_call, tool_context) + except Exception as exc: + results[idx] = ToolMessage( + content=f"{exc}", + tool_call_id=tool_call.get("id", ""), + name=tool_call.get("name", ""), + ) + + await execute_batch(safe_batch) + return [results[i] for i in range(len(tool_calls))] + + async def _execute_single_tool( + self, + tool_call: dict, + tool_context: ToolUseContext | None, + ) -> ToolMessage: + name = tool_call.get("name") or tool_call.get("function", {}).get("name", "") + call_id = tool_call.get("id", "") + args = tool_call.get("args", {}) or tool_call.get("function", {}).get("arguments", {}) + + if isinstance(args, str): + import json + + try: + args = json.loads(args) + except Exception: + args = {} + + normalized_call = {"name": name, "args": args, "id": call_id} + tc_request = ToolCallRequest( + tool_call=normalized_call, + tool=None, + state=tool_context, + runtime=self._runtime, # type: ignore[arg-type] + ) + + async def innermost_tool_handler(req: ToolCallRequest) -> ToolMessage: + tc = req.tool_call + t_name = tc.get("name", "") + t_id = tc.get("id", "") + t_args = tc.get("args", {}) + entry = self._registry.get(t_name) + if entry is None: + return ToolMessage( + content=f"Tool '{t_name}' not found", + tool_call_id=t_id, + name=t_name, + ) + try: + import asyncio as _asyncio + + if _asyncio.iscoroutinefunction(entry.handler): + result = await entry.handler(**t_args) + else: + result = await _asyncio.to_thread(entry.handler, **t_args) + return ToolMessage(content=str(result), tool_call_id=t_id, name=t_name) + except Exception as e: + return ToolMessage( + content=f"{e}", + tool_call_id=t_id, + name=t_name, + ) + + tool_handler = innermost_tool_handler + for mw in reversed(self.middleware): + if _mw_overrides_tool_call(mw): + tool_handler = _make_tool_wrapper(mw, tool_handler) + + return await tool_handler(tc_request) + + def _tool_is_concurrency_safe(self, tool_call: dict) -> bool: + name = tool_call.get("name") or tool_call.get("function", {}).get("name", "") + entry = self._registry.get(name) + if entry is None: + return False + safety = entry.is_concurrency_safe + if callable(safety): + args = tool_call.get("args", {}) + if isinstance(args, str): + try: + import json as _json + + args = _json.loads(args) + except Exception: + args = {} + try: + return bool(safety(args if isinstance(args, dict) else 
{})) + except Exception: + return False + return bool(safety) + + def _tool_call_is_ready(self, tool_call: dict) -> bool: + name = tool_call.get("name") or tool_call.get("function", {}).get("name", "") + entry = self._registry.get(name) + if entry is None: + return True + + args = tool_call.get("args", {}) + if isinstance(args, str): + try: + import json as _json + + args = _json.loads(args) + except Exception: + return False + if not isinstance(args, dict): + return False + + schema = entry.get_schema() or {} + parameters = schema.get("parameters", {}) if isinstance(schema, dict) else {} + return _required_sets_match(parameters, args) if isinstance(parameters, dict) else True + + def _normalize_stream_tool_call( + self, + tool_call: dict, + tool_call_chunks: list[dict[str, Any]], + ) -> dict[str, Any] | None: + call_id = tool_call.get("id") + name = tool_call.get("name") or tool_call.get("function", {}).get("name", "") + args: Any = tool_call.get("args", {}) + if isinstance(args, str): + try: + import json as _json + + args = _json.loads(args) + except Exception: + args = {} + + raw_arg_chunks: list[str] = [] + for chunk in tool_call_chunks: + if chunk.get("id") != call_id: + continue + if chunk.get("name"): + name = chunk["name"] + raw_args = chunk.get("args") + if raw_args in (None, ""): + continue + if isinstance(raw_args, str): + raw_arg_chunks.append(raw_args) + else: + args = raw_args + + if raw_arg_chunks: + try: + import json as _json + + args = _json.loads("".join(raw_arg_chunks)) + except Exception: + return None + + normalized = {"name": name, "args": args, "id": call_id} + if not self._tool_call_is_ready(normalized): + return None + return normalized + + # ------------------------------------------------------------------------- + # Checkpointer persistence + # ------------------------------------------------------------------------- + + async def _load_messages(self, thread_id: str) -> list: + """Load message history from checkpointer (if available).""" + state = await self._load_thread_checkpoint_state(thread_id) + return list(state.messages) if state is not None else [] + + async def _load_thread_checkpoint_state(self, thread_id: str) -> ThreadCheckpointState | None: + if self._checkpoint_store is None: + return None + try: + return await self._checkpoint_store.load(thread_id) + except Exception: + logger.debug("QueryLoop: could not load checkpoint for thread %s", thread_id) + return None + + async def _load_checkpoint_channel_values(self, thread_id: str) -> dict[str, Any]: + """Compatibility helper for tests and bridge callers that still inspect channel_values.""" + state = await self._load_thread_checkpoint_state(thread_id) + if state is None: + return {} + return { + "messages": list(state.messages), + "tool_permission_context": dict(state.tool_permission_context), + "pending_permission_requests": dict(state.pending_permission_requests), + "resolved_permission_requests": dict(state.resolved_permission_requests), + "memory_compaction_state": dict(state.memory_compaction_state), + "mcp_instruction_state": dict(state.mcp_instruction_state), + } + + def _thread_permission_state_snapshot( + self, + thread_id: str, + ) -> tuple[dict[str, Any], dict[str, dict[str, Any]], dict[str, dict[str, Any]]]: + if self._app_state is None: + return {}, {}, {} + + permission_context = copy.deepcopy(self._app_state.tool_permission_context.model_dump()) + pending = { + key: copy.deepcopy(value) + for key, value in self._app_state.pending_permission_requests.items() + if value.get("thread_id") 
== thread_id + } + resolved = { + key: copy.deepcopy(value) + for key, value in self._app_state.resolved_permission_requests.items() + if value.get("thread_id") == thread_id + } + return permission_context, pending, resolved + + def _thread_memory_state_snapshot(self, thread_id: str) -> dict[str, Any]: + if self._memory_middleware is None: + return {} + snapshot = getattr(self._memory_middleware, "snapshot_thread_state", None) + if not callable(snapshot): + return {} + raw_snapshot = snapshot(thread_id) or {} + if not isinstance(raw_snapshot, dict): + return {} + return {str(key): value for key, value in raw_snapshot.items()} + + def _thread_mcp_instruction_state_snapshot(self, thread_id: str) -> dict[str, Any]: + if self._app_state is None: + return {} + announced_blocks = dict(self._app_state.announced_mcp_instruction_blocks.get(thread_id, {})) + return {"announced_blocks": announced_blocks} + + def _is_runtime_active(self) -> bool: + current_state = getattr(self._runtime, "current_state", None) + return getattr(current_state, "value", current_state) == "active" + + def _snapshot_live_thread_state(self, thread_id: str) -> dict[str, Any]: + messages = list(self._app_state.messages) if self._app_state is not None else [] + permission_context, pending, resolved = self._thread_permission_state_snapshot(thread_id) + memory_state = self._thread_memory_state_snapshot(thread_id) + return { + "messages": messages, + "tool_permission_context": permission_context, + "pending_permission_requests": pending, + "resolved_permission_requests": resolved, + "memory_compaction_state": memory_state, + "mcp_instruction_state": self._thread_mcp_instruction_state_snapshot(thread_id), + } + + def _restore_thread_permission_state( + self, + thread_id: str, + *, + permission_context: dict[str, Any], + pending: dict[str, dict[str, Any]], + resolved: dict[str, dict[str, Any]], + ) -> None: + if self._app_state is None: + return + + # @@@permission-checkpoint-bridge - pending/resolved permission requests + # are thread-scoped runtime state, not display-only metadata. They must + # survive checkpoint replay so backend/UI surfaces stay honest after an + # idle reload or agent recreation. 
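+        # Restoration is thread-scoped: _update below replaces only this
+        # thread's pending/resolved slices and leaves other threads' entries
+        # untouched.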
+ def _update(state: AppState) -> AppState: + kept_pending = {key: value for key, value in state.pending_permission_requests.items() if value.get("thread_id") != thread_id} + kept_pending.update(copy.deepcopy(pending)) + kept_resolved = {key: value for key, value in state.resolved_permission_requests.items() if value.get("thread_id") != thread_id} + kept_resolved.update(copy.deepcopy(resolved)) + return state.model_copy( + update={ + "tool_permission_context": ToolPermissionState.model_validate(copy.deepcopy(permission_context)), + "pending_permission_requests": kept_pending, + "resolved_permission_requests": kept_resolved, + } + ) + + self._app_state.set_state(_update) + + def _restore_thread_memory_state( + self, + thread_id: str, + *, + memory_state: dict[str, Any], + ) -> None: + if self._memory_middleware is None: + return + restore = getattr(self._memory_middleware, "restore_thread_state", None) + if callable(restore): + restore(thread_id, memory_state) + + def _restore_thread_mcp_instruction_state( + self, + thread_id: str, + *, + mcp_instruction_state: dict[str, Any], + ) -> None: + if self._app_state is None: + return + announced_blocks = mcp_instruction_state.get("announced_blocks", {}) + if not isinstance(announced_blocks, dict): + announced_blocks = {} + kept = {key: value for key, value in self._app_state.announced_mcp_instruction_blocks.items() if key != thread_id} + kept[thread_id] = {name: block for name, block in announced_blocks.items() if isinstance(name, str) and isinstance(block, str)} + self._app_state.announced_mcp_instruction_blocks = kept + + async def _hydrate_thread_state_from_checkpoint(self, thread_id: str) -> dict[str, Any]: + checkpoint_state = await self._load_thread_checkpoint_state(thread_id) + messages = list(checkpoint_state.messages) if checkpoint_state is not None else [] + permission_context = dict(checkpoint_state.tool_permission_context) if checkpoint_state is not None else {} + pending = dict(checkpoint_state.pending_permission_requests) if checkpoint_state is not None else {} + resolved = dict(checkpoint_state.resolved_permission_requests) if checkpoint_state is not None else {} + memory_state = dict(checkpoint_state.memory_compaction_state) if checkpoint_state is not None else {} + mcp_instruction_state = dict(checkpoint_state.mcp_instruction_state) if checkpoint_state is not None else {} + turn_count = self._app_state.turn_count if self._app_state is not None else 0 + self._sync_app_state(messages=messages, turn_count=turn_count) + self._restore_thread_permission_state( + thread_id, + permission_context=permission_context, + pending=pending, + resolved=resolved, + ) + self._restore_thread_memory_state( + thread_id, + memory_state=memory_state, + ) + self._restore_thread_mcp_instruction_state( + thread_id, + mcp_instruction_state=mcp_instruction_state, + ) + return { + "messages": messages, + "tool_permission_context": permission_context, + "pending_permission_requests": pending, + "resolved_permission_requests": resolved, + "memory_compaction_state": memory_state, + "mcp_instruction_state": mcp_instruction_state, + } + + async def _save_messages(self, thread_id: str, messages: list) -> None: + """Persist message history to checkpointer.""" + if self._checkpoint_store is None: + return + try: + permission_context, pending_requests, resolved_requests = self._thread_permission_state_snapshot(thread_id) + memory_state = self._thread_memory_state_snapshot(thread_id) + mcp_instruction_state = self._thread_mcp_instruction_state_snapshot(thread_id) + 
await self._checkpoint_store.save( + thread_id, + ThreadCheckpointState( + messages=list(messages), + tool_permission_context=permission_context, + pending_permission_requests=pending_requests, + resolved_permission_requests=resolved_requests, + memory_compaction_state=memory_state, + mcp_instruction_state=mcp_instruction_state, + ), + ) + except Exception: + logger.debug("QueryLoop: could not save checkpoint for thread %s", thread_id, exc_info=True) + + def _collect_memory_system_notices(self, pending_notices: list[HumanMessage]) -> None: + if self._memory_middleware is None: + return + consume_many = getattr(self._memory_middleware, "consume_pending_notices", None) + notices: list[dict[str, Any]] = [] + if callable(consume_many): + maybe_notices = consume_many() + if isinstance(maybe_notices, list): + notices = [notice for notice in maybe_notices if isinstance(notice, dict)] + else: + consume_one = getattr(self._memory_middleware, "consume_latest_compaction_notice", None) + if callable(consume_one): + notice = consume_one() + if isinstance(notice, dict): + notices = [notice] + for notice in notices: + pending_notices.append( + HumanMessage( + content=str(notice.get("content") or ""), + metadata={ + "source": "system", + "notification_type": str(notice.get("notification_type") or "compact"), + "compact_boundary_index": int(notice.get("compact_boundary_index") or 0), + }, + ) + ) + + def _append_system_notices(self, messages: list, notices: list[HumanMessage]) -> list: + if not notices: + return messages + # @@@compact-notice-persist - compaction changes the model-visible + # boundary, but the notice is for the owner surface only. Persist it + # after the run settles so replay stays honest without perturbing the + # same run's next model call. + return list(messages) + list(notices) + + def _build_terminal_notice(self, terminal: TerminalState | None) -> HumanMessage | None: + # @@@terminal-recovery-notice - recovery exhaustion must survive cold + # rebuilds. Persist one owner-visible system notice instead of leaving + # prompt-too-long as a hot-stream-only error. 
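+        # Only prompt_too_long is persisted this way; every other non-completed
+        # terminal reason still surfaces via _build_visible_terminal_error_message.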
+ if terminal is None or terminal.reason is not TerminalReason.prompt_too_long: + return None + return HumanMessage( + content=_PROMPT_TOO_LONG_NOTICE_TEXT, + metadata={"source": "system"}, + ) + + def _terminal_error_text(self, terminal: TerminalState) -> str: + if terminal.reason is TerminalReason.prompt_too_long: + return _PROMPT_TOO_LONG_NOTICE_TEXT + return terminal.error or terminal.reason.value + + def _build_visible_terminal_error_message( + self, + terminal: TerminalState, + messages: list[Any], + ) -> AIMessage | None: + if terminal.reason is TerminalReason.completed: + return None + error_text = self._terminal_error_text(terminal).strip() + if not error_text: + return None + last_message = messages[-1] if messages else None + if isinstance(last_message, AIMessage) and self._ai_message_has_visible_content(last_message): + return None + return AIMessage(content=f"Error: {error_text}") + + async def aclear(self, thread_id: str) -> None: + """Clear turn-scoped state for a thread while preserving session accumulators.""" + await self._save_messages(thread_id, []) + + self._tool_read_file_state.clear() + self._tool_loaded_nested_memory_paths.clear() + self._tool_discovered_skill_names.clear() + self._tool_discovered_tool_names_by_thread.pop(thread_id, None) + + if self._memory_middleware is not None: + summary_store = getattr(self._memory_middleware, "summary_store", None) + if summary_store is not None: + # @@@clear-thread-clears-summary-store - api-05 requires /clear + # to wipe replayable compaction state, not just in-memory cache. + summary_store.delete_thread_summaries(thread_id) + if hasattr(self._memory_middleware, "_cached_summary"): + setattr(self._memory_middleware, "_cached_summary", None) + if hasattr(self._memory_middleware, "_summary_restored"): + setattr(self._memory_middleware, "_summary_restored", False) + if hasattr(self._memory_middleware, "_summary_thread_id"): + setattr(self._memory_middleware, "_summary_thread_id", None) + if hasattr(self._memory_middleware, "_compact_up_to_index"): + setattr(self._memory_middleware, "_compact_up_to_index", 0) + clear_thread_state = getattr(self._memory_middleware, "clear_thread_state", None) + if callable(clear_thread_state): + clear_thread_state(thread_id) + + if self._app_state is not None: + preserved_total_cost = self._app_state.total_cost + preserved_tool_overrides = dict(self._app_state.tool_overrides) + pending_requests = { + key: value for key, value in self._app_state.pending_permission_requests.items() if value.get("thread_id") != thread_id + } + resolved_requests = { + key: value for key, value in self._app_state.resolved_permission_requests.items() if value.get("thread_id") != thread_id + } + + def _reset(state: AppState) -> AppState: + return state.model_copy( + update={ + "messages": [], + "turn_count": 0, + "total_cost": preserved_total_cost, + "compact_boundary_index": 0, + "tool_overrides": preserved_tool_overrides, + "pending_permission_requests": pending_requests, + "resolved_permission_requests": resolved_requests, + } + ) + + self._app_state.set_state(_reset) + + await self._save_messages(thread_id, []) + + if self._bootstrap is not None: + old_session_id = self._bootstrap.session_id + self._bootstrap.parent_session_id = old_session_id + self._bootstrap.session_id = uuid.uuid4().hex + + # ------------------------------------------------------------------------- + # Input parsing + # ------------------------------------------------------------------------- + + @staticmethod + def _parse_input(input: dict | 
None) -> list:
+        """Convert input dict to list of LangChain message objects."""
+        if input is None:
+            return []
+        raw_messages = input.get("messages", [])
+        result = []
+        for msg in raw_messages:
+            if hasattr(msg, "content"):
+                result.append(msg)
+            elif isinstance(msg, dict):
+                role = msg.get("role", "user")
+                content = msg.get("content", "")
+                if role == "user":
+                    result.append(HumanMessage(content=content))
+                elif role == "assistant":
+                    result.append(AIMessage(content=content))
+                else:
+                    result.append(HumanMessage(content=content))
+        return result
+
+    @staticmethod
+    def _ai_message_has_visible_content(message: AIMessage) -> bool:
+        content = getattr(message, "content", None)
+        if isinstance(content, str):
+            return content.strip() != ""
+        if isinstance(content, list):
+            for item in content:
+                if isinstance(item, str) and item.strip():
+                    return True
+                if isinstance(item, dict) and str(item.get("text", "")).strip():
+                    return True
+            return False
+        return bool(content)
+
+    @staticmethod
+    def _tool_results_include_permission_request(tool_results: list[ToolMessage]) -> bool:
+        for tool_result in tool_results:
+            additional_kwargs = getattr(tool_result, "additional_kwargs", None) or {}
+            meta = additional_kwargs.get("tool_result_meta")
+            if isinstance(meta, dict) and meta.get("kind") == "permission_request":
+                return True
+        return False
+
+    @staticmethod
+    def _get_terminal_followthrough_notice(messages: list[Any]) -> HumanMessage | None:
+        if not messages:
+            return None
+        last_message = messages[-1]
+        if last_message.__class__.__name__ != "HumanMessage":
+            return None
+        metadata = getattr(last_message, "metadata", None) or {}
+        if metadata.get("source") != "system":
+            return None
+        if metadata.get("notification_type") not in {"agent", "command"}:
+            return None
+        content = getattr(last_message, "content", "")
+        text = content if isinstance(content, str) else str(content)
+        if "CommandNotification" not in text and "task-notification" not in text:
+            return None
+        return last_message
+
+    @staticmethod
+    def _get_chat_followthrough_notice(messages: list[Any]) -> HumanMessage | None:
+        if not messages:
+            return None
+        last_message = messages[-1]
+        if last_message.__class__.__name__ != "HumanMessage":
+            return None
+        metadata = getattr(last_message, "metadata", None) or {}
+        if metadata.get("source") != "external":
+            return None
+        if metadata.get("notification_type") != "chat":
+            return None
+        content = getattr(last_message, "content", "")
+        text = content if isinstance(content, str) else str(content)
+        if "New message from" not in text or "read_messages(chat_id=" not in text:
+            return None
+        return last_message
+
+    @classmethod
+    def _build_terminal_followthrough_fallback(cls, notice: HumanMessage) -> AIMessage:
+        metadata = getattr(notice, "metadata", None) or {}
+        notification_type = str(metadata.get("notification_type") or "task")
+        content = getattr(notice, "content", "")
+        text = content if isinstance(content, str) else str(content)
+        status_match = re.search(r"<status>(.*?)</status>", text, flags=re.IGNORECASE | re.DOTALL)
+        status = status_match.group(1).strip().lower() if status_match else ""
+        subject = "command" if notification_type == "command" else "agent"
+        # @@@terminal-followthrough-fallback - terminal background notifications
+        # must never collapse into notice-only durable history when the model
+        # reentry stays silent; surface the silence explicitly instead.
+        if status == "completed":
+            reply = f"Background {subject} completed, but the followthrough assistant reply was empty."
+ elif status == "cancelled": + reply = f"Background {subject} was cancelled, but the followthrough assistant reply was empty." + elif status == "error": + reply = f"Background {subject} failed, but the followthrough assistant reply was empty." + else: + reply = f"Background {subject} update arrived, but the followthrough assistant reply was empty." + return AIMessage(content=reply) + + @classmethod + def _build_chat_followthrough_fallback(cls, notice: HumanMessage) -> AIMessage: + content = getattr(notice, "content", "") + text = content if isinstance(content, str) else str(content) + chat_id_match = re.search(r'read_messages\(chat_id="([^"]+)"\)', text) + if chat_id_match: + chat_id = chat_id_match.group(1) + reply = ( + f"I received a chat notification, but the followthrough assistant reply was empty. " + f'Read it with read_messages(chat_id="{chat_id}") before deciding whether to reply.' + ) + else: + reply = "I received a chat notification, but the followthrough assistant reply was empty." + return AIMessage(content=reply) + + +# ------------------------------------------------------------------------- +# Closure helpers (avoid late-binding bugs in loop-built lambdas) +# ------------------------------------------------------------------------- + + +def _make_model_wrapper(mw: AgentMiddleware, next_handler): + """Build an awrap_model_call wrapper that correctly closes over mw and next_handler.""" + + async def wrapper(request: ModelRequest) -> ModelResponse: + return await mw.awrap_model_call(request, next_handler) + + return wrapper + + +def _make_tool_wrapper(mw: AgentMiddleware, next_handler): + """Build an awrap_tool_call wrapper that correctly closes over mw and next_handler.""" + + async def wrapper(request: ToolCallRequest) -> ToolMessage: + return await mw.awrap_tool_call(request, next_handler) + + return wrapper + + +# ------------------------------------------------------------------------- +# Middleware override detection helpers +def _mw_overrides_model_call(mw: AgentMiddleware) -> bool: + """True if mw actually overrides awrap_model_call (not just inherits the base stub).""" + mw_type = type(mw) + own_fn = mw_type.__dict__.get("awrap_model_call") + if own_fn is not None: + return True + own_sync = mw_type.__dict__.get("wrap_model_call") + return own_sync is not None + + +def _mw_overrides_tool_call(mw: AgentMiddleware) -> bool: + """True if mw actually overrides awrap_tool_call (not just inherits the base stub).""" + mw_type = type(mw) + own_fn = mw_type.__dict__.get("awrap_tool_call") + if own_fn is not None: + return True + own_sync = mw_type.__dict__.get("wrap_tool_call") + return own_sync is not None diff --git a/core/runtime/middleware/__init__.py b/core/runtime/middleware/__init__.py index e69de29bb..f777a7fde 100644 --- a/core/runtime/middleware/__init__.py +++ b/core/runtime/middleware/__init__.py @@ -0,0 +1,79 @@ +"""Local runtime middleware protocol and request/response types. + +This replaces the phantom `langchain.agents.middleware.types` dependency for +the current runtime stack. 
+""" + +from __future__ import annotations + +from collections.abc import Awaitable, Callable +from dataclasses import dataclass, replace +from typing import Any, ClassVar + +from langchain_core.messages import ToolMessage + + +@dataclass(frozen=True) +class ModelRequest: + model: Any + messages: list + system_message: Any = None + tools: list | None = None + + def override(self, **changes: Any) -> ModelRequest: + return replace(self, **changes) + + +@dataclass(frozen=True) +class ModelResponse: + result: list + request_messages: list | None = None + prepared_request: ModelRequest | None = None + + +ModelCallResult = ModelResponse + + +@dataclass(frozen=True) +class ToolCallRequest: + tool_call: dict + tool: Any = None + state: Any = None + runtime: Any = None + + def override(self, **changes: Any) -> ToolCallRequest: + return replace(self, **changes) + + +class AgentMiddleware: + """Minimal chain-of-responsibility middleware base for the runtime stack.""" + + tools: ClassVar[tuple[Any, ...]] = () + + def wrap_model_call( + self, + request: ModelRequest, + handler: Callable[[ModelRequest], ModelResponse], + ) -> ModelResponse: + return handler(request) + + async def awrap_model_call( + self, + request: ModelRequest, + handler: Callable[[ModelRequest], Awaitable[ModelResponse]], + ) -> ModelResponse: + return await handler(request) + + def wrap_tool_call( + self, + request: ToolCallRequest, + handler: Callable[[ToolCallRequest], ToolMessage], + ) -> ToolMessage: + return handler(request) + + async def awrap_tool_call( + self, + request: ToolCallRequest, + handler: Callable[[ToolCallRequest], Awaitable[ToolMessage]], + ) -> ToolMessage: + return await handler(request) diff --git a/core/runtime/middleware/mcp_instructions.py b/core/runtime/middleware/mcp_instructions.py new file mode 100644 index 000000000..7cff4c7cb --- /dev/null +++ b/core/runtime/middleware/mcp_instructions.py @@ -0,0 +1,80 @@ +"""Thread-scoped MCP instruction delta injection. + +Mycel does not have CC's attachment plane. 
Keep this contract smaller: +- MCP server configs may carry `instructions` +- the loop stores which server names have already been announced per thread +- on the next turn after a change, inject one delta SystemMessage +""" + +from __future__ import annotations + +import json +from collections.abc import Callable +from typing import Any + +from langchain_core.messages import SystemMessage + +from core.runtime.middleware import AgentMiddleware +from core.runtime.state import AppState + +_DELTA_TAG = "mcp_instructions_delta" + + +def _format_instruction_block(server_name: str, instructions: str) -> str: + return f"## {server_name}\n{instructions.strip()}" + + +def _render_delta_message(*, added: dict[str, str], removed: list[str]) -> SystemMessage: + payload = { + "added_names": sorted(added), + "removed_names": sorted(removed), + } + blocks = [ + "", + f"<{_DELTA_TAG}>{json.dumps(payload, ensure_ascii=False)}", + "MCP server instructions changed for this thread.", + ] + if added: + blocks.append("Use the newly available MCP instructions below for subsequent turns:") + blocks.extend(_format_instruction_block(name, added[name]) for name in sorted(added)) + if removed: + blocks.append("The following MCP servers are no longer active for this thread:") + blocks.extend(f"- {name}" for name in sorted(removed)) + blocks.append("") + return SystemMessage(content="\n".join(blocks)) + + +class McpInstructionsDeltaMiddleware(AgentMiddleware): + """Injects MCP instruction deltas once per thread when the connected set changes.""" + + def __init__( + self, + *, + get_instruction_blocks: Callable[[], dict[str, str]], + get_app_state: Callable[[], AppState | None], + ) -> None: + self._get_instruction_blocks = get_instruction_blocks + self._get_app_state = get_app_state + + def before_model(self, state: dict[str, Any], runtime: Any = None, config: dict[str, Any] | None = None) -> dict[str, Any] | None: + app_state = self._get_app_state() + if app_state is None: + return None + + config = config or {} + thread_id = config.get("configurable", {}).get("thread_id", "default") + current_blocks = {name: block for name, block in self._get_instruction_blocks().items() if block.strip()} + announced_blocks = { + name: block + for name, block in app_state.announced_mcp_instruction_blocks.get(thread_id, {}).items() + if isinstance(name, str) and isinstance(block, str) and block.strip() + } + + added_names = sorted(name for name, block in current_blocks.items() if announced_blocks.get(name) != block) + removed_names = sorted(name for name in announced_blocks if name not in current_blocks) + if not added_names and not removed_names: + return None + + app_state.announced_mcp_instruction_blocks[thread_id] = dict(current_blocks) + added = {name: current_blocks[name] for name in added_names} + return {"messages": [_render_delta_message(added=added, removed=removed_names)]} diff --git a/core/runtime/middleware/memory/compactor.py b/core/runtime/middleware/memory/compactor.py index 67599b534..defbb7221 100644 --- a/core/runtime/middleware/memory/compactor.py +++ b/core/runtime/middleware/memory/compactor.py @@ -10,13 +10,22 @@ from langchain_core.messages import HumanMessage, SystemMessage +# CC L4b Legacy Compact: system prompt is simple (~200 tokens) — NOT inherited from parent. +# Using a distinct simple system prompt prevents reusing the parent conversation's cache +# (different system prompt → different prefix hash), and reduces input token cost. 
+COMPACT_SYSTEM_PROMPT = "You are a helpful AI assistant tasked with summarizing conversations." + SUMMARY_PROMPT = """\ -Provide a detailed summary for continuing our conversation. Include: -1. Key decisions made and their rationale -2. Files created, modified, or read and their current state -3. Errors encountered and how they were resolved -4. Outstanding tasks and current progress -5. Important context that would be needed to continue the work +Summarize this conversation in the following 9 sections: +1. Request/Intent — what the user asked for +2. Technical Concepts — key technologies and approaches discussed +3. Files/Code — files created or modified and their current state +4. Errors — errors encountered and how they were resolved +5. Problem Solving — decisions made and rationale +6. User Messages — key user inputs and feedback +7. Pending Tasks — unfinished work +8. Current Work — what was actively being done at the end +9. Next Step — the immediate next action needed Be concise but retain all information needed to continue seamlessly.""" SPLIT_TURN_PREFIX_PROMPT = """\ @@ -80,19 +89,41 @@ def split_messages(self, messages: list[Any]) -> tuple[list[Any], list[Any]]: return messages[:split_idx], messages[split_idx:] - async def compact(self, messages_to_summarize: list[Any], model: Any) -> str: + async def compact( + self, + messages_to_summarize: list[Any], + model: Any, + compact_boundary: int = 0, + ) -> str: """Generate a summary of the given messages using the LLM. + Aligned with CC L4b Legacy Compact: + - Uses COMPACT_SYSTEM_PROMPT (simple, ~200 tokens — NOT parent system prompt) + - No tools passed (extended thinking disabled, tools=[]) + - Slices from compact_boundary forward + - max_tokens capped at 20000 (CC max summary output) + Returns plain text summary string. """ - # Build the summarization request + # Slice from compact_boundary forward (CC: from last compact_boundary marker) + if compact_boundary > 0 and compact_boundary < len(messages_to_summarize): + messages_to_summarize = messages_to_summarize[compact_boundary:] + formatted = self._format_messages_for_summary(messages_to_summarize) + # CC L4b: system prompt is simple — does NOT inherit parent's system prompt. + # No tools, no extended thinking. 
summary_messages = [ - SystemMessage(content=SUMMARY_PROMPT), - HumanMessage(content=f"Here is the conversation to summarize:\n\n{formatted}"), + SystemMessage(content=COMPACT_SYSTEM_PROMPT), + HumanMessage(content=f"Summarize this conversation:\n\n{formatted}\n\n{SUMMARY_PROMPT}"), ] - response = await model.ainvoke(summary_messages) + # Bind max_tokens=20000 (CC max summary output), no tools + try: + bound_model = model.bind(max_tokens=20000) + except Exception: + bound_model = model + + response = await bound_model.ainvoke(summary_messages) return response.content if hasattr(response, "content") else str(response) def _estimate_msg_tokens(self, msg: Any) -> int: diff --git a/core/runtime/middleware/memory/middleware.py b/core/runtime/middleware/memory/middleware.py index 8775e1c21..c4d4f2362 100644 --- a/core/runtime/middleware/memory/middleware.py +++ b/core/runtime/middleware/memory/middleware.py @@ -7,19 +7,22 @@ from __future__ import annotations +import json import logging from collections.abc import Awaitable, Callable from pathlib import Path from typing import Any -from langchain.agents.middleware.types import ( +from langchain_core.messages import SystemMessage + +from core.runtime.checkpoint_store import CheckpointStore +from core.runtime.langgraph_checkpoint_store import LangGraphCheckpointStore +from core.runtime.middleware import ( AgentMiddleware, ModelCallResult, ModelRequest, ModelResponse, ) -from langchain_core.messages import SystemMessage - from storage.contracts import SummaryRepo from .compactor import ContextCompactor @@ -27,6 +30,7 @@ from .summary_store import SummaryStore logger = logging.getLogger(__name__) +_COMPACTION_BREAKER_THRESHOLD = 3 class MemoryMiddleware(AgentMiddleware): @@ -36,7 +40,7 @@ class MemoryMiddleware(AgentMiddleware): Layer 2 (Compaction): LLM summarization when context exceeds threshold """ - tools = [] # no tools injected + tools = () # no tools injected def __init__( self, @@ -75,6 +79,8 @@ def __init__( # Persistent storage summary_db_path = db_path or Path.home() / ".leon" / "leon.db" self.summary_store = SummaryStore(summary_db_path, summary_repo=summary_repo) if (db_path or summary_repo) else None + self._checkpointer: Any = None + self._checkpoint_store: CheckpointStore | None = None self.checkpointer = checkpointer # Injected references (set by agent.py after construction) @@ -86,6 +92,10 @@ def __init__( self._cached_summary: str | None = None self._compact_up_to_index: int = 0 self._summary_restored: bool = False + self._summary_thread_id: str | None = None + self._pending_owner_notices: list[dict[str, Any]] = [] + self._compaction_failure_counts_by_thread: dict[str, int] = {} + self._compaction_breaker_open_by_thread: dict[str, bool] = {} if verbose: print("[MemoryMiddleware] Initialized") @@ -101,6 +111,15 @@ def set_model(self, model: Any, model_config: dict[str, Any] | None = None) -> N self._model = model self._model_config = model_config + @property + def checkpointer(self) -> Any: + return self._checkpointer + + @checkpointer.setter + def checkpointer(self, value: Any) -> None: + self._checkpointer = value + self._checkpoint_store = LangGraphCheckpointStore(value) if value is not None else None + @property def _resolved_model(self) -> Any: """Return model with config bound so it uses the correct model/provider.""" @@ -125,6 +144,10 @@ def set_runtime(self, runtime: Any) -> None: """Inject AgentRuntime reference (called by agent.py).""" self._runtime = runtime + @property + def compact_boundary_index(self) -> int: + 
return self._compact_up_to_index + # ========== AgentMiddleware interface ========== async def awrap_model_call( @@ -134,13 +157,18 @@ async def awrap_model_call( ) -> ModelCallResult: messages = list(request.messages) original_count = len(messages) + thread_id = self._extract_thread_id(request) # Restore summary from store if not already done if not self._summary_restored and self.summary_store: - thread_id = self._extract_thread_id(request) if thread_id: await self._restore_summary_from_store(thread_id) self._summary_restored = True + self._summary_thread_id = thread_id + elif self.summary_store and thread_id and self._summary_thread_id != thread_id: + await self._restore_summary_from_store(thread_id) + self._summary_restored = True + self._summary_thread_id = thread_id sys_tokens = self._estimate_system_tokens(request) @@ -173,8 +201,9 @@ async def awrap_model_call( ) if self.compactor.should_compact(estimated, self._context_limit, self._compaction_threshold) and self._model: - thread_id = self._extract_thread_id(request) - messages = await self._do_compact(messages, thread_id) + compacted = await self._attempt_compaction(messages, thread_id=thread_id) + if compacted is not None: + messages = compacted elif self._cached_summary and self._compact_up_to_index > 0: if self._compact_up_to_index <= len(messages): summary_msg = SystemMessage(content=f"[Conversation Summary]\n{self._cached_summary}") @@ -190,7 +219,14 @@ async def awrap_model_call( final_tokens = self._estimate_tokens(messages) + sys_tokens print(f"[Memory] Final: {len(messages)} msgs (~{final_tokens} tokens) sent to LLM (original: {original_count} msgs)") - return await handler(request.override(messages=messages)) + response = await handler(request.override(messages=messages)) + if response.request_messages is None: + return ModelResponse( + result=response.result, + request_messages=list(messages), + prepared_request=response.prepared_request, + ) + return response async def _do_compact(self, messages: list[Any], thread_id: str | None = None) -> list[Any]: """Execute compaction: summarize old messages, return compacted list.""" @@ -219,6 +255,9 @@ async def _do_compact(self, messages: list[Any], thread_id: str | None = None) - self._cached_summary = summary_text self._compact_up_to_index = len(messages) - len(to_keep) + self._summary_restored = True + self._summary_thread_id = thread_id + self._record_compaction_notice() if self.summary_store and thread_id: try: @@ -257,6 +296,7 @@ async def force_compact(self, messages: list[Any]) -> dict[str, Any] | None: summary_text = await self.compactor.compact(to_summarize, self._resolved_model) self._cached_summary = summary_text self._compact_up_to_index = len(messages) - len(to_keep) + self._record_compaction_notice() return { "stats": { "summarized": len(to_summarize), @@ -267,6 +307,24 @@ async def force_compact(self, messages: list[Any]) -> dict[str, Any] | None: if self._runtime: self._runtime.set_flag("is_compacting", False) + async def compact_messages_for_recovery(self, messages: list[Any], thread_id: str | None = None) -> list[Any] | None: + """Force a compaction pass and return the compacted message list.""" + if not self._model: + return None + + pruned = self.pruner.prune(messages) + to_summarize, to_keep = self.compactor.split_messages(pruned) + if len(to_summarize) < 2: + return None + + return await self._attempt_compaction( + pruned, + thread_id=thread_id or self._current_thread_id(), + respect_breaker=False, + record_failures=False, + clear_breaker_on_success=True, 
+ ) + def _estimate_tokens(self, messages: list[Any]) -> int: """Estimate total tokens for messages (chars // 2).""" total = 0 @@ -306,6 +364,110 @@ def _extract_thread_id(self, request: ModelRequest) -> str | None: return configurable.get("thread_id") return getattr(configurable, "thread_id", None) if configurable else None + def consume_pending_notices(self) -> list[dict[str, Any]]: + notices = list(self._pending_owner_notices) + self._pending_owner_notices.clear() + return notices + + def snapshot_thread_state(self, thread_id: str) -> dict[str, Any]: + return { + "failure_count": int(self._compaction_failure_counts_by_thread.get(thread_id, 0)), + "breaker_open": bool(self._compaction_breaker_open_by_thread.get(thread_id, False)), + } + + def restore_thread_state(self, thread_id: str, state: dict[str, Any] | None) -> None: + payload = dict(state or {}) + failure_count = int(payload.get("failure_count") or 0) + breaker_open = bool(payload.get("breaker_open", False)) + if failure_count > 0: + self._compaction_failure_counts_by_thread[thread_id] = failure_count + else: + self._compaction_failure_counts_by_thread.pop(thread_id, None) + if breaker_open: + self._compaction_breaker_open_by_thread[thread_id] = True + else: + self._compaction_breaker_open_by_thread.pop(thread_id, None) + + def clear_thread_state(self, thread_id: str) -> None: + self._compaction_failure_counts_by_thread.pop(thread_id, None) + self._compaction_breaker_open_by_thread.pop(thread_id, None) + + def _record_compaction_notice(self) -> None: + content = f"Conversation compacted. Earlier {self._compact_up_to_index} message(s) are now represented by a summary." + self._queue_owner_notice( + { + "content": content, + "notification_type": "compact", + "compact_boundary_index": self._compact_up_to_index, + } + ) + + def _current_thread_id(self) -> str | None: + from sandbox.thread_context import get_current_thread_id + + return get_current_thread_id() + + async def _attempt_compaction( + self, + messages: list[Any], + *, + thread_id: str | None, + respect_breaker: bool = True, + record_failures: bool = True, + clear_breaker_on_success: bool = False, + ) -> list[Any] | None: + # @@@compaction-breaker-scope - match cc-src's narrower boundary: + # the breaker blocks later automatic compaction attempts, but reactive + # recovery may still try once and clear the breaker on success. 
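+        # Concretely: _COMPACTION_BREAKER_THRESHOLD (3) consecutive failures
+        # open the breaker; compact_messages_for_recovery bypasses it with
+        # respect_breaker=False and closes it via clear_breaker_on_success=True.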
+ if respect_breaker and thread_id and self._compaction_breaker_open_by_thread.get(thread_id, False): + return None + try: + compacted = await self._do_compact(messages, thread_id) + except Exception as exc: + logger.error("[Memory] Compaction failed for thread %s: %s", thread_id or "", exc) + if record_failures: + self._record_compaction_failure(thread_id, exc) + return None + self._record_compaction_success(thread_id, clear_breaker=clear_breaker_on_success) + return compacted + + def _record_compaction_success(self, thread_id: str | None, *, clear_breaker: bool = False) -> None: + if not thread_id: + return + self._compaction_failure_counts_by_thread.pop(thread_id, None) + if clear_breaker: + self._compaction_breaker_open_by_thread.pop(thread_id, None) + + def _record_compaction_failure(self, thread_id: str | None, exc: Exception) -> None: + if not thread_id: + return + failures = int(self._compaction_failure_counts_by_thread.get(thread_id, 0)) + 1 + self._compaction_failure_counts_by_thread[thread_id] = failures + if failures < _COMPACTION_BREAKER_THRESHOLD or self._compaction_breaker_open_by_thread.get(thread_id, False): + return + self._compaction_breaker_open_by_thread[thread_id] = True + self._queue_owner_notice( + { + "content": "Automatic compaction disabled for this thread after repeated failures. Clear the thread or start a new one.", + "notification_type": "compact_breaker", + "failure_count": failures, + "error": str(exc), + } + ) + + def _queue_owner_notice(self, notice: dict[str, Any]) -> None: + self._pending_owner_notices.append(dict(notice)) + if self._runtime and hasattr(self._runtime, "emit_activity_event"): + # @@@memory-owner-notices - compaction boundary and breaker state are + # owner-facing runtime facts, so stream and cold rebuild must share + # the same notice payload instead of inventing separate surfaces. 
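+            # The event's "data" field is the queued notice dict serialized as
+            # JSON, i.e. the same payload consume_pending_notices() hands to
+            # the durable-history path.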
+ self._runtime.emit_activity_event( + { + "event": "notice", + "data": json.dumps(notice, ensure_ascii=False), + } + ) + async def _restore_summary_from_store(self, thread_id: str) -> None: """Restore summary from SummaryStore.""" if not thread_id: @@ -314,6 +476,10 @@ async def _restore_summary_from_store(self, thread_id: str) -> None: ) try: + if self.summary_store is None: + return + self._cached_summary = None + self._compact_up_to_index = 0 summary_data = self.summary_store.get_latest_summary(thread_id) if not summary_data: @@ -332,6 +498,7 @@ async def _restore_summary_from_store(self, thread_id: str) -> None: self._cached_summary = summary_data.summary_text self._compact_up_to_index = summary_data.compact_up_to_index + self._summary_thread_id = thread_id if self.verbose: print( @@ -342,21 +509,25 @@ async def _restore_summary_from_store(self, thread_id: str) -> None: ) except Exception as e: + self._cached_summary = None + self._compact_up_to_index = 0 logger.error(f"[Memory] Failed to restore summary: {e}") async def _rebuild_summary_from_checkpointer(self, thread_id: str) -> None: """Rebuild summary from checkpointer when store data is corrupted.""" try: + if self.summary_store is None or self._checkpoint_store is None: + return if self.verbose: print(f"[Memory] Rebuilding summary from checkpointer for thread {thread_id}...") - checkpoint = self.checkpointer.get({"configurable": {"thread_id": thread_id}}) - if not checkpoint: + checkpoint_state = await self._checkpoint_store.load(thread_id) + if checkpoint_state is None: if self.verbose: print("[Memory] No checkpoint found, skipping rebuild") return - messages = checkpoint.get("channel_values", {}).get("messages", []) + messages = list(checkpoint_state.messages) if not messages: if self.verbose: print("[Memory] No messages in checkpoint, skipping rebuild") diff --git a/core/runtime/middleware/memory/summary_store.py b/core/runtime/middleware/memory/summary_store.py index 6fcff004c..553d162fa 100644 --- a/core/runtime/middleware/memory/summary_store.py +++ b/core/runtime/middleware/memory/summary_store.py @@ -64,8 +64,9 @@ def __init__(self, db_path: Path | None = None, summary_repo: SummaryRepo | None if summary_repo is not None: self._repo = summary_repo else: + resolved_db_path = self.db_path # @@@connect_injection - keep _connect as an indirection point so existing retry/rollback tests can patch it. 
- self._repo = SQLiteSummaryRepo(db_path, connect_fn=lambda p: _connect(p)) + self._repo = SQLiteSummaryRepo(resolved_db_path, connect_fn=lambda p: _connect(Path(p))) self._ensure_tables() def _ensure_tables(self) -> None: @@ -126,6 +127,8 @@ def save_summary( logger.error(f"[SummaryStore] Save failed after {max_retries} attempts: {e}") raise + raise RuntimeError("Summary save loop exited without returning or raising") + def get_latest_summary( self, thread_id: str, diff --git a/core/runtime/middleware/monitor/cost.py b/core/runtime/middleware/monitor/cost.py index 4b09c2a51..08615af02 100644 --- a/core/runtime/middleware/monitor/cost.py +++ b/core/runtime/middleware/monitor/cost.py @@ -112,7 +112,7 @@ def _load_cache() -> tuple[dict[str, dict[str, str]], dict[str, int], dict[str, if not cache_path.exists(): return None try: - data = json.loads(cache_path.read_text()) + data = json.loads(cache_path.read_text(encoding="utf-8")) if time.time() - data.get("timestamp", 0) > _CACHE_TTL: return None models = data.get("models", {}) @@ -128,7 +128,7 @@ def _save_cache(models: dict[str, dict[str, str]], context_limits: dict[str, int try: _CACHE_PATH.parent.mkdir(parents=True, exist_ok=True) data = {"timestamp": time.time(), "models": models, "context_limits": context_limits, "providers": providers} - _CACHE_PATH.write_text(json.dumps(data)) + _CACHE_PATH.write_text(json.dumps(data), encoding="utf-8") except Exception: pass @@ -163,11 +163,17 @@ def fetch_openrouter_pricing() -> dict[str, dict[str, Decimal]]: cached = _load_cache() if cached: models_raw, ctx, provs = cached - _pricing_data = _deserialize_costs(models_raw) - _context_limits = ctx - _model_providers = provs - _initialized = True - return _pricing_data + cached_costs = _deserialize_costs(models_raw) + # @@@pricing-cache-integrity - older CI caches can carry context/provider + # metadata with an empty model-pricing payload, which makes cost + # calculation silently degrade while context-limit tests still pass. + # Treat that cache as invalid and fall through to bundled/API reload. + if cached_costs: + _pricing_data = cached_costs + _context_limits = ctx + _model_providers = provs + _initialized = True + return _pricing_data _pricing_data = _fetch_from_openrouter() or _load_bundled() _initialized = True @@ -219,7 +225,10 @@ def _load_bundled() -> dict[str, dict[str, Decimal]]: if not _BUNDLED_PATH.exists(): return {} try: - data = json.loads(_BUNDLED_PATH.read_text()) + # @@@bundled-models-utf8 - Windows runners do not default to UTF-8. + # The bundled OpenRouter snapshot contains non-ASCII descriptions, so + # implicit decoding can fail and silently collapse pricing/context data. 
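# --- Illustrative sketch (not part of the patch) ----------------------------
# What the explicit encoding below guards against: Path.read_text() without
# an encoding argument falls back to the locale's preferred encoding, which
# is often cp1252 on Windows runners, so a UTF-8 JSON snapshot containing
# non-ASCII text can raise UnicodeDecodeError or decode to mojibake. The
# file name here is invented for the demo.

import json
from pathlib import Path

def load_snapshot(path: Path) -> dict:
    # Pin the encoding for files that were written as UTF-8.
    return json.loads(path.read_text(encoding="utf-8"))

demo = Path("snapshot-demo.json")
demo.write_text(json.dumps({"desc": "Précis 模型"}, ensure_ascii=False), encoding="utf-8")
assert load_snapshot(demo)["desc"] == "Précis 模型"
demo.unlink()
# ---------------------------------------------------------------------------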
+ data = json.loads(_BUNDLED_PATH.read_text(encoding="utf-8"))
 result: dict[str, dict[str, Decimal]] = {}
 ctx_result: dict[str, int] = {}
 prov_result: dict[str, str] = {}
diff --git a/core/runtime/middleware/monitor/middleware.py b/core/runtime/middleware/monitor/middleware.py
index 218ebcd06..adff96818 100644
--- a/core/runtime/middleware/monitor/middleware.py
+++ b/core/runtime/middleware/monitor/middleware.py
@@ -3,7 +3,7 @@
 from collections.abc import Awaitable, Callable
 from typing import Any
-from langchain.agents.middleware.types import (
+from core.runtime.middleware import (
 AgentMiddleware,
 ModelCallResult,
 ModelRequest,
@@ -25,7 +25,7 @@ class MonitorMiddleware(AgentMiddleware):
 Provides aggregated monitoring data to AgentRuntime.
 """
- tools = [] # no tools injected
+ tools = () # no tools injected
 def __init__(self, context_limit: int = 0, model_name: str = "", verbose: bool = False):
 self.verbose = verbose
@@ -113,6 +113,9 @@ async def awrap_model_call(
 self._state_monitor.mark_error(e)
 raise
+ if response.prepared_request is not None:
+ return response
+
 messages = response.result if hasattr(response, "result") else [response]
 resp_dict = {"messages": messages}
diff --git a/core/runtime/middleware/monitor/token_monitor.py b/core/runtime/middleware/monitor/token_monitor.py
index 255092704..7071d0141 100644
--- a/core/runtime/middleware/monitor/token_monitor.py
+++ b/core/runtime/middleware/monitor/token_monitor.py
@@ -1,8 +1,11 @@
 """Token usage monitoring (6 itemized counters)"""
+from __future__ import annotations
+
 from typing import Any
 from .base import BaseMonitor
+from .cost import CostCalculator
 class TokenMonitor(BaseMonitor):
@@ -24,7 +27,7 @@ def __init__(self):
 self.total_tokens = 0 # grand total
 # Cost calculator (injected by MonitorMiddleware)
- self.cost_calculator = None
+ self.cost_calculator: CostCalculator | None = None
 def on_request(self, request: dict[str, Any]) -> None:
 """Pre-request: no-op (call_count is incremented in on_response)"""
diff --git a/core/runtime/middleware/prompt_caching/__init__.py b/core/runtime/middleware/prompt_caching/__init__.py
index 87f4e92b4..361b124a8 100644
--- a/core/runtime/middleware/prompt_caching/__init__.py
+++ b/core/runtime/middleware/prompt_caching/__init__.py
@@ -1,8 +1,8 @@
 """Anthropic prompt caching middleware.
 Requires:
- - `langchain`: For agent middleware framework
- - `langchain-anthropic`: For `ChatAnthropic` model (already a dependency)
+ - local `core.runtime.middleware` protocol types
+ - `langchain-anthropic`: For `ChatAnthropic` model
 """
 from collections.abc import Awaitable, Callable
@@ -10,9 +10,10 @@
 from warnings import warn
 from langchain_anthropic.chat_models import ChatAnthropic
+from langchain_core.messages import SystemMessage
 try:
-    from langchain.agents.middleware.types import (
+    from core.runtime.middleware import (
 AgentMiddleware,
 ModelCallResult,
 ModelRequest,
@@ -20,9 +21,9 @@
 )
 except ImportError as e:
 msg = (
- "AnthropicPromptCachingMiddleware requires 'langchain' to be installed. "
- "This middleware is designed for use with LangChain agents. "
- "Install it with: pip install langchain"
+ "AnthropicPromptCachingMiddleware requires the local "
+ "'core.runtime.middleware' protocol definitions and "
+ "'langchain-anthropic' to be importable."
 )
 raise ImportError(msg) from e
@@ -32,7 +33,7 @@ class PromptCachingMiddleware(AgentMiddleware):
 Optimizes API usage by caching conversation prefixes for Anthropic models.
- Requires both `langchain` and `langchain-anthropic` packages to be installed.
+ Requires the local runtime middleware protocol plus `langchain-anthropic`.
Learn more about Anthropic prompt caching [here](https://platform.claude.com/docs/en/build-with-claude/prompt-caching). @@ -68,6 +69,26 @@ def __init__( self.min_messages_to_cache = min_messages_to_cache self.unsupported_model_behavior = unsupported_model_behavior + def _apply_system_cache(self, request: ModelRequest) -> ModelRequest: + """Add cache_control to the first (static) block of system_message. + + Anthropic prompt caching requires cache_control on the system content + blocks, not on messages. Marking the first block caches the entire + static system prefix (identity + tool rules) across sessions. + """ + sm = request.system_message + if sm is None: + return request + content = sm.content + if isinstance(content, str): + new_content: list = [{"type": "text", "text": content, "cache_control": {"type": self.type}}] + elif isinstance(content, list) and content: + first = {**content[0], "cache_control": {"type": self.type}} + new_content = [first, *content[1:]] + else: + return request + return request.override(system_message=SystemMessage(content=new_content)) + def _should_apply_caching(self, request: ModelRequest) -> bool: """Check if caching should be applied to the request. @@ -112,12 +133,7 @@ def wrap_model_call( """ if not self._should_apply_caching(request): return handler(request) - - new_model_settings = { - **request.model_settings, - "cache_control": {"type": self.type, "ttl": self.ttl}, - } - return handler(request.override(model_settings=new_model_settings)) + return handler(self._apply_system_cache(request)) async def awrap_model_call( self, @@ -135,12 +151,7 @@ async def awrap_model_call( """ if not self._should_apply_caching(request): return await handler(request) - - new_model_settings = { - **request.model_settings, - "cache_control": {"type": self.type, "ttl": self.ttl}, - } - return await handler(request.override(model_settings=new_model_settings)) + return await handler(self._apply_system_cache(request)) __all__ = ["PromptCachingMiddleware"] diff --git a/core/runtime/middleware/queue/__init__.py b/core/runtime/middleware/queue/__init__.py index f3d08f337..cf97229dc 100644 --- a/core/runtime/middleware/queue/__init__.py +++ b/core/runtime/middleware/queue/__init__.py @@ -2,7 +2,12 @@ from storage.contracts import QueueItem -from .formatters import format_background_notification, format_chat_notification, format_wechat_message +from .formatters import ( + format_agent_message, + format_background_notification, + format_chat_notification, + format_progress_notification, +) from .manager import MessageQueueManager from .middleware import SteeringMiddleware @@ -10,7 +15,8 @@ "MessageQueueManager", "QueueItem", "SteeringMiddleware", + "format_agent_message", "format_background_notification", "format_chat_notification", - "format_wechat_message", + "format_progress_notification", ] diff --git a/core/runtime/middleware/queue/formatters.py b/core/runtime/middleware/queue/formatters.py index 1e7821187..85034f7b4 100644 --- a/core/runtime/middleware/queue/formatters.py +++ b/core/runtime/middleware/queue/formatters.py @@ -11,13 +11,51 @@ def format_chat_notification(sender_name: str, chat_id: str, unread_count: int, signal: str | None = None) -> str: - """Lightweight notification — agent must chat_read to see content. + """Lightweight notification — agent must read_messages to see content. @@@v3-notification-only — no message content injected. Agent calls - chat_read(chat_id=...) to read, then chat_send() to reply. + read_messages(chat_id=...) 
to read, then send_message() to reply. """ signal_hint = f" [signal: {signal}]" if signal and signal != "open" else "" - return f"\nNew message from {sender_name} in chat {chat_id} ({unread_count} unread).{signal_hint}\n" + return ( + "\n" + f"New message from {sender_name} in chat {chat_id} ({unread_count} unread).{signal_hint}\n" + f'Read it with read_messages(chat_id="{chat_id}").\n' + f'Reply with send_message(chat_id="{chat_id}", content="...").\n' + "Prefer using this exact chat_id directly.\n" + "Do not treat your normal assistant text as a chat reply.\n" + "" + ) + + +def format_agent_message(sender_name: str, message: str) -> str: + """Format inter-agent delivery for steering injection on the next turn.""" + return ( + "\n" + "\n" + f" {escape(sender_name)}\n" + f" {escape(message)}\n" + "\n" + "" + ) + + +def format_progress_notification( + agent_id: str, + description: str, + *, + step: str = "running", +) -> str: + """Format background worker progress for coordinator-style prompt injection.""" + return ( + "\n" + "\n" + f" {escape(agent_id)}\n" + f" {escape(step)}\n" + f" {escape(description)}\n" + "\n" + "" + ) def format_background_notification( @@ -31,7 +69,7 @@ def format_background_notification( """Format background task completion as system-reminder XML.""" parts = [ "", - "", + "", f" {task_id}", f" {status}", ] @@ -44,29 +82,11 @@ def format_background_notification( parts.append(f" {escape(truncated)}") if usage: parts.append(f" {json.dumps(usage)}") - parts.append("") + parts.append("") parts.append("") return "\n".join(parts) -def format_wechat_message(sender_name: str, user_id: str, text: str) -> str: - """Format incoming WeChat message for thread delivery. - - Agent sees: full message with user_id metadata (needed for wechat_send reply). - Frontend sees: just the message text (system-reminder stripped). 
- """ - return ( - f"{text}\n" - "\n" - "\n" - f" {escape(sender_name)}\n" - f" {escape(user_id)}\n" - "\n" - 'To reply, use wechat_send(user_id="' + escape(user_id) + '", text="...").\n' - "" - ) - - def format_command_notification( command_id: str, status: Literal["completed", "failed"], diff --git a/core/runtime/middleware/queue/manager.py b/core/runtime/middleware/queue/manager.py index fd155b94d..f7ea1466f 100644 --- a/core/runtime/middleware/queue/manager.py +++ b/core/runtime/middleware/queue/manager.py @@ -11,7 +11,7 @@ from collections.abc import Callable from pathlib import Path -from storage.contracts import QueueItem, QueueRepo +from storage.contracts import NotificationType, QueueItem, QueueRepo logger = logging.getLogger(__name__) @@ -40,7 +40,7 @@ def enqueue( self, content: str, thread_id: str, - notification_type: str = "steer", + notification_type: NotificationType = "steer", source: str | None = None, sender_id: str | None = None, sender_name: str | None = None, diff --git a/core/runtime/middleware/queue/middleware.py b/core/runtime/middleware/queue/middleware.py index ccb9c30be..714d0bd54 100644 --- a/core/runtime/middleware/queue/middleware.py +++ b/core/runtime/middleware/queue/middleware.py @@ -10,30 +10,65 @@ from collections.abc import Awaitable, Callable from typing import Any -from langchain_core.messages import HumanMessage, ToolMessage +from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage from langchain_core.runnables import RunnableConfig -try: - from langchain.agents.middleware.types import ( - AgentMiddleware, - ModelCallResult, - ModelRequest, - ModelResponse, - ToolCallRequest, +from core.runtime.middleware import ( + AgentMiddleware, + ModelCallResult, + ModelRequest, + ModelResponse, + ToolCallRequest, +) +from core.runtime.notifications import is_terminal_background_notification + +from .manager import MessageQueueManager + +logger = logging.getLogger(__name__) + +_STEER_NON_PREEMPTIVE_SYSTEM_NOTE = ( + "Steer requests accepted during an active run are non-preemptive. " + "If any tool call from the interrupted run already started, it was allowed to finish and its side effects may " + "already have happened. Do not claim that prior work was interrupted, prevented, cancelled, or rolled back. " + "Treat the steer as instructions for what to do next after that completed work, and answer honestly about any " + "side effects that may already exist." 
+) + + +def _is_terminal_background_notification(item: Any) -> bool: + return is_terminal_background_notification( + getattr(item, "content", None), + source="system", + notification_type=getattr(item, "notification_type", None), ) -except ImportError: - class AgentMiddleware: - pass - ModelRequest = Any - ModelResponse = Any - ModelCallResult = Any - ToolCallRequest = Any +def _is_owner_steer_message(message: Any) -> bool: + if message.__class__.__name__ != "HumanMessage": + return False + metadata = getattr(message, "metadata", {}) or {} + return bool(metadata.get("is_steer") or (metadata.get("source") == "owner" and metadata.get("notification_type") == "steer")) -from .manager import MessageQueueManager -logger = logging.getLogger(__name__) +def _apply_steer_contract(request: ModelRequest) -> ModelRequest: + if not any(_is_owner_steer_message(message) for message in request.messages): + return request + + system_message = request.system_message + if system_message is None: + return request.override(system_message=SystemMessage(content=_STEER_NON_PREEMPTIVE_SYSTEM_NOTE)) + + content = getattr(system_message, "content", None) + if isinstance(content, str): + if _STEER_NON_PREEMPTIVE_SYSTEM_NOTE in content: + return request + # @@@steer-honesty-contract - mid-run steer stays a real user message in + # durable history, but the live model call also needs an explicit + # non-preemptive contract so it cannot overclaim that already-started + # tool work was stopped or never produced side effects. + return request.override(system_message=SystemMessage(content=f"{content}\n\n{_STEER_NON_PREEMPTIVE_SYSTEM_NOTE}")) + + return request.override(messages=[SystemMessage(content=_STEER_NON_PREEMPTIVE_SYSTEM_NOTE), *request.messages]) class SteeringMiddleware(AgentMiddleware): @@ -66,6 +101,20 @@ async def awrap_tool_call( """Async pure passthrough — never skip tool calls.""" return await handler(request) + def wrap_model_call( + self, + request: ModelRequest, + handler: Callable[[ModelRequest], ModelResponse], + ) -> ModelCallResult: + return handler(_apply_steer_contract(request)) + + async def awrap_model_call( + self, + request: ModelRequest, + handler: Callable[[ModelRequest], Awaitable[ModelResponse]], + ) -> ModelCallResult: + return await handler(_apply_steer_contract(request)) + def before_model( self, state: Any, @@ -79,7 +128,27 @@ def before_model( return None items = self._queue_manager.drain_all(thread_id) - rt = self._agent_runtime + inject_now = [] + deferred = [] + for item in items: + if _is_terminal_background_notification(item): + deferred.append(item) + else: + inject_now.append(item) + # @@@followup-defer - terminal background notifications must never be + # injected inline into an active run. Their stable contract is a + # dedicated followthrough notice-only turn, regardless of the current + # run source. + for item in deferred: + self._queue_manager.enqueue( + item.content, + thread_id, + notification_type=item.notification_type, + source=item.source, + sender_id=item.sender_id, + sender_name=item.sender_name, + ) + items = inject_now if not items: return None @@ -109,14 +178,15 @@ def before_model( # breaks the turn at the steer injection point. # user_message is NOT emitted here — wake_handler already did it # at enqueue time (@@@steer-instant-feedback). 
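# --- Illustrative sketch (not part of the patch) ----------------------------
# The @@@followup-defer partition earlier in this hunk, reduced to plain
# data: terminal background notifications are split out of the drained queue
# and re-enqueued instead of being injected inline. Item is a stand-in for
# QueueItem; the terminal flag stands in for
# is_terminal_background_notification(item).

from dataclasses import dataclass

@dataclass
class Item:
    content: str
    terminal: bool

def partition(items: list[Item]) -> tuple[list[Item], list[Item]]:
    inject_now = [i for i in items if not i.terminal]
    deferred = [i for i in items if i.terminal]
    return inject_now, deferred

drained = [Item("steer: stop and summarize", False), Item("background task done", True)]
now, later = partition(drained)
assert [i.content for i in now] == ["steer: stop and summarize"]
assert [i.content for i in later] == ["background task done"]  # re-enqueued for its own turn
# ---------------------------------------------------------------------------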
- if has_steer and rt and hasattr(rt, "emit_activity_event"): - rt.emit_activity_event( + agent_runtime = self._agent_runtime + if has_steer and agent_runtime and hasattr(agent_runtime, "emit_activity_event"): + agent_runtime.emit_activity_event( { "event": "run_done", "data": json.dumps({"thread_id": thread_id}), } ) - rt.emit_activity_event( + agent_runtime.emit_activity_event( { "event": "run_start", "data": json.dumps({"thread_id": thread_id, "showing": True}), diff --git a/core/runtime/middleware/spill_buffer/middleware.py b/core/runtime/middleware/spill_buffer/middleware.py index ca519cb27..66390718d 100644 --- a/core/runtime/middleware/spill_buffer/middleware.py +++ b/core/runtime/middleware/spill_buffer/middleware.py @@ -2,28 +2,16 @@ from __future__ import annotations +import json +import mimetypes +import posixpath from collections.abc import Awaitable, Callable from pathlib import Path from typing import Any from langchain_core.messages import ToolMessage -try: - from langchain.agents.middleware.types import ( - AgentMiddleware, - ModelRequest, - ModelResponse, - ToolCallRequest, - ) -except ImportError: - - class AgentMiddleware: # type: ignore[no-redef] - pass - - ModelRequest = Any - ModelResponse = Any - ToolCallRequest = Any - +from core.runtime.middleware import AgentMiddleware, ModelRequest, ModelResponse, ToolCallRequest from core.tools.filesystem.backend import FileSystemBackend from .spill import spill_if_needed @@ -57,6 +45,53 @@ def __init__( self.thresholds: dict[str, int] = thresholds or {} self.default_threshold = default_threshold + def _rewrite_mcp_blocks(self, content: Any, *, tool_call_id: str) -> Any: + if not isinstance(content, list): + return content + + lines: list[str] = [] + saw_mcp_blocks = False + + for index, block in enumerate(content): + if not isinstance(block, dict): + return content + + kind = block.get("type") + if kind == "text": + lines.append(str(block.get("text", ""))) + continue + + saw_mcp_blocks = True + mime_type = str(block.get("mime_type") or "application/octet-stream") + guessed_ext = mimetypes.guess_extension(mime_type.split(";", 1)[0].strip()) or ".bin" + + if isinstance(block.get("base64"), str): + payload_path = posixpath.join( + self.workspace_root, + ".leon", + "tool-results", + f"{tool_call_id}-{index}{guessed_ext}.base64", + ) + # @@@mcp-binary-handoff - api-04 keeps Leon's sandbox/file + # abstraction by persisting encoded payloads through fs_backend + # instead of writing host-local bytes behind the sandbox's back. 
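# --- Illustrative sketch (not part of the patch) ----------------------------
# How the payload path above is derived for an MCP binary block, using only
# the stdlib. The workspace root and tool_call_id values are invented.

import mimetypes
import posixpath

def payload_path(workspace_root: str, tool_call_id: str, index: int, mime_type: str) -> str:
    # Strip any ";charset=..." parameter before guessing an extension, and
    # fall back to .bin for unknown types, mirroring the middleware logic.
    ext = mimetypes.guess_extension(mime_type.split(";", 1)[0].strip()) or ".bin"
    return posixpath.join(
        workspace_root, ".leon", "tool-results", f"{tool_call_id}-{index}{ext}.base64"
    )

assert payload_path("/ws", "call_1", 0, "image/png") == "/ws/.leon/tool-results/call_1-0.png.base64"
assert payload_path("/ws", "call_1", 1, "application/x-unknown").endswith("-1.bin.base64")
# ---------------------------------------------------------------------------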
+ write_result = self.fs_backend.write_file(payload_path, block["base64"]) + if hasattr(write_result, "success") and not write_result.success: + raise RuntimeError(write_result.error or f"failed to persist MCP payload to {payload_path}") + lines.append(f"MCP binary content ({mime_type}) saved to {payload_path} as base64 payload.") + continue + + if isinstance(block.get("url"), str): + lines.append(f"MCP {kind} content available at {block['url']} ({mime_type})") + continue + + lines.append(json.dumps(block, ensure_ascii=False, default=str)) + + if not saw_mcp_blocks: + text_only = "\n".join(line for line in lines if line) + return text_only if text_only else content + return "\n".join(line for line in lines if line) + # -- model call: pass-through ------------------------------------------ def wrap_model_call( @@ -81,6 +116,19 @@ def _maybe_spill(self, request: ToolCallRequest, result: ToolMessage) -> ToolMes if tool_name in SKIP_TOOLS: return result + source = result.additional_kwargs.get("tool_result_meta", {}).get("source") + normalized_content = result.content + if source == "mcp": + normalized_content = self._rewrite_mcp_blocks( + normalized_content, + tool_call_id=request.tool_call.get("id", "unknown"), + ) + if normalized_content is not result.content: + result = result.model_copy(update={"content": normalized_content}) + + if isinstance(result.content, str) and not result.content.strip(): + return result.model_copy(update={"content": f"({tool_name} completed with no output)"}) + threshold = self.thresholds.get(tool_name, self.default_threshold) tool_call_id = request.tool_call.get("id", "unknown") @@ -93,10 +141,10 @@ def _maybe_spill(self, request: ToolCallRequest, result: ToolMessage) -> ToolMes ) if spilled is not result.content: - return ToolMessage( - content=spilled, - tool_call_id=result.tool_call_id, - ) + # @@@spill-message-preservation - replacing content must not discard + # metadata/name/id; te-03 is about persisted handoff, not rebuilding + # a thinner ToolMessage shell. + return result.model_copy(update={"content": spilled}) return result def wrap_tool_call( diff --git a/core/runtime/middleware/spill_buffer/spill.py b/core/runtime/middleware/spill_buffer/spill.py index 8246a4f33..58cfa470e 100644 --- a/core/runtime/middleware/spill_buffer/spill.py +++ b/core/runtime/middleware/spill_buffer/spill.py @@ -2,7 +2,7 @@ from __future__ import annotations -import os +import posixpath from typing import Any from core.tools.filesystem.backend import FileSystemBackend @@ -10,6 +10,14 @@ PREVIEW_BYTES = 2048 +def _format_preview(content: str) -> str: + preview = content[:PREVIEW_BYTES] + cutoff = preview.rfind("\n") + if cutoff >= PREVIEW_BYTES // 2: + return preview[:cutoff] + return preview + + def spill_if_needed( content: Any, threshold_bytes: int, @@ -36,8 +44,8 @@ def spill_if_needed( if size <= threshold_bytes: return content - spill_dir = os.path.join(workspace_root, ".leon", "tool-results") - spill_path = os.path.join(spill_dir, f"{tool_call_id}.txt") + spill_dir = posixpath.join(workspace_root, ".leon", "tool-results") + spill_path = posixpath.join(spill_dir, f"{tool_call_id}.txt") write_note = "" try: @@ -50,10 +58,15 @@ def spill_if_needed( write_note = f"\n\n(Warning: failed to save full output to disk: {exc})" spill_path = "" - preview = content[:PREVIEW_BYTES] + # @@@persisted-output-wrapper - te-03 is about durable handoff semantics, + # not "shorter string". 
The model must see an explicit persisted artifact + # boundary plus the re-read path, otherwise we silently amputate context. + preview = _format_preview(content) return ( - f"Output too large ({size} bytes). Full output saved to: {spill_path}" - f"\n\nUse read_file to view specific sections with offset and limit parameters." - f"\n\nPreview (first {PREVIEW_BYTES} bytes):\n{preview}\n..." - f"{write_note}" + f'' + f"\nSize: {size} bytes" + f"\nUse read_file to inspect the full persisted output." + f"\nPreview (first {PREVIEW_BYTES} bytes):\n{preview}\n..." + f"{write_note}\n" + f"" ) diff --git a/core/runtime/notifications.py b/core/runtime/notifications.py new file mode 100644 index 000000000..f70ffc1fa --- /dev/null +++ b/core/runtime/notifications.py @@ -0,0 +1,13 @@ +from __future__ import annotations + + +def is_terminal_background_notification( + content: str | None, + *, + source: str | None, + notification_type: str | None, +) -> bool: + if source != "system" or notification_type not in {"agent", "command"}: + return False + text = content or "" + return "" in text or "" in text diff --git a/core/runtime/permissions.py b/core/runtime/permissions.py new file mode 100644 index 000000000..37c182ed7 --- /dev/null +++ b/core/runtime/permissions.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +PERMISSION_RULE_SOURCES = ( + "userSettings", + "projectSettings", + "localSettings", + "flagSettings", + "policySettings", + "cliArg", + "session", +) + + +@dataclass(frozen=True) +class ToolPermissionContext: + is_read_only: bool + is_destructive: bool = False + # @@@camelcase-permission-surface - external state/routes already speak this camelCase shape. + alwaysAllowRules: dict[str, list[str]] | None = None # noqa: N815 + alwaysDenyRules: dict[str, list[str]] | None = None # noqa: N815 + alwaysAskRules: dict[str, list[str]] | None = None # noqa: N815 + allowManagedPermissionRulesOnly: bool = False # noqa: N815 + + +def can_auto_approve(context: ToolPermissionContext) -> bool: + return context.is_read_only and not context.is_destructive + + +def _active_sources(context: ToolPermissionContext) -> tuple[str, ...]: + if context.allowManagedPermissionRulesOnly: + return ("policySettings",) + return PERMISSION_RULE_SOURCES + + +def _extract_tool_name(rule: str) -> str: + rule = rule.strip() + open_paren = rule.find("(") + return rule if open_paren == -1 else rule[:open_paren] + + +def _find_matching_rule( + rule_buckets: dict[str, list[str]] | None, + tool_name: str, + *, + sources: tuple[str, ...], +) -> str | None: + if not rule_buckets: + return None + for source in sources: + for rule in rule_buckets.get(source, []): + if _extract_tool_name(rule) == tool_name: + return rule + return None + + +def evaluate_permission_rules( + tool_name: str, + context: ToolPermissionContext, +) -> dict[str, Any] | None: + sources = _active_sources(context) + + deny_rule = _find_matching_rule(context.alwaysDenyRules, tool_name, sources=sources) + if deny_rule is not None: + return {"decision": "deny", "message": f"Permission denied by rule: {deny_rule}"} + + ask_rule = _find_matching_rule(context.alwaysAskRules, tool_name, sources=sources) + if ask_rule is not None: + return {"decision": "ask", "message": f"Permission required by rule: {ask_rule}"} + + allow_rule = _find_matching_rule(context.alwaysAllowRules, tool_name, sources=sources) + if allow_rule is not None: + return {"decision": "allow", "message": f"Permission allowed by rule: 
{allow_rule}"} + + return None diff --git a/core/runtime/prompts.py b/core/runtime/prompts.py new file mode 100644 index 000000000..6077cf371 --- /dev/null +++ b/core/runtime/prompts.py @@ -0,0 +1,217 @@ +"""System prompt builders — pure functions, no agent state. + +Extracted from LeonAgent so agent.py stays lean. + +Middleware Stack +- MemoryMiddleware: trims/compacts conversation context before model calls. +- MonitorMiddleware: aggregates runtime metrics and observes model execution. +- PromptCachingMiddleware: enables Anthropic prompt caching for eligible requests. +- SteeringMiddleware: drains queued messages and injects them before the next model call. +- SpillBufferMiddleware: spills oversized tool outputs to disk and replaces them with previews. +""" + +from __future__ import annotations + +from typing import NamedTuple + + +class RuleSpec(NamedTuple): + title: str + body: str + details: tuple[str, ...] = () + + +def _render_rule(index: int, rule: RuleSpec) -> str: + rendered = f"{index}. **{rule.title}**: {rule.body}" + if not rule.details: + return rendered + return rendered + "\n" + "\n".join(f" - {detail}" for detail in rule.details) + + +def _build_core_rules(*, is_sandbox: bool, sandbox_name: str, workspace_root: str, working_dir: str) -> list[RuleSpec]: + rules: list[RuleSpec] = [] + if is_sandbox: + if sandbox_name == "docker": + location_rule = "All file and command operations run in a local Docker container, NOT on the user's host filesystem." + else: + location_rule = "All file and command operations run in a remote sandbox, NOT on the user's local machine." + rules.append(RuleSpec("Sandbox Environment", f"{location_rule} The sandbox is an isolated Linux environment.")) + else: + rules.append(RuleSpec("Workspace", "File operations are restricted to: " + workspace_root)) + + rules.append( + RuleSpec( + "Absolute Paths", + "All file paths must be absolute paths.", + ( + f"Correct: `{working_dir}/project/test.py`", + "Wrong: `test.py` or `./test.py`", + ), + ) + ) + + if is_sandbox: + security = "The sandbox is isolated. You can install packages, run any commands, and modify files freely." + else: + security = "Dangerous commands are blocked. All operations are logged." 
+ rules.append(RuleSpec("Security", security)) + return rules + + +def _build_risk_rules() -> list[RuleSpec]: + return [ + RuleSpec( + "Risky Actions", + "Ask before destructive, hard-to-reverse, or shared-state actions.", + ( + "Examples: deleting files, force-pushing, dropping tables, killing unfamiliar processes, modifying shared infrastructure.", + "If you see unexpected state, investigate before deleting or overwriting it.", + ), + ), + RuleSpec( + "No URL Guessing", + "Do not guess URLs unless the user provided them or you are confident they are directly relevant to programming help.", + ), + RuleSpec( + "Minimal Change", + "Do not add features, refactor code, or make speculative abstractions beyond what the task requires.", + ( + "Don't create helpers, utilities, or abstractions for one-time operations.", + "Don't add error handling, fallbacks, or validation for scenarios that can't happen.", + ), + ), + ] + + +def _build_tool_preference_rules() -> list[RuleSpec]: + return [ + RuleSpec( + "Tool Priority", + "When a built-in tool and an MCP tool (`mcp__*`) have the same functionality, use the built-in tool.", + ), + RuleSpec( + "Tool Preference", + "Prefer dedicated tools over `Bash` when a built-in tool already matches the job.", + ( + "Use `Read` instead of `cat`, `head`, or `tail`.", + "Use `Edit` instead of shell text-munging for file edits.", + "Use `Write` instead of heredoc or echo redirection for file creation.", + "Use `Glob`/`Grep` for file discovery and content search before falling back to `Bash`.", + ), + ), + ] + + +def _build_interaction_rules() -> list[RuleSpec]: + return [] + + +def _build_function_result_clearing_rules(*, spill_buffer_enabled: bool, spill_keep_recent: int) -> list[RuleSpec]: + if not spill_buffer_enabled: + return [] + return [ + RuleSpec( + "Function Result Clearing", + f"Old tool results may be cleared from context to free up space. 
The {spill_keep_recent} most recent results are always kept.", + ( + "When working with tool results, write down any important information " + "you might need later in your response, as the original tool result " + "may be cleared later.", + ), + ) + ] + + +def _build_rule_specs( + *, + is_sandbox: bool, + sandbox_name: str, + workspace_root: str, + working_dir: str, + spill_buffer_enabled: bool, + spill_keep_recent: int, +) -> list[RuleSpec]: + rules: list[RuleSpec] = [] + rules.extend( + _build_core_rules( + is_sandbox=is_sandbox, + sandbox_name=sandbox_name, + workspace_root=workspace_root, + working_dir=working_dir, + ) + ) + rules.extend(_build_risk_rules()) + rules.extend(_build_tool_preference_rules()) + rules.extend( + _build_function_result_clearing_rules( + spill_buffer_enabled=spill_buffer_enabled, + spill_keep_recent=spill_keep_recent, + ) + ) + rules.extend(_build_interaction_rules()) + return rules + + +def build_context_section( + *, + sandbox_name: str, + sandbox_env_label: str = "", + sandbox_working_dir: str = "", + workspace_root: str = "", + os_name: str = "", + shell_name: str = "", +) -> str: + if sandbox_name != "local": + mode_label = "Sandbox (isolated local container)" if sandbox_name == "docker" else "Sandbox (isolated cloud environment)" + return f"""- Environment: {sandbox_env_label} +- Working Directory: {sandbox_working_dir} +- Mode: {mode_label}""" + return f"""- Workspace: `{workspace_root}` +- OS: {os_name} +- Shell: {shell_name} +- Mode: Local""" + + +def build_rules_section( + *, + is_sandbox: bool, + sandbox_name: str = "", + working_dir: str, + workspace_root: str, + spill_buffer_enabled: bool = False, + spill_keep_recent: int = 0, +) -> str: + rule_specs = _build_rule_specs( + is_sandbox=is_sandbox, + sandbox_name=sandbox_name, + workspace_root=workspace_root, + working_dir=working_dir, + spill_buffer_enabled=spill_buffer_enabled, + spill_keep_recent=spill_keep_recent, + ) + return "\n\n".join(_render_rule(index, rule) for index, rule in enumerate(rule_specs, start=1)) + + +def build_base_prompt(context: str, rules: str) -> str: + return f"""You are a highly capable AI assistant with access to file and system tools. 
+ +**Context:** +{context} + +**Important Rules:** + +{rules} +""" + + +_AGENT_TOOL_SECTION = """ +**Sub-agent Types:** +- `explore`: Read-only codebase exploration (Grep, Glob, Read only) +- `plan`: Architecture design and planning (read-only tools) +- `bash`: Shell command execution (Bash + read tools) +- `general`: Full tool access for independent multi-step tasks +""" + + +def build_common_sections(skills_enabled: bool) -> str: + return _AGENT_TOOL_SECTION diff --git a/core/runtime/registry.py b/core/runtime/registry.py index f6a87f008..4b9de4ccb 100644 --- a/core/runtime/registry.py +++ b/core/runtime/registry.py @@ -1,11 +1,46 @@ from __future__ import annotations from collections.abc import Awaitable, Callable +from copy import deepcopy from dataclasses import dataclass from enum import Enum +from typing import Any, NotRequired, Required, TypedDict -Handler = Callable[..., str] | Callable[..., Awaitable[str]] -SchemaProvider = dict | Callable[[], dict] +from core.runtime.tool_result import ToolResultEnvelope + +type ToolSchema = dict[str, Any] +type ToolHandlerResult = str | ToolResultEnvelope +type ToolArgs = dict[str, Any] +type ToolPropertySchema = dict[str, Any] +type ToolProperties = dict[str, ToolPropertySchema] + +type Handler = Callable[..., ToolHandlerResult] | Callable[..., Awaitable[ToolHandlerResult]] +type SchemaProvider = ToolSchema | Callable[[], ToolSchema] +type ConcurrencySafety = bool | Callable[[ToolArgs], bool] +type ToolInputValidator = Callable[[ToolArgs, Any], ToolArgs | None] | Callable[[ToolArgs, Any], Awaitable[ToolArgs | None]] + + +class _ToolEntryDefaults(TypedDict): + search_hint: str + is_concurrency_safe: ConcurrencySafety + is_read_only: bool + is_destructive: bool + context_schema: ToolSchema | None + validate_input: ToolInputValidator | None + + +class _ToolEntryBuildArgs(TypedDict, total=False): + name: Required[str] + mode: Required[ToolMode] + schema: Required[SchemaProvider] + handler: Required[Handler] + source: Required[str] + search_hint: NotRequired[str] + is_concurrency_safe: NotRequired[ConcurrencySafety] + is_read_only: NotRequired[bool] + is_destructive: NotRequired[bool] + context_schema: NotRequired[ToolSchema | None] + validate_input: NotRequired[ToolInputValidator | None] class ToolMode(Enum): @@ -20,11 +55,50 @@ class ToolEntry: schema: SchemaProvider handler: Handler source: str - - def get_schema(self) -> dict: + search_hint: str = "" # 3-10 word capability description for ToolSearch matching + is_concurrency_safe: ConcurrencySafety = False # fail-closed: assume not safe + is_read_only: bool = False # fail-closed: assume write operation + is_destructive: bool = False # advisory metadata for permission/UI layers + context_schema: ToolSchema | None = None # fields this tool needs from ToolUseContext + validate_input: ToolInputValidator | None = None + + def get_schema(self) -> ToolSchema: return self.schema() if callable(self.schema) else self.schema +TOOL_DEFAULTS: _ToolEntryDefaults = { + "search_hint": "", + "is_concurrency_safe": False, + "is_read_only": False, + "is_destructive": False, + "context_schema": None, + "validate_input": None, +} + + +def make_tool_schema( + *, + name: str, + description: str, + properties: ToolProperties, + required: list[str] | None = None, + parameter_overrides: ToolSchema | None = None, +) -> ToolSchema: + parameters: ToolSchema = { + "type": "object", + "properties": properties, + } + if required: + parameters["required"] = required + if parameter_overrides: + 
parameters.update(parameter_overrides) + return { + "name": name, + "description": description, + "parameters": parameters, + } + + class ToolRegistry: """Central registry for all tools. @@ -55,23 +129,70 @@ def register(self, entry: ToolEntry) -> None: def get(self, name: str) -> ToolEntry | None: return self._tools.get(name) - def get_inline_schemas(self) -> list[dict]: - return [e.get_schema() for e in self._tools.values() if e.mode == ToolMode.INLINE] - - def search(self, query: str) -> list[ToolEntry]: - """Return all matching tools (including inline) for tool_search.""" - q = query.lower() - results = [] - for entry in self._tools.values(): + def get_inline_schemas(self, discovered_tool_names: set[str] | None = None) -> list[dict]: + discovered_tool_names = discovered_tool_names or set() + return [ + self._sanitize_schema_for_model(e.get_schema()) + for e in self._tools.values() + if e.mode == ToolMode.INLINE or e.name in discovered_tool_names + ] + + def _sanitize_schema_for_model(self, schema: dict) -> dict: + # @@@tool-schema-sanitize - runtime-only schema metadata is useful for + # validator/readiness, but provider tool schemas must stay within the + # subset the live model API accepts. + def _walk(value: Any) -> Any: + if isinstance(value, dict): + return {key: _walk(child) for key, child in value.items() if not (isinstance(key, str) and key.startswith("x-leon-"))} + if isinstance(value, list): + return [_walk(item) for item in value] + return value + + return _walk(deepcopy(schema)) + + def search(self, query: str, *, modes: set[ToolMode] | None = None) -> list[ToolEntry]: + """Return matching tools with ranked relevance. + + Supports ``select:Name1,Name2`` for exact selection. + Otherwise ranks by: search_hint > name > description. + """ + q = query.strip() + entries = [entry for entry in self._tools.values() if modes is None or entry.mode in modes] + + # --- select: exact lookup --- + if q.lower().startswith("select:"): + names = [n.strip() for n in q[len("select:") :].split(",") if n.strip()] + results = [self._tools[n] for n in names if n in self._tools and (modes is None or self._tools[n].mode in modes)] + return results + + # --- keyword search with ranking --- + keywords = q.lower().split() + if not keywords: + return list(entries) + + scored: list[tuple[int, ToolEntry]] = [] + for entry in entries: schema = entry.get_schema() - name = schema.get("name", "") - desc = schema.get("description", "") - if q in name.lower() or q in desc.lower(): - results.append(entry) - # If no match, return all - if not results: - results = list(self._tools.values()) - return results + name_lower = entry.name.lower() + hint_lower = entry.search_hint.lower() + desc_lower = schema.get("description", "").lower() + + score = 0 + for kw in keywords: + if kw in hint_lower: + score += 3 + if kw in name_lower: + score += 2 + if kw in desc_lower: + score += 1 + if score > 0: + scored.append((score, entry)) + + if not scored: + return [] + + scored.sort(key=lambda x: x[0], reverse=True) + return [entry for _, entry in scored] def list_all(self) -> list[ToolEntry]: return list(self._tools.values()) diff --git a/core/runtime/runner.py b/core/runtime/runner.py index ade917216..15fffb02c 100644 --- a/core/runtime/runner.py +++ b/core/runtime/runner.py @@ -1,23 +1,44 @@ from __future__ import annotations import asyncio +import copy +import inspect import json import logging +import threading from collections.abc import Awaitable, Callable +from typing import Any, cast -from 
langchain.agents.middleware.types import ( +from langchain_core.messages import ToolMessage + +from core.runtime.middleware import ( AgentMiddleware, ModelRequest, ModelResponse, ToolCallRequest, ) -from langchain_core.messages import ToolMessage from .errors import InputValidationError +from .permissions import ToolPermissionContext from .registry import ToolRegistry +from .tool_result import ( + ToolResultEnvelope, + materialize_tool_message, + tool_error, + tool_permission_denied, + tool_permission_request, + tool_success, +) from .validator import ToolValidator logger = logging.getLogger(__name__) +DEFAULT_ASYNC_HOOK_TIMEOUT_S = 15.0 + + +class _ToolSpecificValidationError(Exception): + def __init__(self, message: str, error_code: str | None = None): + super().__init__(message) + self.error_code = error_code class ToolRunner(AgentMiddleware): @@ -48,9 +69,9 @@ def _inject_tools(self, request: ModelRequest) -> ModelRequest: def _extract_call_info(self, request: ToolCallRequest) -> tuple[str, dict, str]: tool_call = request.tool_call - name = tool_call.get("name") + name = tool_call.get("name") or "" args = tool_call.get("args", {}) - call_id = tool_call.get("id", "") + call_id = tool_call.get("id", "") or "" if isinstance(args, str): try: @@ -60,49 +81,612 @@ def _extract_call_info(self, request: ToolCallRequest) -> tuple[str, dict, str]: return name, args, call_id - def _validate_and_run(self, name: str, args: dict, call_id: str) -> ToolMessage: - entry = self._registry.get(name) - if entry is None: - return None # not our tool + @staticmethod + def _get_request_hook(request: ToolCallRequest, hook_name: str): + state = getattr(request, "state", None) + if state is None: + return None + if isinstance(state, dict): + hook = state.get(hook_name) + else: + hook = vars(state).get(hook_name) + if hook is None: + return None + if isinstance(hook, list): + return hook + return hook if callable(hook) else None - schema = entry.get_schema() + @staticmethod + async def _apply_result_hooks( + hook_or_hooks, + payload: ToolMessage | ToolResultEnvelope, + request: ToolCallRequest, + ) -> ToolMessage | ToolResultEnvelope: + if hook_or_hooks is None: + return payload + hooks = hook_or_hooks if isinstance(hook_or_hooks, list) else [hook_or_hooks] + current = payload + + async def _invoke(hook): + updated = hook(copy.deepcopy(payload), request) + if asyncio.iscoroutine(updated): + updated = await ToolRunner._await_async_hook_with_timeout( + request, + updated, + hook_name=getattr(hook, "__name__", type(hook).__name__), + ) + return updated + + for updated in await asyncio.gather(*(_invoke(hook) for hook in hooks)): + if updated is not None: + current = updated + return current + + def _normalize_result(self, result: Any) -> ToolResultEnvelope: + if isinstance(result, ToolResultEnvelope): + return result + return tool_success(result) + + @staticmethod + def _resolve_context_path(state: Any, path: str) -> Any: + current = state + for segment in path.split("."): + if segment == "app_state": + current = current.get_app_state() + continue + if isinstance(current, dict): + current = current[segment] + else: + current = getattr(current, segment) + return current + + @staticmethod + def _inject_handler_context(entry, args: dict, request: ToolCallRequest) -> dict: + state = getattr(request, "state", None) + if state is None: + return args try: - self._validator.validate(schema, args) - except InputValidationError as e: - return ToolMessage( - content=f"InputValidationError: {name} failed due to the following 
issue:\n{e}", - tool_call_id=call_id, - name=name, - ) + signature = inspect.signature(entry.handler) + except (TypeError, ValueError): + return args + accepts_kwargs = any(param.kind == inspect.Parameter.VAR_KEYWORD for param in signature.parameters.values()) + injected = dict(args) + + context_schema = getattr(entry, "context_schema", None) or {} + if isinstance(context_schema, dict): + # @@@pt-02-context-schema-mapping + # Pattern 2 only becomes real once declared ToolUseContext field + # mappings are injected into handler kwargs on the live path. + for param_name, context_path in context_schema.items(): + if param_name in injected: + continue + if not accepts_kwargs and param_name not in signature.parameters: + continue + injected[param_name] = ToolRunner._resolve_context_path(state, context_path) + + if "tool_context" in injected: + return injected + if accepts_kwargs or "tool_context" in signature.parameters: + # @@@sa-04-tool-context-injection + # The sub-agent boundary only becomes real once the live ToolUseContext + # can cross the tool runner into handlers that explicitly opt in. + injected["tool_context"] = state + return injected + + @staticmethod + def _coerce_permission_response(result) -> tuple[str | None, str | None]: + if result is None: + return None, None + if isinstance(result, str): + return result, None + if isinstance(result, dict): + decision = result.get("decision") or result.get("permission") + message = result.get("message") + return decision, message + decision = getattr(result, "decision", None) or getattr(result, "permission", None) + message = getattr(result, "message", None) + return decision, message + + @staticmethod + def _permission_denied_result(decision: str, message: str | None) -> ToolResultEnvelope: + if decision == "ask": + text = message or "Permission required" + else: + text = message or "Permission denied" + return tool_permission_denied( + text, + metadata={"decision": decision, "error_type": "permission_resolution"}, + ) + + @staticmethod + def _permission_request_result(request_id: str, message: str | None) -> ToolResultEnvelope: + return tool_permission_request( + message or "Permission required", + metadata={ + "decision": "ask", + "request_id": request_id, + "error_type": "permission_resolution", + }, + ) + @staticmethod + def _materialize_permission_ask( + request_id: str | None, + message: str | None, + ) -> ToolResultEnvelope: + # @@@permission-ask-materialization + # Ask is only honest when a concrete request surface exists. Otherwise + # fail loudly as a deny so caller metadata matches the actual runtime. + if request_id is not None: + return ToolRunner._permission_request_result(request_id, message) + return ToolRunner._permission_denied_result("deny", message) + + @staticmethod + def _run_awaitable_sync(awaitable): + try: + asyncio.get_running_loop() + except RuntimeError: + return asyncio.run(awaitable) + + result_box: list[Any] = [] + error_box: list[BaseException] = [] + + # @@@sync-awaitable-bridge - sync tool entrypoints still need to consume + # async permission checkers even when called from a live event loop. 
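# --- Illustrative sketch (not part of the patch) ----------------------------
# The bridge pattern in isolation: drive a coroutine to completion from sync
# code whether or not an event loop is already running in this thread. The
# real implementation below also propagates worker-thread exceptions; this
# sketch omits that for brevity.

import asyncio
import threading

def run_sync(awaitable):
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: plain asyncio.run is safe.
        return asyncio.run(awaitable)
    # A loop is already running, so asyncio.run would raise here. Drive the
    # coroutine on a throwaway loop in a worker thread and join it.
    box: list = []

    def worker() -> None:
        box.append(asyncio.run(awaitable))

    t = threading.Thread(target=worker, daemon=True)
    t.start()
    t.join()
    return box[0] if box else None

async def answer() -> int:
    return 42

assert run_sync(answer()) == 42
# ---------------------------------------------------------------------------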
+ def _runner() -> None: + try: + result_box.append(asyncio.run(awaitable)) + except BaseException as exc: # pragma: no cover - re-raised below + error_box.append(exc) + + thread = threading.Thread(target=_runner, daemon=True) + thread.start() + thread.join() + + if error_box: + raise error_box[0] + return result_box[0] if result_box else None + + @staticmethod + def _get_async_hook_timeout_s(request: ToolCallRequest) -> float: + state = getattr(request, "state", None) + if state is None: + return DEFAULT_ASYNC_HOOK_TIMEOUT_S + hook_timeout_ms = state.get("hook_timeout_ms") if isinstance(state, dict) else getattr(state, "hook_timeout_ms", None) + if isinstance(hook_timeout_ms, (int, float)) and hook_timeout_ms > 0: + return float(hook_timeout_ms) / 1000.0 + hook_timeout_s = state.get("hook_timeout_s") if isinstance(state, dict) else getattr(state, "hook_timeout_s", None) + if isinstance(hook_timeout_s, (int, float)) and hook_timeout_s > 0: + return float(hook_timeout_s) + return DEFAULT_ASYNC_HOOK_TIMEOUT_S + + @staticmethod + async def _await_async_hook_with_timeout( + request: ToolCallRequest, + awaitable, + *, + hook_name: str, + ): + timeout_s = ToolRunner._get_async_hook_timeout_s(request) + task = asyncio.create_task(awaitable) try: - result = entry.handler(**args) + return await asyncio.wait_for(task, timeout=timeout_s) + except TimeoutError: + logger.warning("Async hook %s timed out after %.3fs; ignoring hook result", hook_name, timeout_s) + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + return None + + @staticmethod + def _await_async_hook_with_timeout_sync( + request: ToolCallRequest, + awaitable, + *, + hook_name: str, + ): + return ToolRunner._run_awaitable_sync( + ToolRunner._await_async_hook_with_timeout( + request, + awaitable, + hook_name=hook_name, + ) + ) + + @staticmethod + def _get_state_callable(request: ToolCallRequest, name: str): + state = getattr(request, "state", None) + if state is None: + return None + return state.get(name) if isinstance(state, dict) else getattr(state, name, None) + + async def _consume_permission_resolution_async( + self, + request: ToolCallRequest, + *, + name: str, + args: dict, + entry, + ) -> tuple[str | None, str | None]: + consumer = self._get_state_callable(request, "consume_permission_resolution") + if not callable(consumer): + return None, None + permission_context = ToolPermissionContext( + is_read_only=bool(getattr(entry, "is_read_only", False)), + is_destructive=bool(getattr(entry, "is_destructive", False)), + ) + result = consumer(name, args, permission_context, request) + if asyncio.iscoroutine(result): + result = await result + return self._coerce_permission_response(result) + + async def _request_permission_async( + self, + request: ToolCallRequest, + *, + name: str, + args: dict, + entry, + message: str | None, + ) -> str | None: + requester = self._get_state_callable(request, "request_permission") + if not callable(requester): + return None + permission_context = ToolPermissionContext( + is_read_only=bool(getattr(entry, "is_read_only", False)), + is_destructive=bool(getattr(entry, "is_destructive", False)), + ) + result = requester(name, args, permission_context, request, message) + if asyncio.iscoroutine(result): + result = await result + if isinstance(result, dict): + request_id = result.get("request_id") + return request_id if isinstance(request_id, str) else None + return result if isinstance(result, str) else None + + async def _run_tool_specific_validation_async(self, entry, args: dict, 
request: ToolCallRequest) -> dict: + validator = getattr(entry, "validate_input", None) + if validator is None: + return args + result = validator(dict(args), request) + if asyncio.iscoroutine(result): + result = await result + if result is None: + return args + if isinstance(result, dict): + if result.get("result") is False or result.get("ok") is False: + raise _ToolSpecificValidationError( + result.get("message") or "Tool-specific validation failed", + result.get("errorCode") or result.get("error_code"), + ) + return result + raise InputValidationError(str(result)) + + async def _run_pre_tool_use_async( + self, + request: ToolCallRequest, + *, + name: str, + args: dict, + entry, + ) -> tuple[dict, str | None, str | None]: + hooks = self._get_request_hook(request, "pre_tool_use") + if hooks is None: + return args, None, None + payload = {"name": name, "args": dict(args), "entry": entry} + permission: str | None = None + message: str | None = None + hook_list = hooks if isinstance(hooks, list) else [hooks] + + async def _invoke(hook): + updated = hook({"name": name, "args": dict(args), "entry": entry}, request) + if asyncio.iscoroutine(updated): + updated = await self._await_async_hook_with_timeout( + request, + updated, + hook_name=getattr(hook, "__name__", type(hook).__name__), + ) + return updated + + # @@@pt-06-hook-fanout + # Pattern 6 requires hooks to fan out instead of impersonating a + # middleware chain. We still fold results back in hook-list order so + # the aggregation stays deterministic. + for updated in await asyncio.gather(*(_invoke(hook) for hook in hook_list)): + if updated is None: + continue + if isinstance(updated, dict): + if "args" in updated: + next_args = updated["args"] + if isinstance(next_args, dict): + payload["args"] = {**payload["args"], **next_args} + else: + payload["args"] = next_args + if "name" in updated: + payload["name"] = updated["name"] + if "entry" in updated: + payload["entry"] = updated["entry"] + new_permission, new_message = self._coerce_permission_response(updated) + if new_permission == "deny" and permission != "deny": + permission = new_permission + message = new_message + elif new_permission == "ask" and permission not in {"deny", "ask"}: + permission = new_permission + message = new_message + elif new_permission == "allow" and permission is None: + permission = new_permission + message = new_message + return payload["args"], permission, message + + async def _run_permission_request_hooks_async( + self, + request: ToolCallRequest, + *, + name: str, + entry, + message: str | None, + ) -> tuple[str | None, str | None]: + hooks = self._get_request_hook(request, "permission_request_hooks") + if hooks is None: + return None, message + payload = {"name": name, "entry": entry, "message": message} + permission: str | None = None + hook_message = message + hook_list = hooks if isinstance(hooks, list) else [hooks] + + async def _invoke(hook): + updated = hook(payload, request) + if asyncio.iscoroutine(updated): + updated = await self._await_async_hook_with_timeout( + request, + updated, + hook_name=getattr(hook, "__name__", type(hook).__name__), + ) + return updated + + for updated in await asyncio.gather(*(_invoke(hook) for hook in hook_list)): + if updated is None: + continue + if isinstance(updated, dict): + new_permission, new_message = self._coerce_permission_response(updated) + if new_permission == "deny" and permission != "deny": + permission = new_permission + elif new_permission == "ask" and permission not in {"deny", "ask"}: + permission = 
new_permission + elif new_permission == "allow" and permission is None: + permission = new_permission + if new_message is not None: + hook_message = new_message + return permission, hook_message + + async def _resolve_permission_async( + self, + request: ToolCallRequest, + *, + name: str, + args: dict, + entry, + hook_permission: str | None, + hook_message: str | None, + ) -> ToolResultEnvelope | None: + if hook_permission == "deny": + return self._permission_denied_result("deny", hook_message) + + checker = self._get_state_callable(request, "can_use_tool") + rule_permission: str | None = None + rule_message: str | None = None + permission_context = ToolPermissionContext( + is_read_only=bool(getattr(entry, "is_read_only", False)), + is_destructive=bool(getattr(entry, "is_destructive", False)), + ) + if callable(checker): + result = checker(name, args, permission_context, request) if asyncio.iscoroutine(result): - result = asyncio.get_event_loop().run_until_complete(result) - return ToolMessage(content=str(result), tool_call_id=call_id, name=name) - except Exception as e: - logger.exception("Tool %s execution failed", name) - return ToolMessage( - content=f"{e}", - tool_call_id=call_id, + result = await result + rule_permission, rule_message = self._coerce_permission_response(result) + + # @@@permission-resolution-precedence - only consume one-shot approvals when current state still asks. + if rule_permission == "ask": + resolved_permission, resolved_message = await self._consume_permission_resolution_async( + request, + name=name, + args=args, + entry=entry, + ) + if resolved_permission == "allow": + return None + if resolved_permission in {"deny", "ask"}: + return self._permission_denied_result(resolved_permission, resolved_message) + request_hook_permission, request_hook_message = await self._run_permission_request_hooks_async( + request, name=name, + entry=entry, + message=rule_message, ) + if request_hook_permission == "allow": + return None + if request_hook_permission in {"deny", "ask"}: + return self._permission_denied_result(request_hook_permission, request_hook_message) + rule_message = request_hook_message + + if hook_permission == "allow": + if rule_permission in {"deny", "ask"}: + if rule_permission == "ask": + request_id = await self._request_permission_async( + request, + name=name, + args=args, + entry=entry, + message=rule_message, + ) + return self._materialize_permission_ask(request_id, rule_message) + return self._permission_denied_result(rule_permission, rule_message) + return None + + if rule_permission in {"deny", "ask"}: + if rule_permission == "ask": + request_id = await self._request_permission_async( + request, + name=name, + args=args, + entry=entry, + message=rule_message, + ) + return self._materialize_permission_ask(request_id, rule_message) + return self._permission_denied_result(rule_permission, rule_message) + return None - async def _validate_and_run_async(self, name: str, args: dict, call_id: str) -> ToolMessage | None: + def _materialize_result( + self, + envelope: ToolResultEnvelope, + *, + name: str, + call_id: str, + source: str, + ) -> ToolMessage: + return materialize_tool_message( + envelope, + tool_call_id=call_id, + name=name, + source=source, + ) + + @staticmethod + def _entry_source(entry) -> str: + return "mcp" if getattr(entry, "source", None) == "mcp" else "local" + + def _finalize_registered_result( + self, + envelope: ToolResultEnvelope, + *, + name: str, + call_id: str, + source: str, + ) -> ToolMessage | ToolResultEnvelope: + if source 
== "mcp": + return envelope + return self._materialize_result( + envelope, + name=name, + call_id=call_id, + source=source, + ) + + async def _finalize_tool_result_async( + self, + request: ToolCallRequest, + result: ToolMessage | ToolResultEnvelope, + *, + name: str, + call_id: str, + source: str, + ) -> ToolMessage: + if isinstance(result, ToolResultEnvelope): + hook_name = self._select_hook_name(result.kind) + hooks = self._get_request_hook(request, hook_name) + hooked = await self._apply_result_hooks(hooks, result, request) + if isinstance(hooked, ToolMessage): + return hooked + return self._materialize_result(hooked, name=name, call_id=call_id, source=source) + + meta = result.additional_kwargs.get("tool_result_meta", {}) + hook_name = self._select_hook_name(meta.get("kind")) + hooks = self._get_request_hook(request, hook_name) + hooked = await self._apply_result_hooks(hooks, result, request) + if isinstance(hooked, ToolMessage): + return hooked + return self._materialize_result(hooked, name=name, call_id=call_id, source=source) + + @staticmethod + def _select_hook_name(kind: str) -> str: + if kind == "error": + return "post_tool_use_failure" + if kind == "permission_denied": + return "permission_denied_hooks" + return "post_tool_use" + + @staticmethod + def _input_validation_metadata(error: InputValidationError) -> dict[str, object]: + metadata: dict[str, object] = {"error_type": "input_validation"} + if error.error_code: + metadata["error_code"] = error.error_code + if error.details: + metadata["error_details"] = error.details + return metadata + + async def _validate_and_run_async( + self, + request: ToolCallRequest, + name: str, + args: dict, + call_id: str, + ) -> ToolMessage | ToolResultEnvelope | None: entry = self._registry.get(name) if entry is None: return None + source = self._entry_source(entry) schema = entry.get_schema() try: self._validator.validate(schema, args) except InputValidationError as e: - return ToolMessage( - content=f"InputValidationError: {name} failed due to the following issue:\n{e}", - tool_call_id=call_id, + return self._finalize_registered_result( + tool_error( + f"InputValidationError: {name} failed due to the following issue:\n{e}", + metadata=self._input_validation_metadata(e), + ), + name=name, + call_id=call_id, + source=source, + ) + try: + args = await self._run_tool_specific_validation_async(entry, args, request) + except _ToolSpecificValidationError as e: + return self._finalize_registered_result( + tool_error( + f"ToolValidationError: {name} failed due to the following issue:\n{e}", + metadata={"error_type": "tool_input_validation", "error_code": e.error_code}, + ), + name=name, + call_id=call_id, + source=source, + ) + except InputValidationError as e: + return self._finalize_registered_result( + tool_error( + f"ToolValidationError: {name} failed due to the following issue:\n{e}", + metadata={"error_type": "tool_input_validation"}, + ), name=name, + call_id=call_id, + source=source, ) + args, hook_permission, hook_message = await self._run_pre_tool_use_async( + request, + name=name, + args=args, + entry=entry, + ) + permission_result = await self._resolve_permission_async( + request, + name=name, + args=args, + entry=entry, + hook_permission=hook_permission, + hook_message=hook_message, + ) + if permission_result is not None: + return self._finalize_registered_result( + permission_result, + name=name, + call_id=call_id, + source=source, + ) + + args = self._inject_handler_context(entry, args, request) try: if 
asyncio.iscoroutinefunction(entry.handler): result = await entry.handler(**args) @@ -113,13 +697,22 @@ async def _validate_and_run_async(self, name: str, args: dict, call_id: str) -> result = await asyncio.to_thread(entry.handler, **args) if asyncio.iscoroutine(result): result = await result - return ToolMessage(content=str(result), tool_call_id=call_id, name=name) + return self._finalize_registered_result( + self._normalize_result(result), + name=name, + call_id=call_id, + source=source, + ) except Exception as e: logger.exception("Tool %s execution failed", name) - return ToolMessage( - content=f"{e}", - tool_call_id=call_id, + return self._finalize_registered_result( + tool_error( + f"{e}", + metadata={"error_type": "tool_execution"}, + ), name=name, + call_id=call_id, + source=source, ) # -- Model call wrappers -- @@ -146,10 +739,26 @@ def wrap_tool_call( handler: Callable[[ToolCallRequest], ToolMessage], ) -> ToolMessage: name, args, call_id = self._extract_call_info(request) - result = self._validate_and_run(name, args, call_id) + entry = self._registry.get(name) + result: ToolMessage | ToolResultEnvelope | None = self._run_awaitable_sync( + self._validate_and_run_async(request, name, args, call_id) + ) if result is not None: - return result - return handler(request) + source = self._entry_source(entry) if entry is not None else "local" + return cast( + ToolMessage, + self._run_awaitable_sync( + self._finalize_tool_result_async( + request, + result, + name=name, + call_id=call_id, + source=source, + ) + ), + ) + upstream = handler(request) + return upstream async def awrap_tool_call( self, @@ -157,7 +766,32 @@ async def awrap_tool_call( handler: Callable[[ToolCallRequest], Awaitable[ToolMessage]], ) -> ToolMessage: name, args, call_id = self._extract_call_info(request) - result = await self._validate_and_run_async(name, args, call_id) + entry = self._registry.get(name) + source = self._entry_source(entry) if entry is not None else "local" + result = await self._validate_and_run_async(request, name, args, call_id) if result is not None: - return result - return await handler(request) + # @@@tool-result-ordering + # te-02 keeps local tools materialize-first, but registered MCP + # tools must stay envelope-first so post hooks can see and modify + # structured output before final ToolMessage creation. + return await self._finalize_tool_result_async( + request, + result, + name=name, + call_id=call_id, + source=source, + ) + + upstream = await handler(request) + post_tool_use = self._get_request_hook(request, "post_tool_use") + if isinstance(upstream, ToolResultEnvelope): + # MCP/upstream path: post hooks get first shot at the structured + # result, and only then do we materialize the ToolMessage. + hooked = await self._apply_result_hooks(post_tool_use, upstream, request) + if isinstance(hooked, ToolMessage): + return hooked + return self._materialize_result(hooked, name=name, call_id=call_id, source="mcp") + if isinstance(upstream, ToolMessage): + hooked = await self._apply_result_hooks(post_tool_use, upstream, request) + return hooked if isinstance(hooked, ToolMessage) else self._materialize_result(hooked, name=name, call_id=call_id, source="mcp") + return upstream diff --git a/core/runtime/state.py b/core/runtime/state.py new file mode 100644 index 000000000..80b53a4c2 --- /dev/null +++ b/core/runtime/state.py @@ -0,0 +1,172 @@ +"""Three-layer state models aligned with CC architecture. 
+ +Layer 1: BootstrapConfig — survives /clear, process-level constants +Layer 2: AppState — per-session mutable state (Zustand-style store) +Layer 3: ToolUseContext — per-turn, holds live closures to AppState +""" + +from __future__ import annotations + +import uuid +from collections.abc import Awaitable, Callable +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from .abort import AbortController +from .permissions import ToolPermissionContext + + +class ToolPermissionState(BaseModel): + # @@@camelcase-permission-surface - persisted/thread API surface already uses camelCase keys. + alwaysAllowRules: dict[str, list[str]] = Field(default_factory=dict) # noqa: N815 + alwaysDenyRules: dict[str, list[str]] = Field(default_factory=dict) # noqa: N815 + alwaysAskRules: dict[str, list[str]] = Field(default_factory=dict) # noqa: N815 + allowManagedPermissionRulesOnly: bool = False # noqa: N815 + + +class BootstrapConfig(BaseModel): + """Process-level configuration that survives /clear. + + Analogous to CC Bootstrap State (~85 fields). Contains workspace + identity, model config, security flags, and API credentials. + """ + + workspace_root: Path + original_cwd: Path | None = None + project_root: Path | None = None + cwd: Path | None = None + model_name: str + api_key: str | None = None + sandbox_type: str = "local" + permission_resolver_scope: str = "none" + + # Security flags (fail-closed defaults) + block_dangerous_commands: bool = True + block_network_commands: bool = False + enable_audit_log: bool = True + enable_web_tools: bool = False + + # File access + allowed_file_extensions: list[str] | None = None + extra_allowed_paths: list[str] | None = None + + # Turn limits + max_turns: int | None = None + + # Session identity + session_id: str = Field(default_factory=lambda: uuid.uuid4().hex) + parent_session_id: str | None = None + + # Session accumulators that survive turn-level resets + total_cost_usd: float = 0.0 + total_tool_duration_ms: int = 0 + + # Model settings + model_provider: str | None = None + base_url: str | None = None + context_limit: int | None = None + + model_config = ConfigDict(arbitrary_types_allowed=True) + + def model_post_init(self, __context: Any) -> None: + self.workspace_root = Path(self.workspace_root) + self.original_cwd = Path(self.original_cwd) if self.original_cwd is not None else self.workspace_root + self.project_root = Path(self.project_root) if self.project_root is not None else self.workspace_root + self.cwd = Path(self.cwd) if self.cwd is not None else self.project_root + + +class AppState(BaseModel): + """Per-session mutable state. Analogous to CC AppState store. + + Implements a minimal Zustand-style store with getState/setState. + Not reactive — no subscriptions needed for Python backend. + """ + + messages: list = Field(default_factory=list) + turn_count: int = 0 + total_cost: float = 0.0 + compact_boundary_index: int = 0 + # Map of tool_name -> is_enabled (runtime overrides) + tool_overrides: dict[str, bool] = Field(default_factory=dict) + tool_permission_context: ToolPermissionState = Field(default_factory=ToolPermissionState) + pending_permission_requests: dict[str, dict[str, Any]] = Field(default_factory=dict) + resolved_permission_requests: dict[str, dict[str, Any]] = Field(default_factory=dict) + announced_mcp_instruction_blocks: dict[str, dict[str, str]] = Field(default_factory=dict) + # @@@session-hooks-not-watchers - keep this surface local and lifecycle-scoped. 
+ # File watching remains a later outer-layer concern so Leon keeps the + # filesystem + terminal core decoupled. + session_hooks: dict[str, list[Any]] = Field(default_factory=dict) + + def get_state(self) -> AppState: + return self + + def set_state(self, updater: Callable[[AppState], AppState]) -> AppState: + updated = updater(self) + # Mutate in place (Python idiom — no immutable constraint needed here) + for field_name in AppState.model_fields: + setattr(self, field_name, getattr(updated, field_name)) + return self + + def add_session_hook(self, event: str, hook: Any) -> None: + hooks = list(self.session_hooks.get(event, [])) + hooks.append(hook) + self.session_hooks[event] = hooks + + def remove_session_hook(self, event: str, hook: Any) -> None: + hooks = [candidate for candidate in self.session_hooks.get(event, []) if candidate != hook] + if hooks: + self.session_hooks[event] = hooks + else: + self.session_hooks.pop(event, None) + + def get_session_hooks(self, event: str) -> list[Any]: + return list(self.session_hooks.get(event, [])) + + +AppStateUpdater = Callable[[AppState], AppState] +AppStateGetter = Callable[[], AppState] +AppStateSetter = Callable[[AppStateUpdater], AppState | None] +RefreshToolsHook = Callable[[], Awaitable[None] | None] +PermissionDecision = dict[str, Any] | None +PermissionChecker = Callable[ + [str, dict[str, Any], ToolPermissionContext, object], + PermissionDecision | Awaitable[PermissionDecision], +] +PermissionRequester = Callable[ + [str, dict[str, Any], ToolPermissionContext, object, str | None], + str | dict[str, Any] | None | Awaitable[str | dict[str, Any] | None], +] +PermissionResolutionConsumer = Callable[ + [str, dict[str, Any], ToolPermissionContext, object], + PermissionDecision | Awaitable[PermissionDecision], +] + + +class ToolUseContext(BaseModel): + """Per-turn context bag. Analogous to CC ToolUseContext. + + Carries live closures to AppState so tools can read/mutate session state. + Sub-agents receive a NO-OP set_app_state to prevent write-through. 
+ """ + + bootstrap: BootstrapConfig + get_app_state: AppStateGetter = Field(exclude=True) + set_app_state: AppStateSetter = Field(exclude=True) + set_app_state_for_tasks: AppStateSetter | None = Field(default=None, exclude=True) + refresh_tools: RefreshToolsHook | None = Field(default=None, exclude=True) + can_use_tool: PermissionChecker | None = Field(default=None, exclude=True) + request_permission: PermissionRequester | None = Field(default=None, exclude=True) + consume_permission_resolution: PermissionResolutionConsumer | None = Field(default=None, exclude=True) + read_file_state: Any = Field(default_factory=dict, exclude=True) + loaded_nested_memory_paths: Any = Field(default_factory=set, exclude=True) + discovered_skill_names: Any = Field(default_factory=set, exclude=True) + discovered_tool_names: Any = Field(default_factory=set, exclude=True) + nested_memory_attachment_triggers: Any = Field(default_factory=set, exclude=True) + abort_controller: AbortController = Field(default_factory=AbortController, exclude=True) + messages: list = Field(default_factory=list) + thread_id: str = "default" + turn_id: str = Field(default_factory=lambda: uuid.uuid4().hex[:8]) + + model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/core/runtime/tool_result.py b/core/runtime/tool_result.py new file mode 100644 index 000000000..1ccd24288 --- /dev/null +++ b/core/runtime/tool_result.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +from langchain_core.messages import ToolMessage + + +@dataclass +class ToolResultEnvelope: + kind: str + content: Any + is_error: bool = False + top_level_blocks: list[Any] = field(default_factory=list) + metadata: dict[str, Any] = field(default_factory=dict) + + +def tool_success(content: Any, *, metadata: dict[str, Any] | None = None) -> ToolResultEnvelope: + return ToolResultEnvelope( + kind="success", + content=content, + metadata=dict(metadata or {}), + ) + + +def tool_error(content: str, *, metadata: dict[str, Any] | None = None) -> ToolResultEnvelope: + return ToolResultEnvelope( + kind="error", + content=content, + is_error=True, + metadata=dict(metadata or {}), + ) + + +def tool_permission_denied( + content: str, + *, + top_level_blocks: list[Any] | None = None, + metadata: dict[str, Any] | None = None, +) -> ToolResultEnvelope: + return ToolResultEnvelope( + kind="permission_denied", + content=content, + is_error=True, + top_level_blocks=list(top_level_blocks or []), + metadata=dict(metadata or {}), + ) + + +def tool_permission_request( + content: str, + *, + top_level_blocks: list[Any] | None = None, + metadata: dict[str, Any] | None = None, +) -> ToolResultEnvelope: + return ToolResultEnvelope( + kind="permission_request", + content=content, + top_level_blocks=list(top_level_blocks or []), + metadata=dict(metadata or {}), + ) + + +def materialize_tool_message( + envelope: ToolResultEnvelope, + *, + tool_call_id: str, + name: str, + source: str, +) -> ToolMessage: + additional_kwargs = { + "tool_result_meta": { + "kind": envelope.kind, + "source": source, + "top_level_blocks": list(envelope.top_level_blocks), + **dict(envelope.metadata), + } + } + return ToolMessage( + content=envelope.content, + tool_call_id=tool_call_id, + name=name, + additional_kwargs=additional_kwargs, + ) diff --git a/core/runtime/validator.py b/core/runtime/validator.py index 84e678d07..46fa6d963 100644 --- a/core/runtime/validator.py +++ b/core/runtime/validator.py @@ -1,8 +1,45 @@ import json 
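+# A hedged sketch of the required-set contract enforced below (the tool fields
+# are invented; the "x-leon-required-any-of" key and helper are from this diff):
+#
+#     parameters = {
+#         "type": "object",
+#         "properties": {"ticket_id": {"type": "string"}, "email": {"type": "string"}},
+#         "required": [],
+#         "x-leon-required-any-of": [["ticket_id"], ["email"]],
+#     }
+#     _required_sets_match(parameters, {"email": "a@b.c"})  # -> True
+#     _required_sets_match(parameters, {})                  # -> False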
+import re from .errors import InputValidationError +def _required_sets(parameters: dict, key: str) -> list[list[str]]: + value = parameters.get(key, []) + if not isinstance(value, list): + return [] + sets: list[list[str]] = [] + for item in value: + if isinstance(item, dict): + required = item.get("required", []) + else: + required = item + if isinstance(required, list): + sets.append([field for field in required if isinstance(field, str)]) + return sets + + +def _required_sets_match(parameters: dict, args: dict) -> bool: + required = parameters.get("required", []) + if any(field not in args for field in required): + return False + + # @@@required-set-contract - some tools need one of several identifier sets + # before they're valid. Keep that contract in runtime metadata so + # validator/readiness stay aligned without sending unsupported top-level + # anyOf/oneOf schema to live providers. + any_of = _required_sets(parameters, "x-leon-required-any-of") or _required_sets(parameters, "anyOf") + if any_of: + return any(all(field in args for field in required) for required in any_of) + + one_of = _required_sets(parameters, "x-leon-required-one-of") or _required_sets(parameters, "oneOf") + if one_of: + matches = [required for required in one_of if all(field in args for field in required)] + return len(matches) == 1 + + return True + + class ValidationResult: def __init__(self, ok: bool, params: dict): self.ok = ok @@ -13,14 +50,43 @@ class ToolValidator: """Three-phase tool argument validation.""" def validate(self, schema: dict, args: dict) -> ValidationResult: - properties = schema.get("parameters", {}).get("properties", {}) - required = schema.get("parameters", {}).get("required", []) + parameters = schema.get("parameters", {}) + properties = parameters.get("properties", {}) # Phase 1: required fields - missing = [f for f in required if f not in args] - if missing: - msgs = [f"The required parameter `{f}` is missing" for f in missing] - raise InputValidationError("\n".join(msgs)) + if not _required_sets_match(parameters, args): + required = parameters.get("required", []) + missing = [f for f in required if f not in args] + if missing: + details = [ + { + "field": field, + "error_code": "REQUIRED_FIELD_MISSING", + "message": f"The required parameter `{field}` is missing", + } + for field in missing + ] + raise InputValidationError( + "\n".join(detail["message"] for detail in details), + error_code="REQUIRED_FIELD_MISSING" if len(details) == 1 else "INPUT_CONSTRAINT_VIOLATION", + details=details, + ) + any_of = _required_sets(parameters, "x-leon-required-any-of") or _required_sets(parameters, "anyOf") + one_of = _required_sets(parameters, "x-leon-required-one-of") or _required_sets(parameters, "oneOf") + if any_of: + message = f"Arguments must satisfy one of these required sets: {any_of}" + raise InputValidationError( + message, + error_code="REQUIRED_SET_UNSATISFIED", + details=[{"error_code": "REQUIRED_SET_UNSATISFIED", "message": message}], + ) + if one_of: + message = f"Arguments must satisfy exactly one of these required sets: {one_of}" + raise InputValidationError( + message, + error_code="REQUIRED_SET_UNSATISFIED", + details=[{"error_code": "REQUIRED_SET_UNSATISFIED", "message": message}], + ) # Phase 2: type check for name, val in args.items(): @@ -28,12 +94,38 @@ def validate(self, schema: dict, args: dict) -> ValidationResult: expected = prop.get("type") if expected and not self._type_matches(val, expected): actual = type(val).__name__ - raise InputValidationError(f"The parameter 
`{name}` type is expected as `{expected}` but provided as `{actual}`") + message = f"The parameter `{name}` type is expected as `{expected}` but provided as `{actual}`" + raise InputValidationError( + message, + error_code="INVALID_TYPE", + details=[ + { + "field": name, + "error_code": "INVALID_TYPE", + "expected": expected, + "actual": actual, + "message": message, + } + ], + ) - # Phase 3: enum validation + # Phase 3: scalar constraints + issues = self._validate_scalar_constraints(properties, args) + if issues: + raise InputValidationError( + "\n".join(str(issue["message"]) for issue in issues), + error_code=str(issues[0]["error_code"]) if len(issues) == 1 else "INPUT_CONSTRAINT_VIOLATION", + details=issues, + ) + + # Phase 4: enum validation issues = self._validate_enum(properties, args) if issues: - raise InputValidationError(json.dumps(issues)) + raise InputValidationError( + json.dumps(issues), + error_code="INVALID_ENUM" if len(issues) == 1 else "INPUT_CONSTRAINT_VIOLATION", + details=issues, + ) return ValidationResult(ok=True, params=args) @@ -51,11 +143,77 @@ def _type_matches(self, val, expected: str) -> bool: return True return isinstance(val, expected_type) - def _validate_enum(self, properties: dict, args: dict) -> list: - issues = [] + def _validate_enum(self, properties: dict, args: dict) -> list[dict[str, object]]: + issues: list[dict[str, object]] = [] for name, val in args.items(): prop = properties.get(name, {}) enum_vals = prop.get("enum") if enum_vals and val not in enum_vals: - issues.append({"field": name, "expected": enum_vals, "got": val}) + issues.append( + { + "field": name, + "error_code": "INVALID_ENUM", + "expected": enum_vals, + "got": val, + "message": f"The parameter `{name}` must be one of {enum_vals}, got {val!r}", + } + ) + return issues + + def _validate_scalar_constraints(self, properties: dict, args: dict) -> list[dict[str, object]]: + issues: list[dict[str, object]] = [] + for name, val in args.items(): + prop = properties.get(name, {}) + if isinstance(val, str): + min_length = prop.get("minLength") + if isinstance(min_length, int) and len(val) < min_length: + issues.append( + { + "field": name, + "error_code": "STRING_TOO_SHORT", + "message": f"The parameter `{name}` must be at least {min_length} characters long", + "minimum": min_length, + } + ) + max_length = prop.get("maxLength") + if isinstance(max_length, int) and len(val) > max_length: + issues.append( + { + "field": name, + "error_code": "STRING_TOO_LONG", + "message": f"The parameter `{name}` must be at most {max_length} characters long", + "maximum": max_length, + } + ) + pattern = prop.get("pattern") + if isinstance(pattern, str) and re.search(pattern, val) is None: + issues.append( + { + "field": name, + "error_code": "PATTERN_MISMATCH", + "message": f"The parameter `{name}` must match pattern `{pattern}`", + "pattern": pattern, + } + ) + if isinstance(val, (int, float)) and not isinstance(val, bool): + minimum = prop.get("minimum") + if isinstance(minimum, (int, float)) and val < minimum: + issues.append( + { + "field": name, + "error_code": "NUMBER_TOO_SMALL", + "message": f"The parameter `{name}` must be at least {minimum}", + "minimum": minimum, + } + ) + maximum = prop.get("maximum") + if isinstance(maximum, (int, float)) and val > maximum: + issues.append( + { + "field": name, + "error_code": "NUMBER_TOO_LARGE", + "message": f"The parameter `{name}` must be at most {maximum}", + "maximum": maximum, + } + ) return issues diff --git a/core/runtime/visibility.py 
b/core/runtime/visibility.py index 5c1a31f5d..cd1e1467f 100644 --- a/core/runtime/visibility.py +++ b/core/runtime/visibility.py @@ -1,7 +1,8 @@ -"""Owner visibility — v3: everything is always visible. +"""Owner visibility helpers. -v2 had a two-layer context/showing state machine for private context. -v3 removes private context entirely — all messages are shown to the owner. +v3 default is "visible unless explicitly hidden". Some backend paths still emit +durable hidden owner messages (for example AskUserQuestion answer anchors), so +this layer must preserve an already-declared display contract. """ from __future__ import annotations @@ -11,23 +12,8 @@ _ALWAYS_SHOWING = {"showing": True} -def compute_visibility(source: str, is_steer: bool, context: str) -> tuple[bool, str]: - """Always visible. Kept for call-site compatibility during transition.""" - return True, "owner" - - -def message_visibility(context: str, tool_names: list[str] | None = None) -> dict[str, Any]: - """Always visible.""" - return _ALWAYS_SHOWING - - -def tool_event_visibility(context: str, tool_name: str) -> dict[str, Any]: - """Always visible.""" - return _ALWAYS_SHOWING - - def annotate_owner_visibility(messages: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], str]: - """Annotate every message as visible.""" + """Annotate messages as visible unless they already carry display metadata.""" for msg in messages: - msg["display"] = _ALWAYS_SHOWING + msg.setdefault("display", _ALWAYS_SHOWING) return messages, "owner" diff --git a/core/tools/command/base.py b/core/tools/command/base.py index e716420b2..7a1356081 100644 --- a/core/tools/command/base.py +++ b/core/tools/command/base.py @@ -4,7 +4,25 @@ This module re-exports for backward compatibility. """ +from __future__ import annotations + from sandbox.interfaces.executor import * # noqa: F401,F403 from sandbox.interfaces.executor import AsyncCommand, BaseExecutor, ExecuteResult __all__ = ["BaseExecutor", "ExecuteResult", "AsyncCommand"] + + +def describe_execution_exception(exc: Exception) -> str: + detail = str(exc).strip() + if detail: + return detail + return exc.__class__.__name__ + + +def require_subprocess_pipe[TPipe](pipe: TPipe | None, name: str) -> TPipe: + # @@@persistent-shell-pipe-contract - persistent shell executors only work + # when asyncio created real stdio pipes; fail loudly instead of pretending + # optional streams are always present. 
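+    # A hedged usage sketch (names illustrative, not from this diff): callers
+    # narrow the optional pipe once, then use the result unconditionally:
+    #     stdin = require_subprocess_pipe(proc.stdin, "stdin")
+    #     stdin.write(b"echo ok\n")
+    #     await stdin.drain()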
+ if pipe is None: + raise RuntimeError(f"Subprocess missing {name} pipe") + return pipe diff --git a/core/tools/command/bash/executor.py b/core/tools/command/bash/executor.py index d559970d0..c4c060f53 100644 --- a/core/tools/command/bash/executor.py +++ b/core/tools/command/bash/executor.py @@ -6,7 +6,7 @@ import os import uuid -from ..base import AsyncCommand, BaseExecutor, ExecuteResult +from ..base import AsyncCommand, BaseExecutor, ExecuteResult, require_subprocess_pipe _RUNNING_COMMANDS: dict[str, AsyncCommand] = {} @@ -35,8 +35,9 @@ async def _ensure_session(self, env: dict[str, str]) -> asyncio.subprocess.Proce cwd=self._current_cwd, ) # Disable PS1 prompt - self._session.stdin.write(b"export PS1=''\n") - await self._session.stdin.drain() + stdin = require_subprocess_pipe(self._session.stdin, "stdin") + stdin.write(b"export PS1=''\n") + await stdin.drain() return self._session async def _send_command(self, proc: asyncio.subprocess.Process, command: str) -> tuple[str, str, int]: @@ -44,14 +45,16 @@ async def _send_command(self, proc: asyncio.subprocess.Process, command: str) -> marker = f"__END_{uuid.uuid4().hex[:8]}__" full_cmd = f"{command}\necho {marker} $?\n" - proc.stdin.write(full_cmd.encode()) - await proc.stdin.drain() + stdin = require_subprocess_pipe(proc.stdin, "stdin") + stdout = require_subprocess_pipe(proc.stdout, "stdout") + stdin.write(full_cmd.encode()) + await stdin.drain() stdout_lines = [] exit_code = 0 while True: - line = await proc.stdout.readline() + line = await stdout.readline() if not line: break line_str = line.decode("utf-8", errors="replace") diff --git a/core/tools/command/hooks/dangerous_commands.py b/core/tools/command/hooks/dangerous_commands.py index 496251292..3abde2337 100644 --- a/core/tools/command/hooks/dangerous_commands.py +++ b/core/tools/command/hooks/dangerous_commands.py @@ -1,6 +1,7 @@ """Dangerous commands hook - blocks commands that may harm the system.""" import re +import shlex from pathlib import Path from typing import Any @@ -40,6 +41,32 @@ class DangerousCommandsHook(BashHook): r"\bssh\b", ] + DEFAULT_BLOCKED_BASE_COMMANDS = { + "rmdir", + "chmod", + "chown", + "sudo", + "su", + "kill", + "pkill", + "reboot", + "shutdown", + "mkfs", + "dd", + } + NETWORK_BASE_COMMANDS = { + "curl", + "wget", + "scp", + "sftp", + "rsync", + "ssh", + } + OPERATOR_TOKENS = {";", ";;", "&", "&&", "|", "||", "(", ")"} + ENV_ASSIGN_RE = re.compile(r"^[A-Za-z_]\w*=") + ANSI_C_QUOTE_RE = re.compile(r"\$'[^']*'") + LOCALE_QUOTE_RE = re.compile(r'\$"[^"]*"') + def __init__( self, workspace_root: Path | str | None = None, @@ -58,13 +85,140 @@ def __init__( patterns.extend(custom_blocked) self.compiled_patterns = [re.compile(p, re.IGNORECASE) for p in patterns] + self.blocked_base_commands = set(self.DEFAULT_BLOCKED_BASE_COMMANDS) + if block_network: + self.blocked_base_commands.update(self.NETWORK_BASE_COMMANDS) if verbose: print(f"[DangerousCommands] Loaded {len(self.compiled_patterns)} blocked command patterns") + @staticmethod + def _unquoted_command(command: str) -> str: + # @@@bash-hook-unquoted-scan - dangerous regexes should only inspect executable shell surface, + # not literal text inside quotes. 
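+    # A hedged example of the intended contract (assumed, not from a test in
+    # this diff):
+    #     _unquoted_command('echo "rm -rf /" && rm -rf /tmp')
+    #     # -> 'echo  && rm -rf /tmp'; the quoted payload never reaches the regexes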
+ pieces: list[str] = [] + in_single = False + in_double = False + escaped = False + + for char in command: + if escaped: + if not in_single and not in_double: + pieces.append(char) + escaped = False + continue + + if char == "\\" and not in_single: + if not in_double: + pieces.append(char) + escaped = True + continue + + if char == "'" and not in_double: + in_single = not in_single + continue + + if char == '"' and not in_single: + in_double = not in_double + continue + + if not in_single and not in_double and char == "#": + prev = pieces[-1] if pieces else "" + if not prev or prev.isspace(): + break + + if not in_single and not in_double: + pieces.append(char) + + return "".join(pieces) + + @classmethod + def _has_dangerous_rm_flags(cls, tokens: list[str], start: int) -> bool: + recursive = False + force = False + + for token in tokens[start:]: + if token in cls.OPERATOR_TOKENS: + break + if token == "--": + break + lowered = token.lower() + if lowered == "--recursive": + recursive = True + elif lowered == "--force": + force = True + elif lowered.startswith("-"): + short_flags = lowered[1:] + recursive = recursive or "r" in short_flags + force = force or "f" in short_flags + if recursive and force: + return True + + return False + + def _find_dangerous_command_word(self, command: str) -> str | None: + try: + lexer = shlex.shlex(command, posix=True, punctuation_chars=";&|()<>") + except ValueError: + return None + lexer.whitespace_split = True + lexer.commenters = "#" + tokens = list(lexer) + command_position = True + + for index, token in enumerate(tokens): + if token in self.OPERATOR_TOKENS: + command_position = True + continue + + if token in {"<", ">", ">>", "<<", "<<<", "<>", ">|", "&>", "2>", "1>"}: + command_position = False + continue + + if not command_position: + continue + + if self.ENV_ASSIGN_RE.match(token): + continue + + if token in self.blocked_base_commands: + return token + + if token == "rm" and self._has_dangerous_rm_flags(tokens, index + 1): + return "rm -rf" + + command_position = False + + return None + def check_command(self, command: str, context: dict[str, Any]) -> HookResult: + stripped = command.strip() + if self.ANSI_C_QUOTE_RE.search(stripped) or self.LOCALE_QUOTE_RE.search(stripped): + return HookResult.block_command( + error_message=( + f"❌ SECURITY ERROR: Dangerous command detected\n" + f" Command: {command[:100]}\n" + f" Reason: Obfuscated shell quoting is blocked for security reasons\n" + f" Pattern: raw_obfuscation:$quote\n" + f" 💡 If you need to perform this operation, ask the user for permission." + ) + ) + + dangerous_word = self._find_dangerous_command_word(stripped) + if dangerous_word is not None: + return HookResult.block_command( + error_message=( + f"❌ SECURITY ERROR: Dangerous command detected\n" + f" Command: {command[:100]}\n" + f" Reason: This command is blocked for security reasons\n" + f" Pattern: command_word:{dangerous_word}\n" + f" 💡 If you need to perform this operation, ask the user for permission." 
+ ) + ) + + scanned = self._unquoted_command(stripped) for pattern in self.compiled_patterns: - if pattern.search(command.strip()): + if pattern.search(scanned): return HookResult.block_command( error_message=( f"❌ SECURITY ERROR: Dangerous command detected\n" diff --git a/core/tools/command/hooks/loader.py b/core/tools/command/hooks/loader.py index d46ee78b9..449b2901c 100644 --- a/core/tools/command/hooks/loader.py +++ b/core/tools/command/hooks/loader.py @@ -39,13 +39,3 @@ def load_hooks( hooks.sort(key=lambda h: h.priority) print(f"[BashHooks] Total {len(hooks)} hooks loaded") return hooks - - -def discover_hooks() -> list[str]: - """Discover all available hook plugins without loading them.""" - hooks_dir = Path(__file__).parent - return [ - py_file.stem - for py_file in hooks_dir.glob("*.py") - if not py_file.name.startswith("_") and py_file.name not in ["base.py", "loader.py"] - ] diff --git a/core/tools/command/middleware.py b/core/tools/command/middleware.py index dcd6453a4..c01d2e71d 100644 --- a/core/tools/command/middleware.py +++ b/core/tools/command/middleware.py @@ -9,7 +9,7 @@ import json import logging from pathlib import Path -from typing import Any +from typing import Any, Literal from langchain.agents.middleware import AgentMiddleware, AgentState from langchain.agents.middleware.types import ModelRequest, ModelResponse @@ -18,7 +18,7 @@ from sandbox.shell_output import normalize_pty_result -from .base import AsyncCommand, BaseExecutor +from .base import AsyncCommand, BaseExecutor, describe_execution_exception from .dispatcher import get_executor, get_shell_info logger = logging.getLogger(__name__) @@ -203,7 +203,7 @@ async def _execute_blocking(self, command_line: str, work_dir: str | None, timeo env=self.env, ) except Exception as e: - return f"Error executing command: {e}" + return f"Error executing command: {describe_execution_exception(e)}" return result.to_tool_result() def set_agent(self, agent: Any) -> None: @@ -219,7 +219,7 @@ async def _execute_async(self, command_line: str, work_dir: str | None, timeout: env=self.env, ) except Exception as e: - return f"Error starting async command: {e}" + return f"Error starting async command: {describe_execution_exception(e)}" # Emit task_start event runtime = getattr(self._agent, "runtime", None) if self._agent else None @@ -319,7 +319,7 @@ async def _monitor_async_command(self, command_id: str, command_line: str, runti async def _inject_command_notification( self, command_id: str, - status: str, + status: Literal["completed", "failed"], exit_code: int, command_line: str, output: str, diff --git a/core/tools/command/service.py b/core/tools/command/service.py index 475289b9c..e1927b82b 100644 --- a/core/tools/command/service.py +++ b/core/tools/command/service.py @@ -15,11 +15,13 @@ import asyncio import json import logging +from collections.abc import Awaitable, Callable from pathlib import Path from typing import Any -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry -from core.tools.command.base import BaseExecutor +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema +from core.runtime.tool_result import ToolResultEnvelope, tool_permission_denied +from core.tools.command.base import BaseExecutor, describe_execution_exception from core.tools.command.dispatcher import get_executor logger = logging.getLogger(__name__) @@ -61,35 +63,39 @@ def _register(self, registry: ToolRegistry) -> None: ToolEntry( name="Bash", mode=ToolMode.INLINE, - schema={ - "name": "Bash", - 
"description": ("Execute shell command. OS auto-detects shell (mac->zsh, linux->bash, win->powershell)."), - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "Command to execute", - }, - "description": { - "type": "string", - "description": ( - "Human-readable description of what this command does. " - "Required when run_in_background is true; shown in the background task indicator." - ), - }, - "run_in_background": { - "type": "boolean", - "description": "Run in background (default: false). Returns task ID for status queries.", - }, - "timeout": { - "type": "integer", - "description": "Timeout in milliseconds (default: 120000)", - }, + schema=make_tool_schema( + name="Bash", + description=( + "Execute shell command (zsh on macOS, bash on Linux, PowerShell on Windows). " + "Default timeout 120s (max 600s). Dangerous commands are blocked. " + "Prefer dedicated tools over Bash: Read over cat, Grep over grep/rg, Glob over find/ls, Edit over sed/awk." + ), + properties={ + "command": { + "type": "string", + "description": "Command to execute", + "minLength": 1, + }, + "description": { + "type": "string", + "description": ( + "Human-readable description of what this command does. " + "Required when run_in_background is true; shown in the background task indicator." + ), + }, + "run_in_background": { + "type": "boolean", + "description": "Run in background (default: false). Returns task ID for status queries.", + }, + "timeout": { + "type": "integer", + "description": "Timeout in milliseconds (default: 120000)", + "minimum": 1, + "maximum": 600000, }, - "required": ["command"], }, - }, + required=["command"], + ), handler=self._bash, source="CommandService", ) @@ -113,10 +119,13 @@ async def _bash( description: str = "", run_in_background: bool = False, timeout: int = DEFAULT_TIMEOUT_MS, - ) -> str: + ) -> str | ToolResultEnvelope: allowed, error_msg = self._check_hooks(command) if not allowed: - return error_msg + return tool_permission_denied( + error_msg, + metadata={"policy": "command_hook"}, + ) work_dir = None if self._executor.runtime_owns_cwd else str(self.workspace_root) timeout_secs = timeout / 1000.0 @@ -135,7 +144,7 @@ async def _execute_blocking(self, command: str, work_dir: str | None, timeout_se env=self.env, ) except Exception as e: - return f"Error executing command: {e}" + return f"Error executing command: {describe_execution_exception(e)}" return result.to_tool_result() async def _execute_async(self, command: str, work_dir: str | None, timeout_secs: float, description: str = "") -> str: @@ -146,7 +155,7 @@ async def _execute_async(self, command: str, work_dir: str | None, timeout_secs: env=self.env, ) except Exception as e: - return f"Error starting async command: {e}" + return f"Error starting async command: {describe_execution_exception(e)}" task_id = async_cmd.command_id @@ -156,7 +165,7 @@ async def _execute_async(self, command: str, work_dir: str | None, timeout_secs: self._background_runs[task_id] = _BashBackgroundRun(async_cmd, command, description=description) # Build emit_fn for SSE task lifecycle events - emit_fn = None + emit_fn: Callable[[dict[str, Any]], Awaitable[None] | None] | None = None parent_thread_id = None try: from backend.web.event_bus import get_event_bus @@ -178,7 +187,7 @@ async def _execute_async(self, command: str, work_dir: str | None, timeout_secs: # Emit task_start so the frontend dot lights up immediately if emit_fn is not None: - await emit_fn( + emission = emit_fn( { "event": 
"task_start", "data": json.dumps( @@ -193,6 +202,8 @@ async def _execute_async(self, command: str, work_dir: str | None, timeout_secs: ), } ) + if asyncio.iscoroutine(emission): + await emission if parent_thread_id: asyncio.create_task( @@ -207,7 +218,7 @@ async def _notify_bash_completion( async_cmd: Any, command: str, parent_thread_id: str, - emit_fn: Any = None, + emit_fn: Callable[[dict[str, Any]], Awaitable[None] | None] | None = None, description: str = "", ) -> None: """Poll until async command finishes, then enqueue CommandNotification.""" @@ -220,7 +231,7 @@ async def _notify_bash_completion( # Emit task_done so the frontend dot updates in real time if emit_fn is not None: try: - await emit_fn( + emission = emit_fn( { "event": "task_done", "data": json.dumps( @@ -232,6 +243,8 @@ async def _notify_bash_completion( ), } ) + if asyncio.iscoroutine(emission): + await emission except Exception: pass diff --git a/core/tools/command/zsh/executor.py b/core/tools/command/zsh/executor.py index 6990531aa..2d19be8ec 100644 --- a/core/tools/command/zsh/executor.py +++ b/core/tools/command/zsh/executor.py @@ -6,7 +6,7 @@ import os import uuid -from ..base import AsyncCommand, BaseExecutor, ExecuteResult +from ..base import AsyncCommand, BaseExecutor, ExecuteResult, require_subprocess_pipe _RUNNING_COMMANDS: dict[str, AsyncCommand] = {} @@ -35,8 +35,9 @@ async def _ensure_session(self, env: dict[str, str]) -> asyncio.subprocess.Proce cwd=self._current_cwd, ) # Disable PS1 prompt - self._session.stdin.write(b"export PS1=''\n") - await self._session.stdin.drain() + stdin = require_subprocess_pipe(self._session.stdin, "stdin") + stdin.write(b"export PS1=''\n") + await stdin.drain() return self._session async def _send_command(self, proc: asyncio.subprocess.Process, command: str) -> tuple[str, str, int]: @@ -44,14 +45,16 @@ async def _send_command(self, proc: asyncio.subprocess.Process, command: str) -> marker = f"__END_{uuid.uuid4().hex[:8]}__" full_cmd = f"{command}\necho {marker} $?\n" - proc.stdin.write(full_cmd.encode()) - await proc.stdin.drain() + stdin = require_subprocess_pipe(proc.stdin, "stdin") + stdout = require_subprocess_pipe(proc.stdout, "stdout") + stdin.write(full_cmd.encode()) + await stdin.drain() stdout_lines = [] exit_code = 0 while True: - line = await proc.stdout.readline() + line = await stdout.readline() if not line: break line_str = line.decode("utf-8", errors="replace") diff --git a/core/tools/cron/service.py b/core/tools/cron/service.py new file mode 100644 index 000000000..026c7d9be --- /dev/null +++ b/core/tools/cron/service.py @@ -0,0 +1,102 @@ +"""CronToolService — agent-callable cron job CRUD on top of existing backend service.""" + +from __future__ import annotations + +import json +from typing import Any + +from croniter import croniter + +from backend.web.services import cron_job_service +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema + +CRON_CREATE_SCHEMA = make_tool_schema( + name="CronCreate", + description="Create a cron job using the existing Mycel cron_jobs substrate.", + properties={ + "name": {"type": "string", "description": "Human-readable cron job name", "minLength": 1}, + "cron_expression": { + "type": "string", + "description": "Standard 5-field cron expression", + "minLength": 1, + }, + "description": {"type": "string", "description": "Optional cron job description"}, + "task_template": { + "type": "string", + "description": "JSON string template used when the cron job creates a task", + }, + "enabled": 
{"type": "boolean", "description": "Whether the cron job starts enabled"}, + }, + required=["name", "cron_expression"], +) + +CRON_DELETE_SCHEMA = make_tool_schema( + name="CronDelete", + description="Delete a cron job by ID.", + properties={ + "job_id": {"type": "string", "description": "Cron job ID returned by CronCreate", "minLength": 1}, + }, + required=["job_id"], +) + +CRON_LIST_SCHEMA = make_tool_schema( + name="CronList", + description="List all cron jobs in the current Mycel cron_jobs substrate.", + properties={}, +) + + +class CronToolService: + def __init__(self, registry: ToolRegistry): + self._register(registry) + + def _register(self, registry: ToolRegistry) -> None: + for name, schema, handler, read_only in [ + ("CronCreate", CRON_CREATE_SCHEMA, self._create, False), + ("CronDelete", CRON_DELETE_SCHEMA, self._delete, False), + ("CronList", CRON_LIST_SCHEMA, self._list, True), + ]: + registry.register( + ToolEntry( + name=name, + mode=ToolMode.DEFERRED, + schema=schema, + handler=handler, + source="CronToolService", + is_concurrency_safe=read_only, + is_read_only=read_only, + ) + ) + + def _create(self, **args: Any) -> str: + name = str(args.get("name", "")).strip() + cron_expression = str(args.get("cron_expression", "")).strip() + if not croniter.is_valid(cron_expression): + raise ValueError(f"Invalid cron expression: {cron_expression!r}") + + task_template = args.get("task_template", "{}") + if isinstance(task_template, str): + try: + json.loads(task_template) + except json.JSONDecodeError as exc: + raise ValueError("task_template must be valid JSON") from exc + + item = cron_job_service.create_cron_job( + name=name, + cron_expression=cron_expression, + description=str(args.get("description", "")), + task_template=task_template, + enabled=int(bool(args.get("enabled", True))), + ) + return json.dumps({"item": item}, ensure_ascii=False, indent=2) + + def _delete(self, **args: Any) -> str: + job_id = str(args.get("job_id", "")).strip() + ok = cron_job_service.delete_cron_job(job_id) + if not ok: + raise ValueError(f"Cron job not found: {job_id}") + return json.dumps({"ok": True, "id": job_id}, ensure_ascii=False, indent=2) + + def _list(self, **_args: Any) -> str: + items = cron_job_service.list_cron_jobs() + return json.dumps({"items": items, "total": len(items)}, ensure_ascii=False, indent=2) diff --git a/core/tools/filesystem/local_backend.py b/core/tools/filesystem/local_backend.py index 2bad2d45b..50bbe58a0 100644 --- a/core/tools/filesystem/local_backend.py +++ b/core/tools/filesystem/local_backend.py @@ -18,14 +18,16 @@ class LocalBackend(FileSystemBackend): def read_file(self, path: str) -> FileReadResult: p = Path(path) - content = p.read_text(encoding="utf-8") + with p.open("r", encoding="utf-8", newline="") as f: + content = f.read() return FileReadResult(content=content, size=p.stat().st_size) def write_file(self, path: str, content: str) -> FileWriteResult: try: p = Path(path) p.parent.mkdir(parents=True, exist_ok=True) - p.write_text(content, encoding="utf-8") + with p.open("w", encoding="utf-8", newline="") as f: + f.write(content) return FileWriteResult(success=True) except Exception as e: return FileWriteResult(success=False, error=str(e)) diff --git a/core/tools/filesystem/middleware.py b/core/tools/filesystem/middleware.py index 0844d892a..8519d30ea 100644 --- a/core/tools/filesystem/middleware.py +++ b/core/tools/filesystem/middleware.py @@ -13,8 +13,8 @@ from __future__ import annotations -from collections.abc import Awaitable, Callable -from pathlib 
import Path +from collections.abc import Awaitable, Callable, Mapping +from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING, Any from langchain.agents.middleware.types import ( @@ -33,6 +33,28 @@ from core.operations import FileOperationRecorder +def _remote_path(path: str | Path) -> PurePosixPath: + # @@@remote-posix-path-contract - Middleware callers still hand us sandbox + # POSIX paths even when tests run on Windows, so keep validation and + # workspace comparisons in POSIX space instead of host-native path rules. + return PurePosixPath(str(path).replace("\\", "/")) + + +type ResolvedPath = Path | PurePosixPath + + +def _require_resolved_path(resolved: ResolvedPath | None) -> ResolvedPath: + if resolved is None: + raise RuntimeError("Validated filesystem path unexpectedly missing") + return resolved + + +def _require_local_path(resolved: ResolvedPath) -> Path: + if not isinstance(resolved, Path): + raise RuntimeError(f"Expected local filesystem path, got remote path: {resolved}") + return resolved + + class FileSystemMiddleware(AgentMiddleware): """FileSystem Middleware - pure middleware implementation of file operations. @@ -80,7 +102,12 @@ def __init__( backend = LocalBackend() self.backend = backend - self.workspace_root = Path(workspace_root) if backend.is_remote else Path(workspace_root).resolve() + if backend.is_remote: + self.workspace_root: ResolvedPath = _remote_path(workspace_root) + else: + local_workspace_root = Path(workspace_root).resolve() + local_workspace_root.mkdir(parents=True, exist_ok=True) + self.workspace_root = local_workspace_root self.max_file_size = max_file_size self.allowed_extensions = allowed_extensions self.hooks = hooks or [] @@ -91,13 +118,10 @@ def __init__( "multi_edit": True, "list_dir": True, } - self._read_files: dict[Path, float | None] = {} + self._read_files: dict[Path | PurePosixPath, float | None] = {} self.operation_recorder = operation_recorder self.verbose = verbose - self.extra_allowed_paths: list[Path] = [Path(p) if backend.is_remote else Path(p).resolve() for p in (extra_allowed_paths or [])] - - if not backend.is_remote: - self.workspace_root.mkdir(parents=True, exist_ok=True) + self.extra_allowed_paths = [_remote_path(p) if backend.is_remote else Path(p).resolve() for p in (extra_allowed_paths or [])] if verbose: backend_name = type(backend).__name__ @@ -105,17 +129,20 @@ def __init__( if self.hooks: print(f"[FileSystemMiddleware] Loaded {len(self.hooks)} hooks") - def _validate_path(self, path: str, operation: str) -> tuple[bool, str, Path | None]: + def _validate_path(self, path: str, operation: str) -> tuple[bool, str, Path | PurePosixPath | None]: """Validate path for file operations. 
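+        A hedged illustration (paths invented): with a local backend rooted at
+        /ws, "/ws/a.txt" passes and yields (True, "", Path("/ws/a.txt")), while
+        a relative "a.txt" fails fast with
+        (False, "Path must be absolute: a.txt", None).
+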
Returns: (is_valid, error_message, resolved_path) """ - if not Path(path).is_absolute(): + if self.backend.is_remote: + if not _remote_path(path).is_absolute(): + return False, f"Path must be absolute: {path}", None + elif not Path(path).is_absolute(): return False, f"Path must be absolute: {path}", None try: - resolved = Path(path) if self.backend.is_remote else Path(path).resolve() + resolved = _remote_path(path) if self.backend.is_remote else Path(path).resolve() except Exception as e: return False, f"Invalid path: {path} ({e})", None @@ -146,7 +173,7 @@ def _validate_path(self, path: str, operation: str) -> tuple[bool, str, Path | N return True, "", resolved - def _check_file_staleness(self, resolved: Path) -> str | None: + def _check_file_staleness(self, resolved: Path | PurePosixPath) -> str | None: """Check if file has been modified since last read. Returns: @@ -165,7 +192,7 @@ def _check_file_staleness(self, resolved: Path) -> str | None: return None - def _update_file_tracking(self, resolved: Path) -> None: + def _update_file_tracking(self, resolved: Path | PurePosixPath) -> None: """Update mtime tracking after successful file operation.""" self._read_files[resolved] = self.backend.file_mtime(str(resolved)) @@ -203,7 +230,7 @@ def _record_operation( except Exception as e: raise RuntimeError(f"[FileSystemMiddleware] Failed to record operation: {e}") from e - def _count_lines(self, resolved: Path) -> int: + def _count_lines(self, resolved: Path | PurePosixPath) -> int: """Count total lines in a file (for error messages).""" try: raw = self.backend.read_file(str(resolved)) @@ -222,6 +249,7 @@ def _read_file_impl(self, file_path: str, offset: int = 0, limit: int | None = N if not is_valid: return ReadResult(file_path=file_path, file_type=None, error=error) # type: ignore[arg-type] + resolved = _require_resolved_path(resolved) file_size = self.backend.file_size(str(resolved)) # Absolute limit — always reject (even with offset/limit) @@ -265,7 +293,13 @@ def _read_file_impl(self, file_path: str, offset: int = 0, limit: int | None = N if isinstance(self.backend, LocalBackend): limits = ReadLimits() - result = read_file_dispatch(path=resolved, limits=limits, offset=offset if offset > 0 else None, limit=limit) + local_resolved = _require_local_path(resolved) + result = read_file_dispatch( + path=local_resolved, + limits=limits, + offset=offset if offset > 0 else None, + limit=limit, + ) if not result.error: self._update_file_tracking(resolved) return result @@ -314,6 +348,7 @@ def _write_file_impl(self, file_path: str, content: str) -> str: if not is_valid: return error + resolved = _require_resolved_path(resolved) if self.backend.file_exists(str(resolved)): return f"File already exists: {file_path}\nUse edit_file to modify existing files" @@ -342,6 +377,7 @@ def _edit_file_impl(self, file_path: str, old_string: str, new_string: str) -> s if not is_valid: return error + resolved = _require_resolved_path(resolved) if not self.backend.file_exists(str(resolved)): return f"File not found: {file_path}" @@ -388,6 +424,7 @@ def _multi_edit_impl(self, file_path: str, edits: list[dict[str, str]]) -> str: if not is_valid: return error + resolved = _require_resolved_path(resolved) if not self.backend.file_exists(str(resolved)): return f"File not found: {file_path}" @@ -435,6 +472,7 @@ def _list_dir_impl(self, directory_path: str) -> str: if not is_valid: return error + resolved = _require_resolved_path(resolved) if not self.backend.is_dir(str(resolved)): if self.backend.file_exists(str(resolved)): 
return f"Not a directory: {directory_path}" @@ -461,7 +499,7 @@ def _list_dir_impl(self, directory_path: str) -> str: except Exception as e: return f"Error listing directory: {e}" - def _get_tool_schemas(self) -> list[dict]: + def _get_tool_schemas(self) -> list[dict[str, Any]]: """获取文件系统工具 schema(sync/async 共享)""" return [ { @@ -571,12 +609,12 @@ def _get_tool_schemas(self) -> list[dict]: "parameters": { "type": "object", "properties": { - "directory_path": { + "path": { "type": "string", "description": "Absolute directory path (e.g., /path/to/dir). Do NOT use '.' or '..'", }, }, - "required": ["directory_path"], + "required": ["path"], }, }, }, @@ -602,7 +640,7 @@ async def awrap_model_call( tools.extend(self._get_tool_schemas()) return await handler(request.override(tools=tools)) - def _handle_tool_call(self, tool_call: dict) -> ToolMessage | None: + def _handle_tool_call(self, tool_call: Mapping[str, Any]) -> ToolMessage | None: """Handle filesystem tool calls. Returns ToolMessage if handled, None otherwise.""" tool_name = tool_call.get("name") args = tool_call.get("args", {}) @@ -633,7 +671,7 @@ def _handle_tool_call(self, tool_call: dict) -> ToolMessage | None: return ToolMessage(content=result, tool_call_id=tool_call_id) if tool_name == self.TOOL_LIST_DIR: - result = self._list_dir_impl(directory_path=args.get("directory_path", "")) + result = self._list_dir_impl(directory_path=args.get("path", "")) return ToolMessage(content=result, tool_call_id=tool_call_id) return None diff --git a/core/tools/filesystem/read/dispatcher.py b/core/tools/filesystem/read/dispatcher.py index f880e60e1..0119f424e 100644 --- a/core/tools/filesystem/read/dispatcher.py +++ b/core/tools/filesystem/read/dispatcher.py @@ -22,6 +22,7 @@ def read_file( limits: ReadLimits | None = None, offset: int | None = None, limit: int | None = None, + pages: str | None = None, ) -> ReadResult: """ Read file with type-specific handling. @@ -38,6 +39,7 @@ def read_file( limits: ReadLimits configuration (uses defaults if None) offset: Start line for text files (1-indexed) limit: Number of lines for text files + pages: Optional page range for document files, e.g. 
"1" or "3-5" Returns: ReadResult with content and metadata @@ -68,7 +70,8 @@ def read_file( return read_binary(path) if file_type == FileType.DOCUMENT: - return _read_document(path, limits, offset, limit) + start_page, limit_pages = _parse_pages_arg(pages, offset, limit) + return _read_document(path, limits, start_page, limit_pages) if file_type == FileType.NOTEBOOK: return read_notebook(path, limits, start_cell=offset, limit_cells=limit) @@ -79,6 +82,32 @@ def read_file( return read_text(path, limits, offset, limit) +def _parse_pages_arg( + pages: str | None, + offset: int | None, + limit: int | None, +) -> tuple[int | None, int | None]: + if pages is None: + return offset, limit + + raw = pages.strip() + if not raw: + raise ValueError("pages must not be empty") + + if "-" in raw: + start_raw, end_raw = raw.split("-", 1) + start_page = int(start_raw) + end_page = int(end_raw) + if start_page <= 0 or end_page < start_page: + raise ValueError(f"Invalid pages range: {pages}") + return start_page, end_page - start_page + 1 + + start_page = int(raw) + if start_page <= 0: + raise ValueError(f"Invalid page number: {pages}") + return start_page, 1 + + def _read_document( path: Path, limits: ReadLimits, diff --git a/core/tools/filesystem/read/readers/pdf.py b/core/tools/filesystem/read/readers/pdf.py index 6f43eabfa..9a1f58bb5 100644 --- a/core/tools/filesystem/read/readers/pdf.py +++ b/core/tools/filesystem/read/readers/pdf.py @@ -3,11 +3,14 @@ from __future__ import annotations from pathlib import Path +from typing import Any from core.tools.filesystem.read.types import FileType, ReadLimits, ReadResult +_pymupdf: Any | None = None + try: - import pymupdf + import pymupdf as _pymupdf HAS_PYMUPDF = True except ImportError: @@ -34,6 +37,8 @@ def read_pdf( """ if not HAS_PYMUPDF: return _no_pymupdf_result(path) + if _pymupdf is None: + raise RuntimeError("pymupdf import unexpectedly unavailable") stat = path.stat() result = ReadResult( @@ -43,7 +48,7 @@ def read_pdf( ) try: - doc = pymupdf.open(path) + doc = _pymupdf.open(path) except Exception as e: result.error = f"Error opening PDF: {e}" return result diff --git a/core/tools/filesystem/read/readers/pptx.py b/core/tools/filesystem/read/readers/pptx.py index 822f29a37..7f2dde962 100644 --- a/core/tools/filesystem/read/readers/pptx.py +++ b/core/tools/filesystem/read/readers/pptx.py @@ -3,6 +3,7 @@ from __future__ import annotations from pathlib import Path +from typing import Any, cast from core.tools.filesystem.read.types import FileType, ReadLimits, ReadResult @@ -43,7 +44,9 @@ def read_pptx( ) try: - prs = Presentation(path) + # @@@pptx-callable-seam - python-pptx exports Presentation as a factory function at runtime, + # but pyright sees a module-like surface here. Keep the third-party seam local. 
+ prs = cast(Any, Presentation)(str(path)) except Exception as e: result.error = f"Error opening PPTX: {e}" return result diff --git a/core/tools/filesystem/service.py b/core/tools/filesystem/service.py index a8cf1c9c6..ecfa0b7c5 100644 --- a/core/tools/filesystem/service.py +++ b/core/tools/filesystem/service.py @@ -10,18 +10,90 @@ from __future__ import annotations import logging -from pathlib import Path -from typing import TYPE_CHECKING, Any - -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry +import tempfile +import threading +from collections import OrderedDict +from collections.abc import Sequence +from dataclasses import dataclass +from pathlib import Path, PurePosixPath +from typing import TYPE_CHECKING, Any, Literal + +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema +from core.runtime.tool_result import ToolResultEnvelope, tool_success from core.tools.filesystem.backend import FileSystemBackend from core.tools.filesystem.read import ReadLimits from core.tools.filesystem.read import read_file as read_file_dispatch +from core.tools.filesystem.read.readers.binary import IMAGE_EXTENSIONS, MAX_IMAGE_SIZE +from core.tools.filesystem.read.types import FileType, detect_file_type if TYPE_CHECKING: from core.operations import FileOperationRecorder logger = logging.getLogger(__name__) +DEFAULT_READ_STATE_CACHE_SIZE = 100 +ABSOLUTE_PATH_PATTERN = r"^(?:/|[A-Za-z]:[\\/])" +type ResolvedPath = Path | PurePosixPath +type ValidationResult = tuple[Literal[True], str, ResolvedPath] | tuple[Literal[False], str, None] + + +def _remote_path(path: str | Path) -> PurePosixPath: + # @@@remote-posix-path-contract - Remote filesystem tools operate on sandbox + # POSIX paths, not host-native paths. Preserve forward-slash semantics even + # when the host process is running on Windows. 
+ return PurePosixPath(str(path).replace("\\", "/")) + + +@dataclass +class _ReadFileState: + timestamp: float | None + is_partial: bool + + +class _ReadFileStateCache: + def __init__(self, max_entries: int = DEFAULT_READ_STATE_CACHE_SIZE): + self._max_entries = max_entries + self._entries: OrderedDict[ResolvedPath, _ReadFileState] = OrderedDict() + + @staticmethod + def make_state(*, timestamp: float | None, is_partial: bool) -> _ReadFileState: + return _ReadFileState(timestamp=timestamp, is_partial=is_partial) + + def get(self, path: ResolvedPath) -> _ReadFileState | None: + state = self._entries.get(path) + if state is None: + return None + self._entries.move_to_end(path) + return state + + def set(self, path: ResolvedPath, state: _ReadFileState) -> None: + self._entries[path] = state + self._entries.move_to_end(path) + while len(self._entries) > self._max_entries: + self._entries.popitem(last=False) + + def clone(self) -> _ReadFileStateCache: + clone = _ReadFileStateCache(max_entries=self._max_entries) + clone._entries = OrderedDict( + (path, _ReadFileState(timestamp=state.timestamp, is_partial=state.is_partial)) for path, state in self._entries.items() + ) + return clone + + def merge(self, other: _ReadFileStateCache) -> None: + for path, incoming in other._entries.items(): + existing = self._entries.get(path) + if existing is None or self._is_newer(incoming, existing): + self.set( + path, + _ReadFileState(timestamp=incoming.timestamp, is_partial=incoming.is_partial), + ) + + @staticmethod + def _is_newer(incoming: _ReadFileState, existing: _ReadFileState) -> bool: + if incoming.timestamp is None: + return False + if existing.timestamp is None: + return True + return incoming.timestamp >= existing.timestamp class FileSystemService: @@ -37,7 +109,9 @@ def __init__( hooks: list[Any] | None = None, operation_recorder: FileOperationRecorder | None = None, backend: FileSystemBackend | None = None, - extra_allowed_paths: list[str | Path] | None = None, + extra_allowed_paths: Sequence[str | Path] | None = None, + max_read_cache_entries: int = DEFAULT_READ_STATE_CACHE_SIZE, + max_edit_file_size: int | None = None, ): if backend is None: from core.tools.filesystem.local_backend import LocalBackend @@ -45,15 +119,17 @@ def __init__( backend = LocalBackend() self.backend = backend - self.workspace_root = Path(workspace_root) if backend.is_remote else Path(workspace_root).resolve() + self.workspace_root: ResolvedPath = _remote_path(workspace_root) if backend.is_remote else Path(workspace_root).resolve() self.max_file_size = max_file_size self.allowed_extensions = allowed_extensions self.hooks = hooks or [] - self._read_files: dict[Path, float | None] = {} + self._read_files = _ReadFileStateCache(max_entries=max_read_cache_entries) + self.max_edit_file_size = max_file_size if max_edit_file_size is None else max_edit_file_size self.operation_recorder = operation_recorder - self.extra_allowed_paths: list[Path] = [Path(p) if backend.is_remote else Path(p).resolve() for p in (extra_allowed_paths or [])] + self.extra_allowed_paths = [_remote_path(p) if backend.is_remote else Path(p).resolve() for p in (extra_allowed_paths or [])] + self._edit_critical_section = threading.Lock() - if not backend.is_remote: + if not backend.is_remote and isinstance(self.workspace_root, Path): self.workspace_root.mkdir(parents=True, exist_ok=True) self._register(registry) @@ -67,30 +143,42 @@ def _register(self, registry: ToolRegistry) -> None: ToolEntry( name="Read", mode=ToolMode.INLINE, - schema={ - "name": "Read", - 
"description": ("Read file content (text/code/images/PDF/PPTX/Notebook). Path must be absolute."), - "parameters": { - "type": "object", - "properties": { - "file_path": { - "type": "string", - "description": "Absolute file path", - }, - "offset": { - "type": "integer", - "description": "Start line (1-indexed, optional)", - }, - "limit": { - "type": "integer", - "description": "Number of lines to read (optional)", - }, + schema=make_tool_schema( + name="Read", + description=( + "Read file content. Output uses cat -n format (line numbers starting at 1). " + "Default reads up to 2000 lines from start; use offset/limit for long files. " + "Supports images (PNG/JPG), PDF (use pages param for large PDFs), and Jupyter notebooks. " + "Path must be absolute." + ), + properties={ + "file_path": { + "type": "string", + "description": "Absolute file path", + "minLength": 1, + "pattern": ABSOLUTE_PATH_PATTERN, + }, + "offset": { + "type": "integer", + "description": "Start line (1-indexed, optional)", + }, + "limit": { + "type": "integer", + "description": "Number of lines to read (optional)", + }, + "pages": { + "type": "string", + "description": "Page range for PDF files (e.g. '1-5'). Max 20 pages per request.", }, - "required": ["file_path"], }, - }, + required=["file_path"], + ), handler=self._read_file, + validate_input=self._validate_read_args, source="FileSystemService", + search_hint="read view file content text code image PDF notebook", + is_read_only=True, + is_concurrency_safe=True, ) ) @@ -98,26 +186,27 @@ def _register(self, registry: ToolRegistry) -> None: ToolEntry( name="Write", mode=ToolMode.INLINE, - schema={ - "name": "Write", - "description": "Create new file. Path must be absolute. Fails if file exists.", - "parameters": { - "type": "object", - "properties": { - "file_path": { - "type": "string", - "description": "Absolute file path", - }, - "content": { - "type": "string", - "description": "File content", - }, + schema=make_tool_schema( + name="Write", + description="Create or overwrite a file with full content. Forces LF line endings. Path must be absolute.", + properties={ + "file_path": { + "type": "string", + "description": "Absolute file path", + "minLength": 1, + "pattern": ABSOLUTE_PATH_PATTERN, + }, + "content": { + "type": "string", + "description": "File content", }, - "required": ["file_path", "content"], }, - }, + required=["file_path", "content"], + ), handler=self._write_file, + validate_input=self._validate_write_args, source="FileSystemService", + search_hint="create new file write content to disk", ) ) @@ -125,39 +214,39 @@ def _register(self, registry: ToolRegistry) -> None: ToolEntry( name="Edit", mode=ToolMode.INLINE, - schema={ - "name": "Edit", - "description": ( - "Edit existing file using exact string replacement. " - "MUST read file before editing. " - "old_string must be unique in file. " - "Set replace_all=true to replace all occurrences." + schema=make_tool_schema( + name="Edit", + description=( + "Edit file via exact string replacement. You MUST Read the file first. " + "old_string must match exactly one location (or use replace_all=true). " + "Does not support .ipynb files (use Write to overwrite full JSON). Path must be absolute." 
), - "parameters": { - "type": "object", - "properties": { - "file_path": { - "type": "string", - "description": "Absolute file path", - }, - "old_string": { - "type": "string", - "description": "Exact string to replace", - }, - "new_string": { - "type": "string", - "description": "Replacement string", - }, - "replace_all": { - "type": "boolean", - "description": "Replace all occurrences (default: false)", - }, + properties={ + "file_path": { + "type": "string", + "description": "Absolute file path", + "minLength": 1, + "pattern": ABSOLUTE_PATH_PATTERN, + }, + "old_string": { + "type": "string", + "description": "Exact string to replace", + }, + "new_string": { + "type": "string", + "description": "Replacement string", + }, + "replace_all": { + "type": "boolean", + "description": "Replace all occurrences (default: false)", }, - "required": ["file_path", "old_string", "new_string"], }, - }, + required=["file_path", "old_string", "new_string"], + ), handler=self._edit_file, + validate_input=self._validate_edit_args, source="FileSystemService", + search_hint="edit modify replace string in existing file", ) ) @@ -165,22 +254,25 @@ def _register(self, registry: ToolRegistry) -> None: ToolEntry( name="list_dir", mode=ToolMode.INLINE, - schema={ - "name": "list_dir", - "description": "List directory contents. Path must be absolute.", - "parameters": { - "type": "object", - "properties": { - "directory_path": { - "type": "string", - "description": "Absolute directory path", - }, + schema=make_tool_schema( + name="list_dir", + description="List directory contents (files and subdirectories, non-recursive). Path must be absolute.", + properties={ + "path": { + "type": "string", + "description": "Absolute directory path", + "minLength": 1, + "pattern": ABSOLUTE_PATH_PATTERN, }, - "required": ["directory_path"], }, - }, + required=["path"], + ), handler=self._list_dir, + validate_input=self._validate_list_dir_args, source="FileSystemService", + search_hint="list directory contents browse folder", + is_read_only=True, + is_concurrency_safe=True, ) ) @@ -188,12 +280,15 @@ def _register(self, registry: ToolRegistry) -> None: # Path validation (reused from middleware) # ------------------------------------------------------------------ - def _validate_path(self, path: str, operation: str) -> tuple[bool, str, Path | None]: - if not Path(path).is_absolute(): + def _validate_path(self, path: str, operation: str) -> ValidationResult: + if self.backend.is_remote: + if not _remote_path(path).is_absolute(): + return False, f"Path must be absolute: {path}", None + elif not Path(path).is_absolute(): return False, f"Path must be absolute: {path}", None try: - resolved = Path(path) if self.backend.is_remote else Path(path).resolve() + resolved = _remote_path(path) if self.backend.is_remote else Path(path).resolve() except Exception as e: return False, f"Invalid path: {path} ({e})", None @@ -224,10 +319,159 @@ def _validate_path(self, path: str, operation: str) -> tuple[bool, str, Path | N return True, "", resolved - def _check_file_staleness(self, resolved: Path) -> str | None: - if resolved not in self._read_files: - return "File has not been read yet. Read it first before writing to it." 
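The `_ReadFileStateCache` added earlier in this file's diff is a bounded LRU keyed by resolved path, so long sessions cannot grow the read-tracking map without limit. A minimal standalone sketch of the same `OrderedDict` eviction discipline (illustrative class, not the repo's):

```python
from collections import OrderedDict


class LRUCache:
    """Bounded mapping that evicts the least-recently-used entry."""

    def __init__(self, max_entries: int) -> None:
        self._max = max_entries
        self._entries: OrderedDict[str, int] = OrderedDict()

    def get(self, key: str) -> int | None:
        if key not in self._entries:
            return None
        self._entries.move_to_end(key)  # a hit marks the entry most-recently-used
        return self._entries[key]

    def set(self, key: str, value: int) -> None:
        self._entries[key] = value
        self._entries.move_to_end(key)
        while len(self._entries) > self._max:
            self._entries.popitem(last=False)  # drop the oldest entry


cache = LRUCache(2)
cache.set("/a", 1)
cache.set("/b", 2)
cache.get("/a")      # "/a" is now most recent
cache.set("/c", 3)   # evicts "/b", the least-recently-used key
assert cache.get("/b") is None and cache.get("/a") == 1
```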
- stored_mtime = self._read_files[resolved] + def _validation_error(self, message: str, error_code: str) -> dict[str, object]: + return { + "result": False, + "message": message, + "errorCode": error_code, + } + + def _path_validation_error(self, message: str) -> dict[str, object]: + # @@@filesystem-validation-codes - Keep the pre-execution path failure + # mapping centralized so the runner can surface stable structured + # codes instead of ad-hoc handler strings on the highest-traffic tools. + if message.startswith("Path must be absolute:"): + return self._validation_error(message, "PATH_NOT_ABSOLUTE") + if message.startswith("Invalid path:"): + return self._validation_error(message, "INVALID_PATH") + if message.startswith("Path outside workspace"): + return self._validation_error(message, "PATH_OUTSIDE_WORKSPACE") + if message.startswith("File type not allowed:"): + return self._validation_error(message, "FILE_TYPE_NOT_ALLOWED") + return self._validation_error(message, "INVALID_PATH") + + def _validate_existing_path(self, path: str, operation: str) -> tuple[dict[str, object] | None, ResolvedPath | None]: + is_valid, error, resolved = self._validate_path(path, operation) + if not is_valid: + return self._path_validation_error(error), None + assert resolved is not None + return None, resolved + + def _validation_message(self, error: dict[str, object]) -> str: + return str(error["message"]) + + def _read_preflight( + self, + *, + file_path: str, + offset: int = 0, + limit: int | None = None, + pages: str | None = None, + ) -> tuple[dict[str, object] | None, ResolvedPath | None]: + error, resolved = self._validate_existing_path(file_path, "read") + if error is not None: + return error, None + assert resolved is not None + + file_size = self.backend.file_size(str(resolved)) + if file_size is not None and file_size > self.max_file_size: + return ( + self._validation_error( + f"File too large: {file_size:,} bytes (max: {self.max_file_size:,} bytes)", + "FILE_TOO_LARGE", + ), + None, + ) + + has_pagination = offset > 0 or limit is not None or pages is not None + if not has_pagination and file_size is not None: + limits = ReadLimits() + if file_size > limits.max_size_bytes: + total_lines = self._count_lines(resolved) + return ( + self._validation_error( + ( + f"File content ({file_size:,} bytes) exceeds maximum allowed size ({limits.max_size_bytes:,} bytes).\n" + f"Use offset and limit parameters to read specific sections.\n" + f"Total lines: {total_lines}" + ), + "READ_REQUIRES_PAGINATION", + ), + None, + ) + estimated_tokens = file_size // 4 + if estimated_tokens > limits.max_tokens: + total_lines = self._count_lines(resolved) + return ( + self._validation_error( + ( + f"File content (~{estimated_tokens:,} tokens) exceeds maximum allowed tokens ({limits.max_tokens:,}).\n" + f"Use offset and limit parameters to read specific sections.\n" + f"Total lines: {total_lines}" + ), + "READ_REQUIRES_PAGINATION", + ), + None, + ) + + return None, resolved + + def _edit_preflight(self, *, file_path: str) -> tuple[dict[str, object] | None, ResolvedPath | None]: + error, resolved = self._validate_existing_path(file_path, "edit") + if error is not None: + return error, None + assert resolved is not None + + if resolved.suffix.lower() == ".ipynb": + return ( + self._validation_error( + "Notebook files (.ipynb) are not supported by Edit. 
Use Write to overwrite the full JSON.", + "NOTEBOOK_EDIT_UNSUPPORTED", + ), + None, + ) + + file_size = self.backend.file_size(str(resolved)) + if file_size is not None and file_size > self.max_edit_file_size: + return ( + self._validation_error( + f"File too large for Edit: {file_size:,} bytes (max: {self.max_edit_file_size:,} bytes)", + "FILE_TOO_LARGE", + ), + None, + ) + + return None, resolved + + def _list_dir_preflight(self, *, path: str) -> tuple[dict[str, object] | None, ResolvedPath | None]: + error, resolved = self._validate_existing_path(path, "list") + if error is not None: + return error, None + assert resolved is not None + if not self.backend.is_dir(str(resolved)): + if self.backend.file_exists(str(resolved)): + return self._validation_error(f"Not a directory: {path}", "NOT_A_DIRECTORY"), None + return self._validation_error(f"Directory not found: {path}", "DIRECTORY_NOT_FOUND"), None + return None, resolved + + def _validate_read_args(self, args: dict[str, Any], request: Any) -> dict[str, Any]: + error, _ = self._read_preflight( + file_path=args["file_path"], + offset=args.get("offset") or 0, + limit=args.get("limit"), + pages=args.get("pages"), + ) + return error or args + + def _validate_write_args(self, args: dict[str, Any], request: Any) -> dict[str, Any]: + error, _ = self._validate_existing_path(args["file_path"], "write") + return error or args + + def _validate_edit_args(self, args: dict[str, Any], request: Any) -> dict[str, Any]: + error, _ = self._edit_preflight(file_path=args["file_path"]) + return error or args + + def _validate_list_dir_args(self, args: dict[str, Any], request: Any) -> dict[str, Any]: + error, _ = self._list_dir_preflight(path=args["path"]) + return error or args + + def _check_file_staleness(self, resolved: ResolvedPath) -> str | None: + state = self._read_files.get(resolved) + if state is None: + return "File has not been read yet. Read the full file first before editing." + if state.is_partial: + return "File has only been read partially. Read the full file before editing." + stored_mtime = state.timestamp if stored_mtime is None: return None current_mtime = self.backend.file_mtime(str(resolved)) @@ -235,8 +479,70 @@ def _check_file_staleness(self, resolved: Path) -> str | None: return "File has been modified since last read. Read it again before editing." 
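The staleness rule above is an mtime handshake: remember the mtime observed at read time, then refuse the edit if the file has moved on since. A self-contained sketch of the same contract, with plain `os.stat` standing in for the backend abstraction (helper names are hypothetical):

```python
import os

read_mtimes: dict[str, float] = {}


def record_read(path: str) -> None:
    # Capture the mtime the reader actually saw.
    read_mtimes[path] = os.stat(path).st_mtime


def staleness_error(path: str) -> str | None:
    stored = read_mtimes.get(path)
    if stored is None:
        return "File has not been read yet."
    if os.stat(path).st_mtime > stored:
        return "File has been modified since last read."
    return None  # safe to edit
```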
return None - def _update_file_tracking(self, resolved: Path) -> None: - self._read_files[resolved] = self.backend.file_mtime(str(resolved)) + def _update_file_tracking( + self, + resolved: ResolvedPath, + *, + is_partial: bool, + file_type: FileType | None = None, + ) -> None: + if file_type is None: + file_type = self._detect_file_type(resolved) + if file_type not in {FileType.TEXT, FileType.NOTEBOOK}: + return + self._read_files.set( + resolved, + _ReadFileState( + timestamp=self.backend.file_mtime(str(resolved)), + is_partial=is_partial, + ), + ) + + def _normalize_write_content(self, content: str) -> str: + return content.replace("\r\n", "\n").replace("\r", "\n") + + def _read_result_is_partial(self, result) -> bool: + if getattr(result, "truncated", False): + return True + if getattr(result, "file_type", None) == FileType.TEXT: + start_line = getattr(result, "start_line", None) or 1 + total_lines = getattr(result, "total_lines", None) + end_line = getattr(result, "end_line", None) or total_lines or start_line + if total_lines is not None: + return start_line > 1 or end_line < total_lines + return False + + def _detect_file_type(self, resolved: ResolvedPath) -> FileType: + return detect_file_type(Path(str(resolved))) + + def _structured_media_success( + self, + *, + resolved: ResolvedPath, + file_type: FileType, + content_blocks: list[dict[str, str]], + ) -> ToolResultEnvelope: + return tool_success( + [ + { + "type": "text", + "text": (f"Read file: {resolved.name}\nSpecial content is attached below as structured blocks."), + }, + *content_blocks, + ], + metadata={"file_type": file_type.value}, + ) + + def _restore_special_result_identity( + self, + *, + result, + resolved: ResolvedPath, + temp_path: Path, + ) -> None: + result.file_path = str(resolved) + if isinstance(getattr(result, "content", None), str): + result.content = result.content.replace(str(temp_path), str(resolved)).replace(temp_path.name, resolved.name) def _record_operation( self, @@ -267,7 +573,7 @@ def _record_operation( except Exception as e: raise RuntimeError(f"[FileSystemService] Failed to record operation: {e}") from e - def _count_lines(self, resolved: Path) -> int: + def _count_lines(self, resolved: ResolvedPath) -> int: try: raw = self.backend.read_file(str(resolved)) return raw.content.count("\n") + 1 @@ -278,50 +584,86 @@ def _count_lines(self, resolved: Path) -> int: # Tool handlers # ------------------------------------------------------------------ - def _read_file(self, file_path: str, offset: int = 0, limit: int | None = None) -> str: - is_valid, error, resolved = self._validate_path(file_path, "read") - if not is_valid: - return error - - file_size = self.backend.file_size(str(resolved)) - - if file_size is not None and file_size > self.max_file_size: - return f"File too large: {file_size:,} bytes (max: {self.max_file_size:,} bytes)" - - has_pagination = offset > 0 or limit is not None - if not has_pagination and file_size is not None: - limits = ReadLimits() - if file_size > limits.max_size_bytes: - total_lines = self._count_lines(resolved) - return ( - f"File content ({file_size:,} bytes) exceeds maximum allowed size ({limits.max_size_bytes:,} bytes).\n" - f"Use offset and limit parameters to read specific sections.\n" - f"Total lines: {total_lines}" - ) - estimated_tokens = file_size // 4 - if estimated_tokens > limits.max_tokens: - total_lines = self._count_lines(resolved) - return ( - f"File content (~{estimated_tokens:,} tokens) exceeds maximum allowed tokens ({limits.max_tokens:,}).\n" - f"Use 
offset and limit parameters to read specific sections.\n" - f"Total lines: {total_lines}" - ) + def _read_file(self, file_path: str, offset: int = 0, limit: int | None = None, pages: str | None = None) -> str | ToolResultEnvelope: + error, resolved = self._read_preflight( + file_path=file_path, + offset=offset, + limit=limit, + pages=pages, + ) + if error is not None: + return self._validation_message(error) + assert resolved is not None from core.tools.filesystem.local_backend import LocalBackend if isinstance(self.backend, LocalBackend): + assert isinstance(resolved, Path) limits = ReadLimits() result = read_file_dispatch( path=resolved, limits=limits, offset=offset if offset > 0 else None, limit=limit, + pages=pages, ) if not result.error: - self._update_file_tracking(resolved) + self._update_file_tracking( + resolved, + is_partial=self._read_result_is_partial(result), + file_type=result.file_type, + ) + if result.content_blocks: + return self._structured_media_success( + resolved=resolved, + file_type=result.file_type, + content_blocks=result.content_blocks, + ) return result.format_output() try: + file_type = self._detect_file_type(resolved) + download_bytes = getattr(self.backend, "download_bytes", None) + if callable(download_bytes) and file_type in {FileType.BINARY, FileType.DOCUMENT}: + # @@@dt-02-remote-special-file-bridge + # Remote providers expose raw-byte download hooks. Reuse the + # same local dispatcher for binary/document reads instead of + # degrading special files into placeholder text. + raw_bytes = download_bytes(str(resolved)) + if not isinstance(raw_bytes, (bytes, bytearray)): + raise TypeError(f"Remote special-file download returned {type(raw_bytes).__name__}, expected bytes.") + raw_bytes = bytes(raw_bytes) + if ( + file_type == FileType.BINARY + and resolved.suffix.lstrip(".").lower() in IMAGE_EXTENSIONS + and len(raw_bytes) > MAX_IMAGE_SIZE + ): + return f"Image exceeds size limit: {len(raw_bytes)} bytes" + with tempfile.NamedTemporaryFile(suffix=resolved.suffix, delete=False) as tmp: + tmp.write(raw_bytes) + tmp_path = Path(tmp.name) + try: + result = read_file_dispatch( + path=tmp_path, + limits=ReadLimits(), + offset=offset if offset > 0 else None, + limit=limit, + pages=pages, + ) + finally: + tmp_path.unlink(missing_ok=True) + self._restore_special_result_identity( + result=result, + resolved=resolved, + temp_path=tmp_path, + ) + if result.content_blocks: + return self._structured_media_success( + resolved=resolved, + file_type=result.file_type, + content_blocks=result.content_blocks, + ) + return result.format_output() raw = self.backend.read_file(str(resolved)) lines = raw.content.split("\n") total_lines = len(lines) @@ -331,7 +673,10 @@ def _read_file(self, file_path: str, offset: int = 0, limit: int | None = None) selected = lines[start:end] numbered = [f"{start + i + 1:>6}\t{line}" for i, line in enumerate(selected)] content = "\n".join(numbered) - self._update_file_tracking(resolved) + self._update_file_tracking( + resolved, + is_partial=start > 0 or end < total_lines, + ) return content except Exception as e: return f"Error reading file: {e}" @@ -340,88 +685,102 @@ def _write_file(self, file_path: str, content: str) -> str: is_valid, error, resolved = self._validate_path(file_path, "write") if not is_valid: return error - - if self.backend.file_exists(str(resolved)): - return f"File already exists: {file_path}\nUse Edit to modify existing files" + assert resolved is not None try: - result = self.backend.write_file(str(resolved), content) + normalized 
= self._normalize_write_content(content) + result = self.backend.write_file(str(resolved), normalized) if not result.success: return f"Error writing file: {result.error}" - self._update_file_tracking(resolved) + self._update_file_tracking(resolved, is_partial=False) self._record_operation( operation_type="write", file_path=file_path, before_content=None, - after_content=content, + after_content=normalized, ) - lines = content.count("\n") + 1 + lines = normalized.count("\n") + 1 return f"File created: {file_path}\n Lines: {lines}\n Size: {len(content)} bytes" except Exception as e: return f"Error writing file: {e}" def _edit_file(self, file_path: str, old_string: str, new_string: str, replace_all: bool = False) -> str: - is_valid, error, resolved = self._validate_path(file_path, "edit") - if not is_valid: - return error - - if not self.backend.file_exists(str(resolved)): - return f"File not found: {file_path}" - - staleness_error = self._check_file_staleness(resolved) - if staleness_error: - return staleness_error - - if old_string == new_string: - return "Error: old_string and new_string are identical (no-op edit)" + error, resolved = self._edit_preflight(file_path=file_path) + if error is not None: + return self._validation_message(error) + assert resolved is not None try: - raw = self.backend.read_file(str(resolved)) - content = raw.content - - if old_string not in content: - return f"String not found in file\n Looking for: {old_string[:100]}..." - - if replace_all: - count = content.count(old_string) - new_content = content.replace(old_string, new_string) - else: - count = content.count(old_string) - if count > 1: - return ( - f"String appears {count} times in file (not unique)\n" - f" Use replace_all=true or provide more context to make it unique" - ) - new_content = content.replace(old_string, new_string, 1) - count = 1 - - result = self.backend.write_file(str(resolved), new_content) - if not result.success: - return f"Error editing file: {result.error}" - - self._update_file_tracking(resolved) - self._record_operation( - operation_type="edit", - file_path=file_path, - before_content=content, - after_content=new_content, - changes=[{"old_string": old_string, "new_string": new_string}], - ) - return f"File edited: {file_path}\n Replaced {count} occurrence(s)" + # @@@edit-critical-lock + # dt-01 requires the reread -> stale check -> write path to be one + # synchronous critical section so two stale concurrent edits cannot + # both commit from the same prior read snapshot. + with self._edit_critical_section: + try: + raw = self.backend.read_file(str(resolved)) + except FileNotFoundError: + if old_string == "": + return self._write_file(file_path, new_string) + return f"File not found: {file_path}" + content = raw.content + + if old_string == "": + return "Cannot use empty old_string on an existing file. Use Write to replace the full file content." + staleness_error = self._check_file_staleness(resolved) + if staleness_error: + return staleness_error + + if old_string == new_string: + return "Error: old_string and new_string are identical (no-op edit)" + + # @@@edit-critical-staleness + # te-06 needs a second stale-read check inside the read->write + # critical section so an external write that lands after the + # preflight check cannot be silently overwritten. + staleness_error = self._check_file_staleness(resolved) + if staleness_error: + return staleness_error + + if old_string not in content: + return f"String not found in file\n Looking for: {old_string[:100]}..." 
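Stripped of I/O, locking, and read tracking, the replacement contract this handler enforces is small enough to state as a pure function. A sketch of just the string semantics (not the handler itself):

```python
def apply_edit(content: str, old: str, new: str, replace_all: bool = False) -> tuple[str, int]:
    """Return (new_content, replacement_count) under the Edit tool's rules."""
    if old not in content:
        raise ValueError("old_string not found in file")
    count = content.count(old)
    if replace_all:
        return content.replace(old, new), count
    if count > 1:
        raise ValueError(f"old_string matches {count} locations; use replace_all or add context")
    return content.replace(old, new, 1), 1


assert apply_edit("a b a", "b", "c") == ("a c a", 1)
assert apply_edit("a b a", "a", "x", replace_all=True) == ("x b x", 2)
```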
+ + if replace_all: + count = content.count(old_string) + new_content = content.replace(old_string, new_string) + else: + count = content.count(old_string) + if count > 1: + return ( + f"String appears {count} times in file (not unique)\n" + f" Use replace_all=true or provide more context to make it unique" + ) + new_content = content.replace(old_string, new_string, 1) + count = 1 + + result = self.backend.write_file(str(resolved), new_content) + if not result.success: + return f"Error editing file: {result.error}" + + self._update_file_tracking(resolved, is_partial=False) + self._record_operation( + operation_type="edit", + file_path=file_path, + before_content=content, + after_content=new_content, + changes=[{"old_string": old_string, "new_string": new_string}], + ) + return f"File edited: {file_path}\n Replaced {count} occurrence(s)" except Exception as e: return f"Error editing file: {e}" - def _list_dir(self, directory_path: str) -> str: - is_valid, error, resolved = self._validate_path(directory_path, "list") - if not is_valid: - return error - - if not self.backend.is_dir(str(resolved)): - if self.backend.file_exists(str(resolved)): - return f"Not a directory: {directory_path}" - return f"Directory not found: {directory_path}" + def _list_dir(self, path: str) -> str: + directory_path = path + error, resolved = self._list_dir_preflight(path=directory_path) + if error is not None: + return self._validation_message(error) + assert resolved is not None try: result = self.backend.list_dir(str(resolved)) diff --git a/core/tools/lsp/__init__.py b/core/tools/lsp/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/core/tools/lsp/service.py b/core/tools/lsp/service.py new file mode 100644 index 000000000..dc480812d --- /dev/null +++ b/core/tools/lsp/service.py @@ -0,0 +1,838 @@ +"""LSP Service - Language Server Protocol code intelligence via multilspy. + +Registers a single DEFERRED `LSP` tool with 9 operations: + goToDefinition, findReferences, hover, documentSymbol, workspaceSymbol, + goToImplementation, prepareCallHierarchy, incomingCalls, outgoingCalls + +Sessions are managed by the process-level _LSPSessionPool singleton — they +start lazily on first use and persist for the lifetime of the process, +surviving agent restarts. Call `await lsp_pool.close_all()` on process exit. + +Supported languages (via multilspy): + python, typescript, javascript, go, rust, java, ruby, kotlin, csharp +""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import shutil +import subprocess +from pathlib import Path +from typing import Any + +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema + +_FILE_SIZE_LIMIT = 10 * 1024 * 1024 # 10 MB — matches CC LSP limit + +logger = logging.getLogger(__name__) + +LSP_SCHEMA = make_tool_schema( + name="LSP", + description=( + "Language Server Protocol code intelligence. " + "Operations: goToDefinition, findReferences, hover, documentSymbol, workspaceSymbol, " + "goToImplementation, prepareCallHierarchy, incomingCalls, outgoingCalls. " + "Language servers are auto-downloaded on first use. " + "Supports python, typescript, javascript, go, rust, java, ruby, kotlin. " + "file_path must be absolute. line/character are 1-based. " + "incomingCalls/outgoingCalls require 'item' from prepareCallHierarchy output." 
+ ), + properties={ + "operation": { + "type": "string", + "enum": [ + "goToDefinition", + "findReferences", + "hover", + "documentSymbol", + "workspaceSymbol", + "goToImplementation", + "prepareCallHierarchy", + "incomingCalls", + "outgoingCalls", + ], + "description": "LSP operation to perform", + }, + "file_path": { + "type": "string", + "description": "Absolute path to file (required for all operations except workspaceSymbol)", + }, + "line": { + "type": "integer", + "description": "1-based line number (required for goToDefinition, findReferences, hover)", + }, + "character": { + "type": "integer", + "description": "1-based character offset (required for goToDefinition, findReferences, hover)", + }, + "query": { + "type": "string", + "description": "Symbol name to search (required for workspaceSymbol)", + }, + "language": { + "type": "string", + "description": "Language override. Auto-detected from file extension if omitted.", + }, + "item": { + "type": "object", + "description": "CallHierarchyItem from prepareCallHierarchy (required for incomingCalls/outgoingCalls).", + }, + }, + required=["operation"], +) + +# File extension → multilspy language identifier +_EXT_TO_LANG: dict[str, str] = { + ".py": "python", + ".ts": "typescript", + ".tsx": "typescript", + ".js": "javascript", + ".jsx": "javascript", + ".go": "go", + ".rs": "rust", + ".java": "java", + ".rb": "ruby", + ".kt": "kotlin", + ".cs": "csharp", +} + + +def _find_pyright() -> str | None: + """Locate pyright-langserver: venv-local first, then PATH.""" + for name in ("pyright-langserver", "pyright_langserver"): + # prefer the binary in the same venv as the current interpreter + venv_bin = Path(os.__file__).parent.parent.parent / "bin" / name + if venv_bin.exists(): + return str(venv_bin) + found = shutil.which(name) + if found: + return found + return None + + +class _PyrightSession: + """Minimal asyncio LSP client for pyright-langserver (stdio). + + Used for Python operations not supported by Jedi: + goToImplementation, prepareCallHierarchy, incomingCalls, outgoingCalls. + + Requires pyright in the active venv: pip install pyright + """ + + def __init__(self, workspace_root: str) -> None: + self._workspace_root = workspace_root + self._proc: asyncio.subprocess.Process | None = None + self._pending: dict[int, asyncio.Future] = {} + self._next_id = 1 + self._reader_task: asyncio.Task | None = None + self._open_files: set[str] = set() + + async def start(self) -> None: + server = _find_pyright() + if not server: + raise RuntimeError("pyright-langserver not found. 
Install with: pip install pyright") + self._proc = await asyncio.create_subprocess_exec( + server, + "--stdio", + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.DEVNULL, + ) + self._reader_task = asyncio.create_task(self._read_loop(), name="pyright-reader") + + # LSP handshake + await self._request( + "initialize", + { + "processId": os.getpid(), + "rootUri": Path(self._workspace_root).as_uri(), + "capabilities": { + "textDocument": { + "synchronization": {"dynamicRegistration": False}, + "implementation": {"dynamicRegistration": False, "linkSupport": True}, + "callHierarchy": {"dynamicRegistration": False}, + } + }, + "initializationOptions": {}, + }, + ) + self._notify("initialized", {}) + + # ── I/O ─────────────────────────────────────────────────────────── + + async def _read_loop(self) -> None: + try: + while True: + assert self._proc and self._proc.stdout + # Read headers until blank line + content_length = 0 + while True: + raw = await self._proc.stdout.readline() + if not raw: + return + line = raw.decode().rstrip() + if not line: + break + if line.lower().startswith("content-length:"): + content_length = int(line.split(":", 1)[1].strip()) + if content_length == 0: + continue + body = await self._proc.stdout.readexactly(content_length) + msg = json.loads(body) + # Route response/error to waiting Future + msg_id = msg.get("id") + msg_method = msg.get("method", "") + if msg_id is not None and msg_method: + # Server-to-client request — must acknowledge with a response + self._write({"jsonrpc": "2.0", "id": msg_id, "result": None}) + await self._drain() + elif msg_id is not None and msg_id in self._pending: + fut = self._pending.pop(msg_id) + if not fut.done(): + if "error" in msg: + fut.set_exception(RuntimeError(f"{msg['error'].get('message', 'LSP error')} ({msg['error'].get('code', '')})")) + else: + fut.set_result(msg.get("result")) + # All other notifications ($/progress, diagnostics, etc.) 
are silently dropped + except Exception as exc: + for fut in self._pending.values(): + if not fut.done(): + fut.set_exception(exc) + + def _write(self, msg: dict) -> None: + """Encode and buffer one LSP message (call drain() to flush).""" + assert self._proc and self._proc.stdin + body = json.dumps(msg, separators=(",", ":")).encode() + header = f"Content-Length: {len(body)}\r\n\r\n".encode() + self._proc.stdin.write(header + body) + + async def _drain(self) -> None: + assert self._proc and self._proc.stdin + await self._proc.stdin.drain() + + def _notify(self, method: str, params: Any) -> None: + self._write({"jsonrpc": "2.0", "method": method, "params": params}) + + async def _request(self, method: str, params: Any, timeout: float = 30.0) -> Any: + req_id = self._next_id + self._next_id += 1 + loop = asyncio.get_event_loop() + fut: asyncio.Future = loop.create_future() + self._pending[req_id] = fut + self._write({"jsonrpc": "2.0", "id": req_id, "method": method, "params": params}) + await self._drain() + return await asyncio.wait_for(fut, timeout=timeout) + + # ── file lifecycle ──────────────────────────────────────────────── + + def _open_file(self, abs_path: str) -> None: + uri = Path(abs_path).as_uri() + if uri in self._open_files: + return + try: + text = Path(abs_path).read_text(encoding="utf-8", errors="replace") + except OSError: + text = "" + self._notify("textDocument/didOpen", {"textDocument": {"uri": uri, "languageId": "python", "version": 1, "text": text}}) + self._open_files.add(uri) + + def _close_file(self, abs_path: str) -> None: + uri = Path(abs_path).as_uri() + if uri not in self._open_files: + return + self._notify("textDocument/didClose", {"textDocument": {"uri": uri}}) + self._open_files.discard(uri) + + def _abs(self, rel_path: str) -> str: + return str(Path(self._workspace_root) / rel_path) + + # ── LSP operations ──────────────────────────────────────────────── + + async def request_implementation(self, rel_path: str, line: int, col: int) -> list: + abs_path = self._abs(rel_path) + self._open_file(abs_path) + await self._drain() + uri = Path(abs_path).as_uri() + response = await self._request( + "textDocument/implementation", + { + "textDocument": {"uri": uri}, + "position": {"line": line, "character": col}, + }, + ) + return self._normalise_locations(response) + + async def request_prepare_call_hierarchy(self, rel_path: str, line: int, col: int) -> list: + abs_path = self._abs(rel_path) + self._open_file(abs_path) + await self._drain() + uri = Path(abs_path).as_uri() + response = await self._request( + "textDocument/prepareCallHierarchy", + { + "textDocument": {"uri": uri}, + "position": {"line": line, "character": col}, + }, + ) + # File stays open — callHierarchy/incomingCalls and outgoingCalls may need it + return response or [] + + async def request_incoming_calls(self, item: dict) -> list: + response = await self._request("callHierarchy/incomingCalls", {"item": item}) + return response or [] + + async def request_outgoing_calls(self, item: dict) -> list: + response = await self._request("callHierarchy/outgoingCalls", {"item": item}) + return response or [] + + @staticmethod + def _normalise_locations(response: Any) -> list: + if not response: + return [] + if isinstance(response, dict): + response = [response] + out = [] + for loc in response: + uri = loc.get("uri") or loc.get("targetUri", "") + rng = loc.get("range") or loc.get("targetSelectionRange") or loc.get("targetRange") or {} + out.append({"uri": uri, "absolutePath": uri.replace("file://", ""), 
"range": rng}) + return out + + # ── shutdown ────────────────────────────────────────────────────── + + async def stop(self) -> None: + if self._proc: + try: + await asyncio.wait_for(self._request("shutdown", {}), timeout=5) + self._notify("exit", {}) + except Exception: + pass + try: + self._proc.terminate() + await asyncio.wait_for(self._proc.wait(), timeout=5) + except Exception: + self._proc.kill() + if self._reader_task and not self._reader_task.done(): + self._reader_task.cancel() + try: + await self._reader_task + except (asyncio.CancelledError, Exception): + pass + + +class _LSPSession: + """Holds a multilspy LanguageServer alive in a background asyncio task. + + Pattern: start_server() is an async context manager that must stay open + for the lifetime of the session. We enter it inside a background Task and + use an Event to signal readiness. Stopping sets a second Event that causes + the background task to exit the context and shut down the server process. + """ + + def __init__(self, language: str, workspace_root: str) -> None: + self.language = language + self._workspace_root = workspace_root + self._ready = asyncio.Event() + self._stop = asyncio.Event() + self._task: asyncio.Task | None = None + self._lsp: Any = None + self._error: Exception | None = None + + async def start(self) -> None: + self._task = asyncio.create_task(self._run(), name=f"lsp-{self.language}") + try: + await asyncio.wait_for(asyncio.shield(self._ready.wait()), timeout=60) + except TimeoutError: + raise TimeoutError(f"LSP server for '{self.language}' did not start within 60s") + if self._error: + raise self._error + + async def _run(self) -> None: + try: + from multilspy import LanguageServer # core dep — always available + from multilspy.multilspy_config import MultilspyConfig + from multilspy.multilspy_logger import MultilspyLogger + + config = MultilspyConfig.from_dict({"code_language": self.language}) + lsp_logger = MultilspyLogger() + self._lsp = LanguageServer.create(config, lsp_logger, self._workspace_root) + async with self._lsp.start_server(): + self._ready.set() + await self._stop.wait() + except Exception as e: + self._error = e + self._ready.set() # unblock any waiters + logger.error("[LSPService] %s server error: %s", self.language, e) + + async def stop(self) -> None: + self._stop.set() + if self._task and not self._task.done(): + try: + await asyncio.wait_for(self._task, timeout=5) + except (TimeoutError, asyncio.CancelledError): + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + + # ── request methods ─────────────────────────────────────────────── + + async def request_definition(self, rel_path: str, line: int, col: int) -> list: + try: + return await self._lsp.request_definition(rel_path, line, col) or [] + except AssertionError: + return [] # multilspy asserts on None response (no definition found) + + async def request_references(self, rel_path: str, line: int, col: int) -> list: + try: + return await self._lsp.request_references(rel_path, line, col) or [] + except AssertionError: + return [] + + async def request_hover(self, rel_path: str, line: int, col: int) -> Any: + try: + return await self._lsp.request_hover(rel_path, line, col) + except AssertionError: + return None + + async def request_document_symbols(self, rel_path: str) -> list: + try: + symbols, _ = await self._lsp.request_document_symbols(rel_path) + return symbols or [] + except AssertionError: + return [] + + async def request_workspace_symbol(self, query: str) -> list: + return 
await self._lsp.request_workspace_symbol(query) or [] + + # ── advanced ops (direct server.send, for servers that support them) ── + + async def request_implementation(self, rel_path: str, line: int, col: int) -> list: + abs_uri = Path(self._workspace_root, rel_path).as_uri() + with self._lsp.open_file(rel_path): + response = await self._lsp.server.send.implementation( + {"textDocument": {"uri": abs_uri}, "position": {"line": line, "character": col}} + ) + if not response: + return [] + if isinstance(response, dict): + response = [response] + out = [] + for item in response: + if "uri" in item and "range" in item: + item.setdefault("absolutePath", item["uri"].replace("file://", "")) + out.append(item) + elif "targetUri" in item: + out.append( + { + "uri": item["targetUri"], + "absolutePath": item["targetUri"].replace("file://", ""), + "range": item.get("targetSelectionRange", item.get("targetRange", {})), + } + ) + return out + + async def request_prepare_call_hierarchy(self, rel_path: str, line: int, col: int) -> list: + abs_uri = Path(self._workspace_root, rel_path).as_uri() + with self._lsp.open_file(rel_path): + response = await self._lsp.server.send.prepare_call_hierarchy( + {"textDocument": {"uri": abs_uri}, "position": {"line": line, "character": col}} + ) + return response or [] + + async def request_incoming_calls(self, item: dict) -> list: + response = await self._lsp.server.send.incoming_calls({"item": item}) + return response or [] + + async def request_outgoing_calls(self, item: dict) -> list: + response = await self._lsp.server.send.outgoing_calls({"item": item}) + return response or [] + + +class _LSPSessionPool: + """Process-level singleton managing LSP sessions across all agent instances. + + Sessions are keyed by (language, workspace_root) and survive agent restarts. + Call close_all() once at process exit (e.g. from backend lifespan shutdown). 
+ """ + + def __init__(self) -> None: + # (language, workspace_root) → _LSPSession + self._sessions: dict[tuple[str, str], _LSPSession] = {} + # workspace_root → _PyrightSession + self._pyright: dict[str, _PyrightSession] = {} + # In-flight start tasks to prevent duplicate starts under concurrent requests + self._starting: dict[tuple[str, str], asyncio.Task] = {} + self._starting_pyright: dict[str, asyncio.Task] = {} + + async def get_session(self, language: str, workspace_root: str) -> _LSPSession: + key = (language, workspace_root) + if key in self._sessions: + return self._sessions[key] + if key not in self._starting: + + async def _start() -> _LSPSession: + logger.info("[LSPPool] starting %s language server (workspace=%s)...", language, workspace_root) + s = _LSPSession(language, workspace_root) + await s.start() + self._sessions[key] = s + self._starting.pop(key, None) + logger.info("[LSPPool] %s language server ready", language) + return s + + self._starting[key] = asyncio.create_task(_start(), name=f"lsp-start-{language}") + return await self._starting[key] + + async def get_pyright(self, workspace_root: str) -> _PyrightSession: + if workspace_root in self._pyright: + return self._pyright[workspace_root] + if workspace_root not in self._starting_pyright: + + async def _start() -> _PyrightSession: + logger.info("[LSPPool] starting pyright (workspace=%s)...", workspace_root) + s = _PyrightSession(workspace_root) + await s.start() + self._pyright[workspace_root] = s + self._starting_pyright.pop(workspace_root, None) + logger.info("[LSPPool] pyright ready") + return s + + self._starting_pyright[workspace_root] = asyncio.create_task(_start(), name="lsp-start-pyright") + return await self._starting_pyright[workspace_root] + + async def close_all(self) -> None: + """Stop all running language server processes. Call once at process exit.""" + for (lang, ws), session in list(self._sessions.items()): + try: + await session.stop() + logger.debug("[LSPPool] stopped %s server (workspace=%s)", lang, ws) + except Exception as e: + logger.debug("[LSPPool] error stopping %s: %s", lang, e) + self._sessions.clear() + for ws, session in list(self._pyright.items()): + try: + await session.stop() + logger.debug("[LSPPool] stopped pyright (workspace=%s)", ws) + except Exception as e: + logger.debug("[LSPPool] error stopping pyright: %s", e) + self._pyright.clear() + + +# Process-level singleton — import and use directly +lsp_pool = _LSPSessionPool() + + +class LSPService: + """Registers the LSP tool (DEFERRED) into ToolRegistry. + + Delegates all session management to the process-level lsp_pool singleton. + Language servers start lazily on first use and persist across agent restarts. + """ + + # Operations that Jedi doesn't support — routed to pyright for Python, + # or to the native server.send.* for other languages. 
+ _ADVANCED_OPS: frozenset[str] = frozenset({"goToImplementation", "prepareCallHierarchy", "incomingCalls", "outgoingCalls"}) + + def __init__(self, registry: ToolRegistry, workspace_root: str | Path) -> None: + self._workspace_root = str(Path(workspace_root).resolve()) + registry.register( + ToolEntry( + name="LSP", + mode=ToolMode.DEFERRED, + schema=LSP_SCHEMA, + handler=self._handle, + source="LSPService", + search_hint="language server definition references hover symbols go-to", + is_read_only=True, + is_concurrency_safe=True, + ) + ) + logger.debug("[LSPService] registered (workspace=%s)", self._workspace_root) + + # ── session management (delegates to process-level pool) ────────── + + async def _get_session(self, language: str) -> _LSPSession: + return await lsp_pool.get_session(language, self._workspace_root) + + async def _get_pyright(self) -> _PyrightSession: + return await lsp_pool.get_pyright(self._workspace_root) + + def _detect_language(self, file_path: str) -> str | None: + return _EXT_TO_LANG.get(Path(file_path).suffix.lower()) + + def _to_relative(self, file_path: str) -> str: + try: + return str(Path(file_path).relative_to(self._workspace_root)) + except ValueError: + return file_path # fallback: pass as-is + + # ── pre-flight checks ───────────────────────────────────────────── + + @staticmethod + def _check_file(file_path: str) -> str | None: + """Return error string if file exceeds 10 MB limit, else None.""" + try: + size = Path(file_path).stat().st_size + except OSError: + return None # let LSP handle missing file errors + if size > _FILE_SIZE_LIMIT: + mb = size / (1024 * 1024) + return f"File too large ({mb:.1f} MB). LSP file size limit is 10 MB." + return None + + def _filter_gitignored(self, locations: list) -> list: + """Filter out locations inside gitignored paths (batches of 50, like CC).""" + if not locations: + return locations + abs_paths = [loc.get("absolutePath") or loc.get("uri", "").replace("file://", "") for loc in locations] + try: + # git check-ignore exits 0 if any path is ignored, 1 if none are + result = subprocess.run( + ["git", "check-ignore", "--stdin", "-z"], + input="\0".join(abs_paths), + capture_output=True, + text=True, + cwd=self._workspace_root, + timeout=5, + ) + ignored = set(result.stdout.split("\0")) if result.stdout else set() + except Exception: + return locations # on error, return all (fail-open) + return [loc for loc, p in zip(locations, abs_paths) if p not in ignored] + + def _filter_gitignored_batched(self, locations: list) -> list: + """Run _filter_gitignored in batches of 50 (matches CC batch size).""" + out = [] + for i in range(0, len(locations), 50): + out.extend(self._filter_gitignored(locations[i : i + 50])) + return out + + async def _filter_gitignored_batched_async(self, locations: list) -> list: + return await asyncio.to_thread(self._filter_gitignored_batched, locations) + + # ── output formatters ───────────────────────────────────────────── + + @staticmethod + def _fmt_location(loc: Any) -> dict: + start = loc.get("range", {}).get("start", {}) + return { + "file": loc.get("absolutePath") or loc.get("uri", ""), + "line": start.get("line", 0), + "column": start.get("character", 0), + } + + @staticmethod + def _fmt_hover(result: Any) -> str: + contents = result.get("contents", "") + if isinstance(contents, dict): + return contents.get("value", str(contents)) + if isinstance(contents, list): + parts = [] + for c in contents: + parts.append(c.get("value", str(c)) if isinstance(c, dict) else str(c)) + return 
"\n".join(parts) + return str(contents) + + @staticmethod + def _fmt_symbol(sym: Any) -> dict: + loc = sym.get("location") or {} + if loc: + # SymbolInformation (workspaceSymbol) — location.uri + location.range + start = loc.get("range", {}).get("start", {}) + uri = loc.get("uri", "") + file = loc.get("absolutePath") or (uri.replace("file://", "") if uri.startswith("file://") else uri) + else: + # DocumentSymbol (documentSymbol) — range/selectionRange at top level, no file + start = sym.get("selectionRange", sym.get("range", {})).get("start", {}) + file = "" + return { + "name": sym.get("name", ""), + "kind": sym.get("kind"), + "file": file, + "line": start.get("line"), + } + + @staticmethod + def _fmt_call_hierarchy_item(item: Any) -> dict: + uri = item.get("uri", "") + start = item.get("range", {}).get("start", {}) + return { + "name": item.get("name", ""), + "kind": item.get("kind"), + "file": uri.replace("file://", "") if uri.startswith("file://") else uri, + "line": start.get("line"), + "item": item, # pass-through for incomingCalls/outgoingCalls + } + + @staticmethod + def _fmt_call_hierarchy_call(call: Any, direction: str) -> dict: + item_key = "from" if direction == "incoming" else "to" + caller = call.get(item_key, {}) + uri = caller.get("uri", "") + start = caller.get("range", {}).get("start", {}) + ranges = [r.get("start", {}) for r in call.get(f"{item_key}Ranges", [])] + return { + "name": caller.get("name", ""), + "kind": caller.get("kind"), + "file": uri.replace("file://", "") if uri.startswith("file://") else uri, + "line": start.get("line"), + "call_sites": [{"line": r.get("line"), "column": r.get("character")} for r in ranges], + "item": caller, # pass-through for chaining + } + + # ── tool handler ────────────────────────────────────────────────── + + async def _handle( + self, + operation: str, + file_path: str | None = None, + line: int | None = None, + character: int | None = None, + query: str | None = None, + language: str | None = None, + item: dict | None = None, + ) -> str: + # Resolve language (incomingCalls/outgoingCalls carry language in item["uri"]) + lang = language + if not lang and file_path: + lang = self._detect_language(file_path) + if not lang and operation in ("incomingCalls", "outgoingCalls") and item: + uri = item.get("uri", "") + lang = self._detect_language(uri) + if not lang: + supported = ", ".join(sorted(set(_EXT_TO_LANG.values()))) + return f"Cannot detect language. Set 'language' parameter. Supported: {supported}" + + # 10 MB file size guard (matches CC LSP limit) + if file_path: + err = self._check_file(file_path) + if err: + return err + + # Python advanced ops → pyright; other languages → multilspy server.send.* + use_pyright = lang == "python" and operation in self._ADVANCED_OPS + + pyright: _PyrightSession | None = None + session: _LSPSession | None = None + + if use_pyright: + try: + pyright = await self._get_pyright() + except Exception as e: + return f"Failed to start pyright language server: {e}" + else: + try: + session = await self._get_session(lang) + except Exception as e: + return f"Failed to start {lang} language server: {e}" + + rel = self._to_relative(file_path) if file_path else "" + # @@@dt-04-lsp-position-contract - CC exposes editor-facing 1-based + # positions and converts at the tool boundary. Leon must do the same + # or every position-aware operation silently lands one symbol off. 
+ zero_line = line - 1 if line is not None else None + zero_character = character - 1 if character is not None else None + + try: + if operation == "goToDefinition": + if not file_path or zero_line is None or zero_character is None: + return "goToDefinition requires: file_path, line, character" + assert session is not None + results = await session.request_definition(rel, zero_line, zero_character) + results = await self._filter_gitignored_batched_async(results) + if not results: + return "No definition found." + return json.dumps([self._fmt_location(r) for r in results], indent=2) + + elif operation == "findReferences": + if not file_path or zero_line is None or zero_character is None: + return "findReferences requires: file_path, line, character" + assert session is not None + results = await session.request_references(rel, zero_line, zero_character) + results = await self._filter_gitignored_batched_async(results) + if not results: + return "No references found." + return json.dumps([self._fmt_location(r) for r in results], indent=2) + + elif operation == "hover": + if not file_path or zero_line is None or zero_character is None: + return "hover requires: file_path, line, character" + assert session is not None + result = await session.request_hover(rel, zero_line, zero_character) + if not result: + return "No hover info." + return self._fmt_hover(result) + + elif operation == "documentSymbol": + if not file_path: + return "documentSymbol requires: file_path" + assert session is not None + symbols = await session.request_document_symbols(rel) + if not symbols: + return "No symbols found." + return json.dumps([self._fmt_symbol(s) for s in symbols], indent=2) + + elif operation == "workspaceSymbol": + if not query: + return "workspaceSymbol requires: query" + assert session is not None + symbols = await session.request_workspace_symbol(query) + if not symbols: + return f"No symbols matching '{query}'." + return json.dumps([self._fmt_symbol(s) for s in symbols], indent=2) + + elif operation == "goToImplementation": + if not file_path or zero_line is None or zero_character is None: + return "goToImplementation requires: file_path, line, character" + src = pyright if use_pyright else session + assert src is not None + results = await src.request_implementation(rel, zero_line, zero_character) + results = await self._filter_gitignored_batched_async(results) + if not results: + return "No implementation found." + return json.dumps([self._fmt_location(r) for r in results], indent=2) + + elif operation == "prepareCallHierarchy": + if not file_path or zero_line is None or zero_character is None: + return "prepareCallHierarchy requires: file_path, line, character" + src = pyright if use_pyright else session + assert src is not None + items = await src.request_prepare_call_hierarchy(rel, zero_line, zero_character) + if not items: + return "No call hierarchy items found." + return json.dumps([self._fmt_call_hierarchy_item(i) for i in items], indent=2) + + elif operation == "incomingCalls": + if not item: + return "incomingCalls requires: item (CallHierarchyItem from prepareCallHierarchy)" + src = pyright if use_pyright else session + assert src is not None + calls = await src.request_incoming_calls(item) + if not calls: + return "No incoming calls found." 
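Call-hierarchy results are designed to chain: the `item` echoed back by `_fmt_call_hierarchy_item` and `_fmt_call_hierarchy_call` is exactly what the next `incomingCalls`/`outgoingCalls` request consumes. An illustrative wire payload (field values invented) and the flattened shape the formatter derives from it:

```python
# One CallHierarchyIncomingCall roughly as a server returns it (LSP 3.16+).
incoming = {
    "from": {
        "name": "caller_fn",
        "kind": 12,  # SymbolKind.Function
        "uri": "file:///workspace/app.py",
        "range": {"start": {"line": 41, "character": 0}, "end": {"line": 50, "character": 0}},
    },
    # Call sites live in "fromRanges" for incoming and outgoing calls alike.
    "fromRanges": [{"start": {"line": 44, "character": 8}, "end": {"line": 44, "character": 17}}],
}

# _fmt_call_hierarchy_call(incoming, "incoming") flattens this to roughly:
#   {"name": "caller_fn", "kind": 12, "file": "/workspace/app.py", "line": 41,
#    "call_sites": [{"line": 44, "column": 8}], "item": {...the "from" item...}}
# Passing result["item"] back as the tool's `item` argument walks one level
# further up the call graph.
```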
+ return json.dumps([self._fmt_call_hierarchy_call(c, "incoming") for c in calls], indent=2) + + elif operation == "outgoingCalls": + if not item: + return "outgoingCalls requires: item (CallHierarchyItem from prepareCallHierarchy)" + src = pyright if use_pyright else session + assert src is not None + calls = await src.request_outgoing_calls(item) + if not calls: + return "No outgoing calls found." + return json.dumps([self._fmt_call_hierarchy_call(c, "outgoing") for c in calls], indent=2) + + else: + return ( + f"Unknown operation '{operation}'. " + "Valid: goToDefinition, findReferences, hover, documentSymbol, workspaceSymbol, " + "goToImplementation, prepareCallHierarchy, incomingCalls, outgoingCalls" + ) + + except Exception as e: + logger.exception("[LSPService] operation=%s failed", operation) + return f"LSP error: {e}" diff --git a/core/tools/mcp_resources/service.py b/core/tools/mcp_resources/service.py new file mode 100644 index 000000000..bf44c2cbc --- /dev/null +++ b/core/tools/mcp_resources/service.py @@ -0,0 +1,155 @@ +"""Expose MCP resource discovery and reading as agent-callable deferred tools.""" + +from __future__ import annotations + +import base64 +import json +from collections.abc import Callable +from typing import Any + +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema + +LIST_MCP_RESOURCES_SCHEMA = make_tool_schema( + name="ListMcpResources", + description="List MCP resources exposed by connected MCP servers.", + properties={ + "server": { + "type": "string", + "description": "Optional MCP server name to filter by.", + "minLength": 1, + } + }, +) + +READ_MCP_RESOURCE_SCHEMA = make_tool_schema( + name="ReadMcpResource", + description="Read a specific MCP resource by server name and URI.", + properties={ + "server": { + "type": "string", + "description": "MCP server name.", + "minLength": 1, + }, + "uri": { + "type": "string", + "description": "Resource URI to read.", + "minLength": 1, + }, + }, + required=["server", "uri"], +) + + +class McpResourceToolService: + def __init__( + self, + *, + registry: ToolRegistry, + client_fn: Callable[[], Any | None], + server_configs_fn: Callable[[], dict[str, Any]], + ) -> None: + self._client_fn = client_fn + self._server_configs_fn = server_configs_fn + if not self._server_configs_fn(): + return + self._register(registry) + + def _register(self, registry: ToolRegistry) -> None: + for name, schema, handler in [ + ("ListMcpResources", LIST_MCP_RESOURCES_SCHEMA, self._list_resources), + ("ReadMcpResource", READ_MCP_RESOURCE_SCHEMA, self._read_resource), + ]: + registry.register( + ToolEntry( + name=name, + mode=ToolMode.DEFERRED, + schema=schema, + handler=handler, + source="McpResourceToolService", + is_concurrency_safe=True, + is_read_only=True, + ) + ) + + def _get_client(self) -> Any: + client = self._client_fn() + if client is None: + raise ValueError("MCP client is not initialized") + return client + + def _available_servers(self) -> list[str]: + return list(self._server_configs_fn().keys()) + + @staticmethod + def _stringify_uri(value: Any) -> str | None: + if value is None: + return None + return str(value) + + async def _list_resources(self, server: str | None = None, **_kwargs: Any) -> str: + client = self._get_client() + server_names = [server] if server else self._available_servers() + if server and server not in self._available_servers(): + raise ValueError(f'MCP server not found: "{server}"') + + items: list[dict[str, Any]] = [] + for server_name in server_names: + async with 
client.session(server_name) as session: + result = await session.list_resources() + for resource in result.resources: + items.append( + { + "server": server_name, + "uri": self._stringify_uri(resource.uri), + "name": getattr(resource, "name", self._stringify_uri(resource.uri)), + "mime_type": getattr(resource, "mimeType", None), + "description": getattr(resource, "description", None), + } + ) + return json.dumps({"items": items, "total": len(items)}, ensure_ascii=False, indent=2) + + async def _read_resource(self, *, server: str, uri: str, **_kwargs: Any) -> str: + client = self._get_client() + if server not in self._available_servers(): + raise ValueError(f'MCP server not found: "{server}"') + + async with client.session(server) as session: + result = await session.read_resource(uri) + + contents: list[dict[str, Any]] = [] + for content in result.contents: + if hasattr(content, "text"): + contents.append( + { + "uri": self._stringify_uri(content.uri), + "mime_type": getattr(content, "mimeType", None), + "text": content.text, + } + ) + continue + if hasattr(content, "blob"): + blob_size = len(base64.b64decode(content.blob)) + contents.append( + { + "uri": self._stringify_uri(content.uri), + "mime_type": getattr(content, "mimeType", None), + "text": f"Binary MCP resource omitted from context ({blob_size} bytes).", + } + ) + continue + contents.append( + { + "uri": self._stringify_uri(getattr(content, "uri", uri)), + "mime_type": getattr(content, "mimeType", None), + } + ) + + return json.dumps( + { + "server": server, + "uri": uri, + "contents": contents, + }, + ensure_ascii=False, + indent=2, + ) diff --git a/core/tools/search/service.py b/core/tools/search/service.py index 4329de6e4..a6ff0a4d4 100644 --- a/core/tools/search/service.py +++ b/core/tools/search/service.py @@ -12,11 +12,16 @@ import subprocess from pathlib import Path -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema DEFAULT_EXCLUDES: list[str] = [ "node_modules", ".git", + ".svn", + ".hg", + ".bzr", + ".jj", + ".sl", "__pycache__", ".venv", "venv", @@ -50,67 +55,76 @@ def _register(self, registry: ToolRegistry) -> None: ToolEntry( name="Grep", mode=ToolMode.INLINE, - schema={ - "name": "Grep", - "description": "Search file contents using regex patterns.", - "parameters": { - "type": "object", - "properties": { - "pattern": { - "type": "string", - "description": "Regex pattern to search for", - }, - "path": { - "type": "string", - "description": "File or directory (absolute). Defaults to workspace.", - }, - "glob": { - "type": "string", - "description": "Filter files by glob (e.g., '*.py')", - }, - "type": { - "type": "string", - "description": "Filter by file type (e.g., 'py', 'js')", - }, - "case_insensitive": { - "type": "boolean", - "description": "Case insensitive search", - }, - "after_context": { - "type": "integer", - "description": "Lines to show after each match", - }, - "before_context": { - "type": "integer", - "description": "Lines to show before each match", - }, - "context": { - "type": "integer", - "description": "Context lines before and after each match", - }, - "output_mode": { - "type": "string", - "enum": ["content", "files_with_matches", "count"], - "description": "Output format. 
Default: files_with_matches", - }, - "head_limit": { - "type": "integer", - "description": "Limit to first N entries", - }, - "offset": { - "type": "integer", - "description": "Skip first N entries", - }, - "multiline": { - "type": "boolean", - "description": "Allow pattern to span multiple lines", - }, + schema=make_tool_schema( + name="Grep", + description=( + "Regex search across files (ripgrep-based). " + "Default output_mode: files_with_matches (sorted by mtime). Default head_limit: 250 entries. " + "Auto-excludes .git/.svn/.hg dirs. Max column width 500 chars (suppresses minified/base64). " + "Use output_mode='content' with after_context/before_context/context for context lines." + ), + properties={ + "pattern": { + "type": "string", + "description": "Regex pattern to search for", + }, + "path": { + "type": "string", + "description": "File or directory (absolute). Defaults to workspace.", + }, + "glob": { + "type": "string", + "description": "Filter files by glob (e.g., '*.py')", + }, + "type": { + "type": "string", + "description": "Filter by file type (e.g., 'py', 'js')", + }, + "case_insensitive": { + "type": "boolean", + "description": "Case insensitive search", + }, + "after_context": { + "type": "integer", + "description": "Lines to show after each match", + }, + "before_context": { + "type": "integer", + "description": "Lines to show before each match", + }, + "context": { + "type": "integer", + "description": "Context lines before and after each match", + }, + "output_mode": { + "type": "string", + "enum": ["content", "files_with_matches", "count"], + "description": "Output format. Default: files_with_matches", + }, + "head_limit": { + "type": "integer", + "description": "Limit to first N entries", + }, + "offset": { + "type": "integer", + "description": "Skip first N entries", + }, + "multiline": { + "type": "boolean", + "description": "Allow pattern to span multiple lines", + }, + "line_numbers": { + "type": "boolean", + "description": "Show line numbers (default true). Only applies with output_mode='content'.", }, - "required": ["pattern"], }, - }, + required=["pattern"], + ), handler=self._grep, source="SearchService", + search_hint="search file contents regex pattern matching ripgrep", + is_read_only=True, + is_concurrency_safe=True, ) ) @@ -118,26 +132,30 @@ def _register(self, registry: ToolRegistry) -> None: ToolEntry( name="Glob", mode=ToolMode.INLINE, - schema={ - "name": "Glob", - "description": "Find files by glob pattern. Returns paths sorted by modification time.", - "parameters": { - "type": "object", - "properties": { - "pattern": { - "type": "string", - "description": "Glob pattern (e.g., '**/*.py')", - }, - "path": { - "type": "string", - "description": "Directory to search (absolute). Defaults to workspace.", - }, + schema=make_tool_schema( + name="Glob", + description=( + "Fast file pattern matching (ripgrep-based). Returns paths sorted by modification time. " + "Includes hidden files, ignores .gitignore. Default limit 100 results. " + "Use '**/*.py' for recursive search. Path must be absolute." + ), + properties={ + "pattern": { + "type": "string", + "description": "Glob pattern (e.g., '**/*.py')", + }, + "path": { + "type": "string", + "description": "Directory to search (absolute). 
Defaults to workspace.", }, - "required": ["pattern"], }, - }, + required=["pattern"], + ), handler=self._glob, source="SearchService", + search_hint="find files by name glob pattern matching", + is_read_only=True, + is_concurrency_safe=True, ) ) @@ -183,9 +201,10 @@ def _grep( before_context: int | None = None, context: int | None = None, output_mode: str = "files_with_matches", - head_limit: int | None = None, + head_limit: int | None = 250, offset: int | None = None, multiline: bool = False, + line_numbers: bool = True, ) -> str: ok, error, resolved = self._validate_path(path) if not ok: @@ -209,6 +228,7 @@ def _grep( head_limit=head_limit, offset=offset, multiline=multiline, + line_numbers=line_numbers, ) except Exception: pass # fallback to Python @@ -238,8 +258,9 @@ def _ripgrep_search( head_limit: int | None, offset: int | None, multiline: bool, + line_numbers: bool = True, ) -> str: - cmd: list[str] = ["rg", pattern, str(path)] + cmd: list[str] = ["rg", pattern, str(path), "--max-columns", "500"] for excl in DEFAULT_EXCLUDES: cmd.extend(["--glob", f"!{excl}"]) @@ -258,7 +279,8 @@ def _ripgrep_search( elif output_mode == "count": cmd.append("--count") elif output_mode == "content": - cmd.extend(["--line-number", "--no-heading"]) + ln_flag = "--line-number" if line_numbers else "--no-line-number" + cmd.extend([ln_flag, "--no-heading"]) if context is not None: cmd.extend(["-C", str(context)]) else: diff --git a/core/tools/skills/service.py b/core/tools/skills/service.py index e65215a20..17c0b842a 100644 --- a/core/tools/skills/service.py +++ b/core/tools/skills/service.py @@ -9,9 +9,10 @@ from __future__ import annotations import re +from collections.abc import Sequence from pathlib import Path -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema class SkillsService: @@ -20,7 +21,7 @@ class SkillsService: def __init__( self, registry: ToolRegistry, - skill_paths: list[str | Path], + skill_paths: Sequence[str | Path], enabled_skills: dict[str, bool] | None = None, ): self.skill_paths = [Path(p).expanduser().resolve() for p in skill_paths] @@ -65,6 +66,8 @@ def _register(self, registry: ToolRegistry) -> None: schema=self._get_schema, handler=self._load_skill, source="SkillsService", + is_concurrency_safe=True, + is_read_only=True, ) ) @@ -72,24 +75,22 @@ def _get_schema(self) -> dict: available_skills = list(self._skills_index.keys()) skills_list = "\n".join(f"- {name}" for name in available_skills) - return { - "name": "load_skill", - "description": ( - f"Load a specialized skill to access domain-specific knowledge and workflows.\n\n" - f"Available skills:\n{skills_list}\n\n" - f"Returns the skill's instructions and context." + return make_tool_schema( + name="load_skill", + description=( + f"Load a skill for domain-specific guidance. " + f"Use when you need specialized workflows (TDD, debugging, git). " + f"Skills are loaded on-demand to save context.\n\n" + f"Available skills:\n{skills_list}" ), - "parameters": { - "type": "object", - "properties": { - "skill_name": { - "type": "string", - "description": f"Name of the skill to load. Available: {', '.join(self._skills_index.keys())}", - }, + properties={ + "skill_name": { + "type": "string", + "description": f"Name of the skill to load. 
Available: {', '.join(self._skills_index.keys())}", }, - "required": ["skill_name"], }, - } + required=["skill_name"], + ) def _load_skill(self, skill_name: str) -> str: if skill_name not in self._skills_index: diff --git a/core/tools/task/service.py b/core/tools/task/service.py index b6e9f6f96..e09fd39fa 100644 --- a/core/tools/task/service.py +++ b/core/tools/task/service.py @@ -12,118 +12,110 @@ from pathlib import Path from typing import Any -from backend.web.core.storage_factory import make_tool_task_repo -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema from core.tools.task.types import Task, TaskStatus +from storage.runtime import build_tool_task_repo logger = logging.getLogger(__name__) DEFAULT_DB_PATH = Path.home() / ".leon" / "tasks.db" -TASK_CREATE_SCHEMA = { - "name": "TaskCreate", - "description": ("Create a new task to track work progress. Tasks are created with status 'pending'."), - "parameters": { - "type": "object", - "properties": { - "subject": { - "type": "string", - "description": "Brief task title in imperative form", - }, - "description": { - "type": "string", - "description": "Detailed description of what needs to be done", - }, - "active_form": { - "type": "string", - "description": "Present continuous form for spinner display", - }, - "metadata": { - "type": "object", - "description": "Optional metadata to attach to the task", - }, +TASK_CREATE_SCHEMA = make_tool_schema( + name="TaskCreate", + description=( + "Create a task to track multi-step work. " + "Use for complex tasks with 3+ steps or when managing multiple parallel workstreams. " + "Status starts as 'pending'." + ), + properties={ + "subject": { + "type": "string", + "description": "Brief task title in imperative form", }, - "required": ["subject", "description"], - }, -} - -TASK_GET_SCHEMA = { - "name": "TaskGet", - "description": "Get full details of a task including description and dependencies.", - "parameters": { - "type": "object", - "properties": { - "task_id": { - "type": "string", - "description": "The task ID to retrieve", - }, + "description": { + "type": "string", + "description": "Detailed description of what needs to be done", + }, + "active_form": { + "type": "string", + "description": "Present continuous form for spinner display", + }, + "metadata": { + "type": "object", + "description": "Optional metadata to attach to the task", }, - "required": ["task_id"], }, -} - -TASK_LIST_SCHEMA = { - "name": "TaskList", - "description": ("List all tasks with summary info: id, subject, status, owner, blockedBy."), - "parameters": { - "type": "object", - "properties": {}, + required=["subject", "description"], +) + +TASK_GET_SCHEMA = make_tool_schema( + name="TaskGet", + description="Get full details of a task including description and dependencies.", + properties={ + "task_id": { + "type": "string", + "description": "The task ID to retrieve", + }, }, -} - -TASK_UPDATE_SCHEMA = { - "name": "TaskUpdate", - "description": ( + required=["task_id"], +) + +TASK_LIST_SCHEMA = make_tool_schema( + name="TaskList", + description="List all tasks with summary info: id, subject, status, owner, blockedBy.", + properties={}, +) + +TASK_UPDATE_SCHEMA = make_tool_schema( + name="TaskUpdate", + description=( "Update a task's status, dependencies, or other fields. " "Status flow: pending -> in_progress -> completed. " "Use status='deleted' to remove a task." 
), - "parameters": { - "type": "object", - "properties": { - "task_id": { - "type": "string", - "description": "The task ID to update", - }, - "status": { - "type": "string", - "enum": ["pending", "in_progress", "completed", "deleted"], - "description": "New status for the task", - }, - "subject": { - "type": "string", - "description": "New subject for the task", - }, - "description": { - "type": "string", - "description": "New description for the task", - }, - "active_form": { - "type": "string", - "description": "New activeForm for the task", - }, - "owner": { - "type": "string", - "description": "Assign task to an agent", - }, - "add_blocks": { - "type": "array", - "items": {"type": "string"}, - "description": "Task IDs that this task blocks", - }, - "add_blocked_by": { - "type": "array", - "items": {"type": "string"}, - "description": "Task IDs that block this task", - }, - "metadata": { - "type": "object", - "description": "Metadata keys to merge (set key to null to delete)", - }, + properties={ + "task_id": { + "type": "string", + "description": "The task ID to update", + }, + "status": { + "type": "string", + "enum": ["pending", "in_progress", "completed", "deleted"], + "description": "New status for the task", + }, + "subject": { + "type": "string", + "description": "New subject for the task", + }, + "description": { + "type": "string", + "description": "New description for the task", + }, + "active_form": { + "type": "string", + "description": "New activeForm for the task", + }, + "owner": { + "type": "string", + "description": "Assign task to an agent", + }, + "add_blocks": { + "type": "array", + "items": {"type": "string"}, + "description": "Task IDs that this task blocks", + }, + "add_blocked_by": { + "type": "array", + "items": {"type": "string"}, + "description": "Task IDs that block this task", + }, + "metadata": { + "type": "object", + "description": "Metadata keys to merge (set key to null to delete)", }, - "required": ["task_id"], }, -} + required=["task_id"], +) class TaskService: @@ -139,14 +131,15 @@ class TaskService: def __init__( self, registry: ToolRegistry, - workspace_root: str | None = None, + workspace_root: str | Path | None = None, db_path: Path | None = None, thread_id: str | None = None, + repo: Any | None = None, ): - self._repo = make_tool_task_repo(db_path or DEFAULT_DB_PATH) + self._repo = repo or build_tool_task_repo(db_path=db_path or DEFAULT_DB_PATH) self._default_thread_id = thread_id # override for tests / single-agent TUI self._register(registry) - logger.info("TaskService initialized (db=%s)", db_path or DEFAULT_DB_PATH) + logger.info("TaskService initialized") def _get_thread_id(self) -> str: if self._default_thread_id: @@ -157,12 +150,14 @@ def _get_thread_id(self) -> str: return tid or "default" def _register(self, registry: ToolRegistry) -> None: + read_only = {"TaskGet", "TaskList"} for name, schema, handler in [ ("TaskCreate", TASK_CREATE_SCHEMA, self._create), ("TaskGet", TASK_GET_SCHEMA, self._get), ("TaskList", TASK_LIST_SCHEMA, self._list), ("TaskUpdate", TASK_UPDATE_SCHEMA, self._update), ]: + ro = name in read_only registry.register( ToolEntry( name=name, @@ -170,6 +165,8 @@ def _register(self, registry: ToolRegistry) -> None: schema=schema, handler=handler, source="TaskService", + is_concurrency_safe=ro, + is_read_only=ro, ) ) diff --git a/core/tools/tool_search/service.py b/core/tools/tool_search/service.py index 9b5ceba77..234007182 100644 --- a/core/tools/tool_search/service.py +++ b/core/tools/tool_search/service.py @@ -9,24 
+9,26 @@ import json import logging -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema logger = logging.getLogger(__name__) -TOOL_SEARCH_SCHEMA = { - "name": "tool_search", - "description": ("Search for available tools. Use this to discover tools that might help with your task."), - "parameters": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Search query - tool name or description of what you want to do", - }, +TOOL_SEARCH_SCHEMA = make_tool_schema( + name="tool_search", + description=( + "Search for available deferred tools by name or keyword. " + "Use 'select:ToolA,ToolB' for exact deferred-tool lookup (returns full schema). " + "Use keywords for fuzzy search (up to 5 results). " + "Deferred tools are only usable after discovery via this tool." + ), + properties={ + "query": { + "type": "string", + "description": "Search query. Use 'select:ToolA,ToolB' for exact deferred-tool lookup, or keywords for fuzzy search.", }, - "required": ["query"], }, -} + required=["query"], +) class ToolSearchService: @@ -41,11 +43,34 @@ def __init__(self, registry: ToolRegistry): schema=TOOL_SEARCH_SCHEMA, handler=self._search, source="ToolSearchService", + is_concurrency_safe=True, + is_read_only=True, ) ) logger.info("ToolSearchService initialized") - def _search(self, query: str = "", **kwargs) -> str: - results = self._registry.search(query) + def _search(self, query: str = "", tool_context=None, **kwargs) -> str: + select_names: list[str] = [] + normalized = query.strip() + if normalized.lower().startswith("select:"): + select_names = [name.strip() for name in normalized[len("select:") :].split(",") if name.strip()] + + results = self._registry.search(query, modes={ToolMode.DEFERRED}) + if select_names: + found_names = {entry.name for entry in results} + missing = [name for name in select_names if name not in found_names] + inline = [name for name in missing if (entry := self._registry.get(name)) is not None and entry.mode == ToolMode.INLINE] + unknown = [name for name in missing if self._registry.get(name) is None] + if inline or unknown: + parts: list[str] = [] + if inline: + parts.append(f"inline/already-available tools: {', '.join(inline)}") + if unknown: + parts.append(f"unknown tools: {', '.join(unknown)}") + raise ValueError("tool_search select: only supports deferred tools; " + "; ".join(parts)) + else: + results = results[:5] + if tool_context is not None and hasattr(tool_context, "discovered_tool_names"): + tool_context.discovered_tool_names.update(entry.name for entry in results) schemas = [e.get_schema() for e in results] return json.dumps(schemas, indent=2, ensure_ascii=False) diff --git a/core/tools/web/fetchers/markdownify.py b/core/tools/web/fetchers/markdownify.py index 22e855f8e..508790276 100644 --- a/core/tools/web/fetchers/markdownify.py +++ b/core/tools/web/fetchers/markdownify.py @@ -3,12 +3,15 @@ from __future__ import annotations import re +from collections.abc import Callable +from typing import Any import httpx from core.tools.web.fetchers.base import BaseFetcher from core.tools.web.types import ContentChunk, FetchLimits, FetchResult +md: Callable[..., str] | None = None try: from markdownify import markdownify as md @@ -16,6 +19,7 @@ except ImportError: HAS_MARKDOWNIFY = False +BeautifulSoup: Any | None = None try: from bs4 import BeautifulSoup @@ -112,7 +116,11 @@ def _process_html(self, html: str, result: FetchResult) -> str: 
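> The markdownify.py hunks apply an optional-import sentinel: the symbol is typed as Optional at module scope so both import outcomes satisfy the type checker, and the `None` check is repeated at call time so a broken environment fails with an explicit RuntimeError instead of a NameError. A minimal standalone sketch of the same pattern (the module layout and `html_to_markdown` helper are illustrative, not part of the patch):

```python
from collections.abc import Callable

# Sentinel typed as Optional so type-checkers accept both import outcomes.
md: Callable[..., str] | None = None
try:
    from markdownify import markdownify as md

    HAS_MARKDOWNIFY = True
except ImportError:
    HAS_MARKDOWNIFY = False


def html_to_markdown(html: str) -> str:
    # Re-check the sentinel at call time: even after HAS_MARKDOWNIFY was
    # consulted, this keeps the failure mode explicit rather than a NameError.
    if md is None:
        raise RuntimeError("markdownify import unexpectedly unavailable")
    return md(html, heading_style="ATX")
```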
def _markdownify_html(self, html: str, result: FetchResult) -> str: """Convert HTML to Markdown using markdownify.""" + if md is None: + raise RuntimeError("markdownify import unexpectedly unavailable") if self.has_bs4: + if BeautifulSoup is None: + raise RuntimeError("BeautifulSoup import unexpectedly unavailable") soup = BeautifulSoup(html, "html.parser") title_tag = soup.find("title") @@ -145,6 +153,8 @@ def _markdownify_html(self, html: str, result: FetchResult) -> str: def _bs4_extract(self, html: str, result: FetchResult) -> str: """Extract text using BeautifulSoup.""" + if BeautifulSoup is None: + raise RuntimeError("BeautifulSoup import unexpectedly unavailable") soup = BeautifulSoup(html, "html.parser") title_tag = soup.find("title") diff --git a/core/tools/web/middleware.py b/core/tools/web/middleware.py index fedf1708e..1cfef8827 100644 --- a/core/tools/web/middleware.py +++ b/core/tools/web/middleware.py @@ -103,8 +103,8 @@ async def _web_search_impl( self, Query: str, MaxResults: int | None = None, - IncludeDomains: list[str] | None = None, - ExcludeDomains: list[str] | None = None, + AllowedDomains: list[str] | None = None, + BlockedDomains: list[str] | None = None, ) -> SearchResult: """ 实现 web_search(多提供商降级) @@ -121,8 +121,8 @@ async def _web_search_impl( result = await searcher.search( query=Query, max_results=max_results, - include_domains=IncludeDomains, - exclude_domains=ExcludeDomains, + include_domains=AllowedDomains, + exclude_domains=BlockedDomains, ) if not result.error: return result @@ -217,12 +217,12 @@ def _get_tool_definitions(self) -> list[dict]: "type": "integer", "description": "Maximum number of results (default: 5)", }, - "IncludeDomains": { + "AllowedDomains": { "type": "array", "items": {"type": "string"}, "description": "Only include results from these domains", }, - "ExcludeDomains": { + "BlockedDomains": { "type": "array", "items": {"type": "string"}, "description": "Exclude results from these domains", @@ -281,8 +281,8 @@ async def _handle_tool_call(self, tool_name: str, args: dict, tool_call_id: str) result = await self._web_search_impl( Query=args.get("Query", ""), MaxResults=args.get("MaxResults"), - IncludeDomains=args.get("IncludeDomains"), - ExcludeDomains=args.get("ExcludeDomains"), + AllowedDomains=args.get("AllowedDomains"), + BlockedDomains=args.get("BlockedDomains"), ) return ToolMessage(content=result.format_output(), tool_call_id=tool_call_id) @@ -304,7 +304,8 @@ async def awrap_tool_call( tool_call = request.tool_call tool_name = tool_call.get("name") args = tool_call.get("args", {}) - tool_call_id = tool_call.get("id", "") + raw_tool_call_id = tool_call.get("id", "") + tool_call_id = raw_tool_call_id if isinstance(raw_tool_call_id, str) else "" result = await self._handle_tool_call(tool_name, args, tool_call_id) if result is not None: diff --git a/core/tools/web/service.py b/core/tools/web/service.py index 077db9b70..02d2f12e8 100644 --- a/core/tools/web/service.py +++ b/core/tools/web/service.py @@ -10,7 +10,7 @@ import asyncio from typing import Any -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry +from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry, make_tool_schema from core.tools.web.fetchers.jina import JinaFetcher from core.tools.web.fetchers.markdownify import MarkdownifyFetcher from core.tools.web.searchers.exa import ExaSearcher @@ -59,64 +59,74 @@ def _register(self, registry: ToolRegistry) -> None: registry.register( ToolEntry( name="WebSearch", - mode=ToolMode.INLINE, - schema={ - 
"name": "WebSearch", - "description": "Search the web for current information. Returns titles, URLs, and snippets.", - "parameters": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Search query", - }, - "max_results": { - "type": "integer", - "description": "Maximum number of results (default: 5)", - }, - "include_domains": { - "type": "array", - "items": {"type": "string"}, - "description": "Only include results from these domains", - }, - "exclude_domains": { - "type": "array", - "items": {"type": "string"}, - "description": "Exclude results from these domains", - }, + mode=ToolMode.DEFERRED, + schema=make_tool_schema( + name="WebSearch", + description=( + "Search the web. Returns titles, URLs, and text snippets. " + "Use for current events, documentation lookups, or fact-checking. Max 10 results per query." + ), + properties={ + "query": { + "type": "string", + "description": "Search query", + "minLength": 1, + }, + "max_results": { + "type": "integer", + "description": "Maximum number of results (default: 5)", + "minimum": 1, + "maximum": 10, + }, + "allowed_domains": { + "type": "array", + "items": {"type": "string"}, + "description": "Only include results from these domains", + }, + "blocked_domains": { + "type": "array", + "items": {"type": "string"}, + "description": "Exclude results from these domains", }, - "required": ["query"], }, - }, + required=["query"], + ), handler=self._web_search, source="WebService", + is_concurrency_safe=True, + is_read_only=True, ) ) registry.register( ToolEntry( name="WebFetch", - mode=ToolMode.INLINE, - schema={ - "name": "WebFetch", - "description": "Fetch a URL and extract specific information using AI. Returns processed content, not raw HTML.", - "parameters": { - "type": "object", - "properties": { - "url": { - "type": "string", - "description": "URL to fetch content from", - }, - "prompt": { - "type": "string", - "description": "What information to extract from the page", - }, + mode=ToolMode.DEFERRED, + schema=make_tool_schema( + name="WebFetch", + description=( + "Fetch a URL and extract specific information via AI. Returns processed text, not raw HTML. " + "Provide a focused prompt describing what to extract. " + "Useful for reading documentation pages, API references, or articles." 
+ ), + properties={ + "url": { + "type": "string", + "description": "URL to fetch content from", + "minLength": 1, + }, + "prompt": { + "type": "string", + "description": "What information to extract from the page", + "minLength": 1, }, - "required": ["url", "prompt"], }, - }, + required=["url", "prompt"], + ), handler=self._web_fetch, source="WebService", + is_concurrency_safe=True, + is_read_only=True, ) ) @@ -124,8 +134,8 @@ async def _web_search( self, query: str, max_results: int | None = None, - include_domains: list[str] | None = None, - exclude_domains: list[str] | None = None, + allowed_domains: list[str] | None = None, + blocked_domains: list[str] | None = None, ) -> str: if not self._searchers: return "No search providers configured" @@ -137,8 +147,8 @@ async def _web_search( result: SearchResult = await searcher.search( query=query, max_results=effective_max, - include_domains=include_domains, - exclude_domains=exclude_domains, + include_domains=allowed_domains, + exclude_domains=blocked_domains, ) if not result.error: return result.format_output() diff --git a/core/tools/wechat/service.py b/core/tools/wechat/service.py deleted file mode 100644 index 9cb57e233..000000000 --- a/core/tools/wechat/service.py +++ /dev/null @@ -1,109 +0,0 @@ -"""WeChat tool service — registers wechat_send and wechat_contacts into ToolRegistry. - -Thin wrapper: actual API calls go through WeChatConnection (backend). -Tools are scoped to the agent's owner's user_id (the human who connected WeChat). -""" - -from __future__ import annotations - -import logging -from collections.abc import Callable -from typing import TYPE_CHECKING - -from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry - -if TYPE_CHECKING: - from backend.web.services.wechat_service import WeChatConnection - -logger = logging.getLogger(__name__) - - -class WeChatToolService: - """Registers WeChat tools for agents to interact with WeChat contacts. - - @@@lazy-connection — connection_fn is called at tool invocation time, not registration. - This avoids import-time dependency on app.state. - """ - - def __init__(self, registry: ToolRegistry, connection_fn: Callable[[], WeChatConnection | None]) -> None: - self._get_conn = connection_fn - self._register(registry) - - def _register(self, registry: ToolRegistry) -> None: - self._register_wechat_send(registry) - self._register_wechat_contacts(registry) - - def _register_wechat_send(self, registry: ToolRegistry) -> None: - get_conn = self._get_conn - - async def handle(user_id: str, text: str) -> str: - conn = get_conn() - if not conn or not conn.connected: - return "Error: WeChat is not connected. Ask the owner to connect via the Connections page." - try: - await conn.send_message(user_id, text) - return f"Message sent to {user_id.split('@')[0]}" - except RuntimeError as e: - return f"Error: {e}" - - registry.register( - ToolEntry( - name="wechat_send", - mode=ToolMode.INLINE, - schema={ - "name": "wechat_send", - "description": ( - "Send a text message to a WeChat user via the connected WeChat bot.\n" - "Use wechat_contacts to find available user_ids.\n" - "The user must have messaged the bot first before you can reply.\n" - "Keep messages concise — WeChat is a chat app. Use plain text, no markdown." - ), - "parameters": { - "type": "object", - "properties": { - "user_id": { - "type": "string", - "description": "WeChat user ID (format: xxx@im.wechat). Get from wechat_contacts.", - }, - "text": { - "type": "string", - "description": "Plain text message to send. 
No markdown — WeChat won't render it.", - }, - }, - "required": ["user_id", "text"], - }, - }, - handler=handle, - source="wechat", - ) - ) - - def _register_wechat_contacts(self, registry: ToolRegistry) -> None: - get_conn = self._get_conn - - def handle() -> str: - conn = get_conn() - if not conn or not conn.connected: - return "WeChat is not connected." - contacts = conn.list_contacts() - if not contacts: - return "No WeChat contacts yet. Users need to message the bot first." - lines = [f"- {c['display_name']} [user_id: {c['user_id']}]" for c in contacts] - return "\n".join(lines) - - registry.register( - ToolEntry( - name="wechat_contacts", - mode=ToolMode.INLINE, - schema={ - "name": "wechat_contacts", - "description": "List WeChat contacts who have messaged the bot. Returns user_ids for use with wechat_send.", - "parameters": { - "type": "object", - "properties": {}, - }, - }, - handler=handle, - source="wechat", - ) - ) diff --git a/docker-compose.yml b/docker-compose.yml index cb302edf3..15c3e7c7a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,6 +3,10 @@ services: build: context: . dockerfile: Dockerfile + volumes: + # @@@staging-leon-home-volume - staging runtime state (models/members/sandboxes) + # must survive container replacement, otherwise each deploy boots with an empty ~/.leon. + - leon-home:/root/.leon restart: unless-stopped frontend: @@ -14,3 +18,6 @@ services: depends_on: - backend restart: unless-stopped + +volumes: + leon-home: diff --git a/docs/en/introduction.mdx b/docs/en/introduction.mdx index 306238336..84e35bd7d 100644 --- a/docs/en/introduction.mdx +++ b/docs/en/introduction.mdx @@ -49,7 +49,7 @@ flowchart LR direction LR H["Human Entity"] A["Agent Entity"] - H <-->|chat_send / chat_read| A + H <-->|send_message / read_messages| A end subgraph Infra["Infrastructure"] diff --git a/docs/en/multi-agent-chat.mdx b/docs/en/multi-agent-chat.mdx index 6a10e8fec..2da8a8591 100644 --- a/docs/en/multi-agent-chat.mdx +++ b/docs/en/multi-agent-chat.mdx @@ -3,7 +3,7 @@ title: Multi-agent chat sidebarTitle: Social layer description: How humans and agents communicate on the Mycel social layer icon: comments -keywords: [entity, chat, agent communication, social, directory, chat_send, SSE] +keywords: [entity, chat, agent communication, social, list_chats, send_message, SSE] --- Mycel's social layer lets humans and agents coexist as equals in a shared messaging environment. Agents can initiate conversations, forward context to teammates, and collaborate autonomously — without any special orchestration code. @@ -19,7 +19,7 @@ flowchart LR direction TB HE["Human Entity"] AE["Agent Entity"] - HE <-->|"chat_send / chat_read"| AE + HE <-->|"send_message / read_messages"| AE end T --> Chat @@ -53,42 +53,33 @@ Every participant on the platform — human or agent — has an **Entity**. When ## Agent chat tools -Agents have five built-in tools for social interaction: +Agents have four built-in tools for social interaction: - - Browse all known Entities. Returns Entity IDs needed for other tools. - - ```text - directory(search="Alice", type="human") - → - Alice [human] entity_id=m_abc123-1 - ``` - - - + List the agent's active chats with unread counts and last message preview. ```text - chats(unread_only=true) + list_chats(unread_only=true) → - Alice [m_abc123-1] (3 unread) — last: "Can you help me with..." ``` - + Read message history in a chat. Automatically marks messages as read. 
```text - chat_read(entity_id="m_abc123-1", limit=10) + read_messages(entity_id="m_abc123-1", limit=10) → [Alice]: Can you help me with this bug? [you]: Sure, let me take a look. ``` - + Send a message. The agent must read unread messages before sending (enforced by the system). ```text - chat_send(content="Here's the fix.", entity_id="m_abc123-1") + send_message(content="Here's the fix.", entity_id="m_abc123-1") ``` **Signal protocol** controls conversation flow: @@ -100,11 +91,11 @@ Agents have five built-in tools for social interaction: | `close` | "Conversation over, do not reply" | - + Search through message history across all chats or within a specific chat. ```text - chat_search(query="bug fix", entity_id="m_abc123-1") + search_messages(query="bug fix", entity_id="m_abc123-1") ``` @@ -124,15 +115,15 @@ sequenceDiagram API->>H: SSE push (message event) API->>Q: Enqueue notification Q->>T: Wake thread (if idle) - T->>API: chat_read (get actual message) + T->>API: read_messages (get actual message) T->>T: Process message - T->>API: chat_send (response) + T->>API: send_message (response) API->>DB: Store response API->>H: SSE push (message event) ``` - Notifications don't include message content — the agent must call `chat_read` to read them. This enforces a consistent **read → respond** pattern and prevents agents from acting on stale summaries. + Notifications don't include message content — the agent must call `read_messages` to read them. This enforces a consistent **read → respond** pattern and prevents agents from acting on stale summaries. ## Real-time updates diff --git a/docs/en/quickstart.mdx b/docs/en/quickstart.mdx index 91954831c..204f99163 100644 --- a/docs/en/quickstart.mdx +++ b/docs/en/quickstart.mdx @@ -100,7 +100,7 @@ Mycel's social layer lets agents message each other — and you — like a group - In the first agent's thread, tell it to message your code reviewer: "Ask the code reviewer to look at this function." The agent will call `chat_send` and the reviewer will respond autonomously. + In the first agent's thread, tell it to message your code reviewer: "Ask the code reviewer to look at this function." The agent will call `send_message` and the reviewer will respond autonomously. 
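> The renamed chat tools keep the docs' read → respond contract mechanical to implement. A hypothetical agent-side handler, assuming a `chat_tools` client whose async methods mirror the tool names above (`compose_reply`, `handle_wake`, and the client object itself are illustrative, not part of the patch):

```python
def compose_reply(messages: list[dict]) -> str:
    # Placeholder for the agent's actual reasoning over the history.
    last = messages[-1]["content"] if messages else ""
    return f"Acknowledged: {last[:80]}"


async def handle_wake(chat_tools, entity_id: str) -> None:
    # Notifications carry no message content: read first (this also marks the
    # messages as read, satisfying the enforced read -> respond order).
    messages = await chat_tools.read_messages(entity_id=entity_id, limit=10)
    # Only after reading may the agent send; the system rejects blind sends.
    await chat_tools.send_message(content=compose_reply(messages), entity_id=entity_id)
```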
diff --git a/docs/zh/introduction.mdx b/docs/zh/introduction.mdx index fdc5e8693..9566e8cfe 100644 --- a/docs/zh/introduction.mdx +++ b/docs/zh/introduction.mdx @@ -49,7 +49,7 @@ flowchart LR direction LR H["人类 Entity"] A["Agent Entity"] - H <-->|"chat_send / chat_read"| A + H <-->|"send_message / read_messages"| A end subgraph Infra["基础设施"] diff --git a/docs/zh/multi-agent-chat.mdx b/docs/zh/multi-agent-chat.mdx index 3a44bd48c..4fb44940a 100644 --- a/docs/zh/multi-agent-chat.mdx +++ b/docs/zh/multi-agent-chat.mdx @@ -3,7 +3,7 @@ title: 多 Agent 通讯 sidebarTitle: 社交层 description: 人与 Agent 如何在 Mycel 社交层中通讯 icon: comments -keywords: [entity, chat, agent 通讯, 社交, directory, chat_send, SSE] +keywords: [entity, chat, agent 通讯, 社交, list_chats, send_message, SSE] --- Mycel 的社交层让人与 Agent 在共享的消息环境中平等共存。Agent 可以主动发起对话、把上下文转发给队友、自主协作 — 无需任何特殊的编排代码。 @@ -19,7 +19,7 @@ flowchart LR direction TB HE["人类 Entity"] AE["Agent Entity"] - HE <-->|"chat_send / chat_read"| AE + HE <-->|"send_message / read_messages"| AE end T --> Chat @@ -52,39 +52,30 @@ flowchart LR ## Agent 聊天工具 - - 浏览所有已知的 Entity,返回其他工具需要的 Entity ID。 - - ```text - directory(search="Alice", type="human") - → - Alice [human] entity_id=m_abc123-1 - ``` - - - + 列出 Agent 的活跃对话,包含未读数和最新消息预览。 ```text - chats(unread_only=true) + list_chats(unread_only=true) → - Alice [m_abc123-1] (3 条未读) — 最新:"能帮我看看..." ``` - + 读取对话消息历史,自动标记为已读。 ```text - chat_read(entity_id="m_abc123-1", limit=10) + read_messages(entity_id="m_abc123-1", limit=10) → [Alice]: 能帮我看看这个 bug 吗? [you]: 好的,我来看看。 ``` - + 发送消息。系统强制要求 Agent 先读取未读消息再发送。 ```text - chat_send(content="这是修复方案。", entity_id="m_abc123-1") + send_message(content="这是修复方案。", entity_id="m_abc123-1") ``` **信号协议**控制对话流转: @@ -96,11 +87,11 @@ flowchart LR | `close` | "对话结束,不需要回复" | - + 在所有对话或指定对话中搜索消息历史。 ```text - chat_search(query="bug 修复", entity_id="m_abc123-1") + search_messages(query="bug 修复", entity_id="m_abc123-1") ``` @@ -120,15 +111,15 @@ sequenceDiagram API->>H: SSE 推送(message 事件) API->>Q: 入队通知 Q->>T: 唤醒 Thread(若空闲) - T->>API: chat_read(读取实际消息) + T->>API: read_messages(读取实际消息) T->>T: 处理消息 - T->>API: chat_send(回复) + T->>API: send_message(回复) API->>DB: 存储回复 API->>H: SSE 推送(message 事件) ``` - 通知不包含消息内容 — Agent 必须调用 `chat_read` 才能读到。这强制执行「先读后发」的一致模式。 + 通知不包含消息内容 — Agent 必须调用 `read_messages` 才能读到。这强制执行「先读后发」的一致模式。 ## 联系人与投递设置 diff --git a/docs/zh/quickstart.mdx b/docs/zh/quickstart.mdx index 884bf09f4..37c67e8c8 100644 --- a/docs/zh/quickstart.mdx +++ b/docs/zh/quickstart.mdx @@ -100,7 +100,7 @@ Mycel 的社交层让 Agent 之间可以像群聊一样互相发消息。 - 在第一个 Agent 的 Thread 中,告诉它去联系代码审查员:「帮我把这个函数发给代码审查员看看。」Agent 会调用 `chat_send` 工具,审查员会自主回复。 + 在第一个 Agent 的 Thread 中,告诉它去联系代码审查员:「帮我把这个函数发给代码审查员看看。」Agent 会调用 `send_message` 工具,审查员会自主回复。 diff --git a/eval/storage.py b/eval/storage.py index 2dd75c523..ba389cdd1 100644 --- a/eval/storage.py +++ b/eval/storage.py @@ -1,7 +1,4 @@ -"""SQLite storage for eval trajectories and metrics. 
- -Database: ~/.leon/eval.db (separate from main leon.db) -""" +"""Storage for eval trajectories and metrics.""" from __future__ import annotations @@ -9,28 +6,28 @@ from datetime import UTC from pathlib import Path -from config.user_paths import user_home_path from eval.models import ( ObjectiveMetrics, RunTrajectory, SystemMetrics, ) -from eval.repo import SQLiteEvalRepo - -_DEFAULT_DB_PATH = user_home_path("eval.db") class TrajectoryStore: - """SQLite-backed storage for eval trajectories and metrics.""" + """Storage for eval trajectories and metrics.""" + + def __init__(self, db_path: str | Path | None = None, eval_repo=None): + if eval_repo is not None: + self._repo = eval_repo + else: + from storage.runtime import build_storage_container - def __init__(self, db_path: str | Path | None = None): - self.db_path = Path(db_path) if db_path else _DEFAULT_DB_PATH - self.db_path.parent.mkdir(parents=True, exist_ok=True) - self._repo = SQLiteEvalRepo(self.db_path) - self._init_db() + container = build_storage_container() + self._repo = container.eval_repo() def _init_db(self) -> None: - self._repo.ensure_schema() + if hasattr(self._repo, "ensure_schema"): + self._repo.ensure_schema() def save_trajectory(self, trajectory: RunTrajectory) -> str: """Save a trajectory and its LLM/tool call records. Returns run_id.""" diff --git a/frontend/app/.env.example b/frontend/app/.env.example new file mode 100644 index 000000000..abfdc2804 --- /dev/null +++ b/frontend/app/.env.example @@ -0,0 +1,2 @@ +VITE_SUPABASE_URL= +VITE_SUPABASE_ANON_KEY= diff --git a/frontend/app/DESIGN_SYSTEM.md b/frontend/app/DESIGN_SYSTEM.md index 5043fe083..62ae20435 100644 --- a/frontend/app/DESIGN_SYSTEM.md +++ b/frontend/app/DESIGN_SYSTEM.md @@ -186,7 +186,6 @@ These are **not** motion tokens. Import from `@/styles/ux-timing`. 
|----------|-------|-------| | `FEEDBACK_BRIEF` | 1500ms | Copy confirmation, save flash | | `FEEDBACK_NORMAL` | 2000ms | Toast display, status message | -| `BLUR_CLOSE_DELAY` | 150ms | Dropdown close delay on blur | ### Rules diff --git a/frontend/app/package-lock.json b/frontend/app/package-lock.json index 8af285c77..e0f68e798 100644 --- a/frontend/app/package-lock.json +++ b/frontend/app/package-lock.json @@ -35,6 +35,7 @@ "@radix-ui/react-toggle": "^1.1.10", "@radix-ui/react-toggle-group": "^1.1.11", "@radix-ui/react-tooltip": "^1.2.8", + "@supabase/supabase-js": "^2.49.8", "@types/diff": "^7.0.2", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", @@ -62,6 +63,7 @@ }, "devDependencies": { "@eslint/js": "^9.39.1", + "@testing-library/react": "^16.3.2", "@types/node": "^24.10.1", "@types/react": "^19.2.5", "@types/react-dom": "^19.2.3", @@ -71,6 +73,7 @@ "eslint-plugin-react-hooks": "^7.0.1", "eslint-plugin-react-refresh": "^0.4.24", "globals": "^16.5.0", + "jsdom": "^28.1.0", "kimi-plugin-inspect-react": "^1.0.3", "postcss": "^8.5.6", "tailwindcss": "^3.4.19", @@ -78,9 +81,17 @@ "tw-animate-css": "^1.4.0", "typescript": "~5.9.3", "typescript-eslint": "^8.46.4", - "vite": "^7.2.4" + "vite": "^7.2.4", + "vitest": "^4.1.2" } }, + "node_modules/@acemir/cssom": { + "version": "0.9.31", + "resolved": "https://registry.npmjs.org/@acemir/cssom/-/cssom-0.9.31.tgz", + "integrity": "sha512-ZnR3GSaH+/vJ0YlHau21FjfLYjMpYVIzTD8M8vIEQvIGxeOXyXdzCI140rrCY862p/C/BbzWsjc1dgnM9mkoTA==", + "dev": true, + "license": "MIT" + }, "node_modules/@alloc/quick-lru": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", @@ -94,6 +105,64 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@asamuzakjp/css-color": { + "version": "5.1.5", + "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-5.1.5.tgz", + "integrity": "sha512-8cMAA1bE66Mb/tfmkhcfJLjEPgyT7SSy6lW6id5XL113ai1ky76d/1L27sGnXCMsLfq66DInAU3OzuahB4lu9Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@csstools/css-calc": "^3.1.1", + "@csstools/css-color-parser": "^4.0.2", + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0", + "lru-cache": "^11.2.7" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/@asamuzakjp/css-color/node_modules/lru-cache": { + "version": "11.3.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.3.0.tgz", + "integrity": "sha512-sr8xPKE25m6vJVcrdn6NxtC0fVfuPowbscLypegRgOm0yXSqr5JNHCAY3hnusdJ7HRBW04j6Ip4khvHU778DuQ==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@asamuzakjp/dom-selector": { + "version": "6.8.1", + "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-6.8.1.tgz", + "integrity": "sha512-MvRz1nCqW0fsy8Qz4dnLIvhOlMzqDVBabZx6lH+YywFDdjXhMY37SmpV1XFX3JzG5GWHn63j6HX6QPr3lZXHvQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@asamuzakjp/nwsapi": "^2.3.9", + "bidi-js": "^1.0.3", + "css-tree": "^3.1.0", + "is-potential-custom-element-name": "^1.0.1", + "lru-cache": "^11.2.6" + } + }, + "node_modules/@asamuzakjp/dom-selector/node_modules/lru-cache": { + "version": "11.3.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.3.0.tgz", + "integrity": "sha512-sr8xPKE25m6vJVcrdn6NxtC0fVfuPowbscLypegRgOm0yXSqr5JNHCAY3hnusdJ7HRBW04j6Ip4khvHU778DuQ==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 
|| >=22" + } + }, + "node_modules/@asamuzakjp/nwsapi": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/@asamuzakjp/nwsapi/-/nwsapi-2.3.9.tgz", + "integrity": "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==", + "dev": true, + "license": "MIT" + }, "node_modules/@babel/code-frame": { "version": "7.28.6", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.28.6.tgz", @@ -1846,6 +1915,161 @@ "node": ">=6.9.0" } }, + "node_modules/@bramus/specificity": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/@bramus/specificity/-/specificity-2.4.2.tgz", + "integrity": "sha512-ctxtJ/eA+t+6q2++vj5j7FYX3nRu311q1wfYH3xjlLOsczhlhxAg2FWNUXhpGvAw3BWo1xBcvOV6/YLc2r5FJw==", + "dev": true, + "license": "MIT", + "dependencies": { + "css-tree": "^3.0.0" + }, + "bin": { + "specificity": "bin/cli.js" + } + }, + "node_modules/@csstools/color-helpers": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-6.0.2.tgz", + "integrity": "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/@csstools/css-calc": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-3.1.1.tgz", + "integrity": "sha512-HJ26Z/vmsZQqs/o3a6bgKslXGFAungXGbinULZO3eMsOyNJHeBBZfup5FiZInOghgoM4Hwnmw+OgbJCNg1wwUQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-color-parser": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-4.0.2.tgz", + "integrity": "sha512-0GEfbBLmTFf0dJlpsNU7zwxRIH0/BGEMuXLTCvFYxuL1tNhqzTbtnFICyJLTNK4a+RechKP75e7w42ClXSnJQw==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "dependencies": { + "@csstools/color-helpers": "^6.0.2", + "@csstools/css-calc": "^3.1.1" + }, + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-parser-algorithms": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-4.0.0.tgz", + "integrity": "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "peer": true, + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-syntax-patches-for-csstree": { + "version": "1.1.2", + 
"resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.1.2.tgz", + "integrity": "sha512-5GkLzz4prTIpoyeUiIu3iV6CSG3Plo7xRVOFPKI7FVEJ3mZ0A8SwK0XU3Gl7xAkiQ+mDyam+NNp875/C5y+jSA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "peerDependencies": { + "css-tree": "^3.2.1" + }, + "peerDependenciesMeta": { + "css-tree": { + "optional": true + } + } + }, + "node_modules/@csstools/css-tokenizer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-4.0.0.tgz", + "integrity": "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "peer": true, + "engines": { + "node": ">=20.19.0" + } + }, "node_modules/@date-fns/tz": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/@date-fns/tz/-/tz-1.4.1.tgz", @@ -2451,6 +2675,24 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, + "node_modules/@exodus/bytes": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.15.0.tgz", + "integrity": "sha512-UY0nlA+feH81UGSHv92sLEPLCeZFjXOuHhrIo0HQydScuQc8s0A7kL/UdgwgDq8g8ilksmuoF35YVTNphV2aBQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "@noble/hashes": "^1.8.0 || ^2.0.0" + }, + "peerDependenciesMeta": { + "@noble/hashes": { + "optional": true + } + } + }, "node_modules/@floating-ui/core": { "version": "1.7.3", "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz", @@ -4607,12 +4849,161 @@ "win32" ] }, + "node_modules/@standard-schema/spec": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz", + "integrity": "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==", + "dev": true, + "license": "MIT" + }, "node_modules/@standard-schema/utils": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/@standard-schema/utils/-/utils-0.3.0.tgz", "integrity": "sha512-e7Mew686owMaPJVNNLs55PUvgz371nKgwsc4vxE49zsODpJEnxgxRo2y/OKrqueavXgZNMDVj3DdHFlaSAeU8g==", "license": "MIT" }, + "node_modules/@supabase/auth-js": { + "version": "2.101.1", + "resolved": "https://registry.npmjs.org/@supabase/auth-js/-/auth-js-2.101.1.tgz", + "integrity": "sha512-Kd0Wey+RkFHgyVep7adS6UOE2pN6MJ3mZ32PAXSvfw6IjUkFRC7IQpdZZjUOcUe5pXr1ejufCRgF6lsGINe4Tw==", + "license": "MIT", + "dependencies": { + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/functions-js": { + "version": "2.101.1", + "resolved": "https://registry.npmjs.org/@supabase/functions-js/-/functions-js-2.101.1.tgz", + "integrity": "sha512-OZWU7YtaG+NNNFZK8p/FuJ6gpq7pFyrG2fLOopP73HAIDHDGpOttPJapvO8ADu3RkqfQfkwrB354vPkSBbZ20A==", + "license": "MIT", + "dependencies": { + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/phoenix": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@supabase/phoenix/-/phoenix-0.4.0.tgz", + "integrity": 
"sha512-RHSx8bHS02xwfHdAbX5Lpbo6PXbgyf7lTaXTlwtFDPwOIw64NnVRwFAXGojHhjtVYI+PEPNSWwkL90f4agN3bw==", + "license": "MIT" + }, + "node_modules/@supabase/postgrest-js": { + "version": "2.101.1", + "resolved": "https://registry.npmjs.org/@supabase/postgrest-js/-/postgrest-js-2.101.1.tgz", + "integrity": "sha512-UW1RajH5jbZoK+ldAJ1I6VZ+HWwZ2oaKjEQ6Gn+AQ67CHQVxGl8wNQoLYyumbyaExm41I+wn7arulcY1eHeZJw==", + "license": "MIT", + "dependencies": { + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/realtime-js": { + "version": "2.101.1", + "resolved": "https://registry.npmjs.org/@supabase/realtime-js/-/realtime-js-2.101.1.tgz", + "integrity": "sha512-Oa6dno0OB9I+hv5do5zsZHbFu41ViZnE9IWjmkeeF/8fPmB5fWoHGqeTYEC3/0DAgtpUoFJa4FpvzFH0SBHo1Q==", + "license": "MIT", + "dependencies": { + "@supabase/phoenix": "^0.4.0", + "@types/ws": "^8.18.1", + "tslib": "2.8.1", + "ws": "^8.18.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/storage-js": { + "version": "2.101.1", + "resolved": "https://registry.npmjs.org/@supabase/storage-js/-/storage-js-2.101.1.tgz", + "integrity": "sha512-WhTaUOBgeEvnKLy95Cdlp6+D5igSF/65yC727w1olxbet5nzUvMlajKUWyzNtQu2efrz2cQ7FcdVBdQqgT9YKQ==", + "license": "MIT", + "dependencies": { + "iceberg-js": "^0.8.1", + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/supabase-js": { + "version": "2.101.1", + "resolved": "https://registry.npmjs.org/@supabase/supabase-js/-/supabase-js-2.101.1.tgz", + "integrity": "sha512-Jnhm3LfuACwjIzvk2pfUbGQn7pa7hi6MFzfSyPrRYWVCCu69RPLCFyHSBl7HSBwadbQ3UZOznnD3gPca3ePrRA==", + "license": "MIT", + "dependencies": { + "@supabase/auth-js": "2.101.1", + "@supabase/functions-js": "2.101.1", + "@supabase/postgrest-js": "2.101.1", + "@supabase/realtime-js": "2.101.1", + "@supabase/storage-js": "2.101.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@testing-library/dom": { + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", + "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@babel/code-frame": "^7.10.4", + "@babel/runtime": "^7.12.5", + "@types/aria-query": "^5.0.1", + "aria-query": "5.3.0", + "dom-accessibility-api": "^0.5.9", + "lz-string": "^1.5.0", + "picocolors": "1.1.1", + "pretty-format": "^27.0.2" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@testing-library/react": { + "version": "16.3.2", + "resolved": "https://registry.npmjs.org/@testing-library/react/-/react-16.3.2.tgz", + "integrity": "sha512-XU5/SytQM+ykqMnAnvB2umaJNIOsLF3PVv//1Ew4CTcpz0/BRyy/af40qqrt7SjKpDdT1saBMc42CUok5gaw+g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.12.5" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@testing-library/dom": "^10.0.0", + "@types/react": "^18.0.0 || ^19.0.0", + "@types/react-dom": "^18.0.0 || ^19.0.0", + "react": "^18.0.0 || ^19.0.0", + "react-dom": "^18.0.0 || ^19.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@types/aria-query": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz", + "integrity": "sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==", + "dev": true, + 
"license": "MIT" + }, "node_modules/@types/babel__core": { "version": "7.20.5", "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", @@ -4658,6 +5049,17 @@ "@babel/types": "^7.28.2" } }, + "node_modules/@types/chai": { + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.3.tgz", + "integrity": "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/deep-eql": "*", + "assertion-error": "^2.0.1" + } + }, "node_modules/@types/d3-array": { "version": "3.2.2", "resolved": "https://registry.npmjs.org/@types/d3-array/-/d3-array-3.2.2.tgz", @@ -4730,6 +5132,13 @@ "@types/ms": "*" } }, + "node_modules/@types/deep-eql": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", + "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/diff": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/@types/diff/-/diff-7.0.2.tgz", @@ -4786,9 +5195,7 @@ "version": "24.10.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.4.tgz", "integrity": "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg==", - "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -4821,6 +5228,15 @@ "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", "license": "MIT" }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.52.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.52.0.tgz", @@ -5118,29 +5534,152 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, - "node_modules/acorn": { - "version": "8.15.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", - "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "node_modules/@vitest/expect": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-4.1.2.tgz", + "integrity": "sha512-gbu+7B0YgUJ2nkdsRJrFFW6X7NTP44WlhiclHniUhxADQJH5Szt9mZ9hWnJPJ8YwOK5zUOSSlSvyzRf0u1DSBQ==", "dev": true, "license": "MIT", - "peer": true, - "bin": { - "acorn": "bin/acorn" + "dependencies": { + "@standard-schema/spec": "^1.1.0", + "@types/chai": "^5.2.2", + "@vitest/spy": "4.1.2", + "@vitest/utils": "4.1.2", + "chai": "^6.2.2", + "tinyrainbow": "^3.1.0" }, - "engines": { - "node": ">=0.4.0" + "funding": { + "url": "https://opencollective.com/vitest" } }, - "node_modules/acorn-jsx": { - "version": "5.3.2", - "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", - "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "node_modules/@vitest/mocker": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-4.1.2.tgz", + "integrity": "sha512-Ize4iQtEALHDttPRCmN+FKqOl2vxTiNUhzobQFFt/BM1lRUTG7zRCLOykG/6Vo4E4hnUdfVLo5/eqKPukcWW7Q==", 
"dev": true, "license": "MIT", - "peerDependencies": { - "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" - } + "dependencies": { + "@vitest/spy": "4.1.2", + "estree-walker": "^3.0.3", + "magic-string": "^0.30.21" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "msw": "^2.4.9", + "vite": "^6.0.0 || ^7.0.0 || ^8.0.0" + }, + "peerDependenciesMeta": { + "msw": { + "optional": true + }, + "vite": { + "optional": true + } + } + }, + "node_modules/@vitest/pretty-format": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-4.1.2.tgz", + "integrity": "sha512-dwQga8aejqeuB+TvXCMzSQemvV9hNEtDDpgUKDzOmNQayl2OG241PSWeJwKRH3CiC+sESrmoFd49rfnq7T4RnA==", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyrainbow": "^3.1.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/runner": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-4.1.2.tgz", + "integrity": "sha512-Gr+FQan34CdiYAwpGJmQG8PgkyFVmARK8/xSijia3eTFgVfpcpztWLuP6FttGNfPLJhaZVP/euvujeNYar36OQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/utils": "4.1.2", + "pathe": "^2.0.3" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/snapshot": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-4.1.2.tgz", + "integrity": "sha512-g7yfUmxYS4mNxk31qbOYsSt2F4m1E02LFqO53Xpzg3zKMhLAPZAjjfyl9e6z7HrW6LvUdTwAQR3HHfLjpko16A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "4.1.2", + "@vitest/utils": "4.1.2", + "magic-string": "^0.30.21", + "pathe": "^2.0.3" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/spy": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-4.1.2.tgz", + "integrity": "sha512-DU4fBnbVCJGNBwVA6xSToNXrkZNSiw59H8tcuUspVMsBDBST4nfvsPsEHDHGtWRRnqBERBQu7TrTKskmjqTXKA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/utils": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-4.1.2.tgz", + "integrity": "sha512-xw2/TiX82lQHA06cgbqRKFb5lCAy3axQ4H4SoUFhUsg+wztiet+co86IAMDtF6Vm1hc7J6j09oh/rgDn+JdKIQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "4.1.2", + "convert-source-map": "^2.0.0", + "tinyrainbow": "^3.1.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/acorn": { + "version": "8.15.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", + "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "dev": true, + "license": "MIT", + "peer": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": 
"sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } }, "node_modules/ajv": { "version": "6.12.6", @@ -5159,6 +5698,16 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/ansi-styles": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", @@ -5235,6 +5784,26 @@ "node": ">=10" } }, + "node_modules/aria-query": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", + "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "dequal": "^2.0.3" + } + }, + "node_modules/assertion-error": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", + "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + } + }, "node_modules/autoprefixer": { "version": "10.4.23", "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.23.tgz", @@ -5341,6 +5910,16 @@ "baseline-browser-mapping": "dist/cli.js" } }, + "node_modules/bidi-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", + "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==", + "dev": true, + "license": "MIT", + "dependencies": { + "require-from-string": "^2.0.2" + } + }, "node_modules/binary-extensions": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", @@ -5464,6 +6043,16 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/chai": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/chai/-/chai-6.2.2.tgz", + "integrity": "sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -5692,6 +6281,21 @@ "node": ">= 8" } }, + "node_modules/css-tree": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-3.2.1.tgz", + "integrity": "sha512-X7sjQzceUhu1u7Y/ylrRZFU2FS6LRiFVp6rKLPg23y3x3c3DOKAwuXGDp+PAGjh6CSnCjYeAul8pcT8bAl+lSA==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "mdn-data": "2.27.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" + } + }, "node_modules/cssesc": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", @@ -5705,6 +6309,32 @@ "node": ">=4" } }, + "node_modules/cssstyle": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-6.2.0.tgz", + "integrity": "sha512-Fm5NvhYathRnXNVndkUsCCuR63DCLVVwGOOwQw782coXFi5HhkXdu289l59HlXZBawsyNccXfWRYvLzcDCdDig==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "@asamuzakjp/css-color": "^5.0.1", + "@csstools/css-syntax-patches-for-csstree": "^1.0.28", + "css-tree": "^3.1.0", + "lru-cache": "^11.2.6" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/cssstyle/node_modules/lru-cache": { + "version": "11.3.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.3.0.tgz", + "integrity": "sha512-sr8xPKE25m6vJVcrdn6NxtC0fVfuPowbscLypegRgOm0yXSqr5JNHCAY3hnusdJ7HRBW04j6Ip4khvHU778DuQ==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, "node_modules/csstype": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", @@ -5832,6 +6462,20 @@ "node": ">=12" } }, + "node_modules/data-urls": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz", + "integrity": "sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA==", + "dev": true, + "license": "MIT", + "dependencies": { + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^16.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, "node_modules/date-fns": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-4.1.0.tgz", @@ -5865,6 +6509,13 @@ } } }, + "node_modules/decimal.js": { + "version": "10.6.0", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz", + "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==", + "dev": true, + "license": "MIT" + }, "node_modules/decimal.js-light": { "version": "2.5.1", "resolved": "https://registry.npmjs.org/decimal.js-light/-/decimal.js-light-2.5.1.tgz", @@ -5942,6 +6593,13 @@ "dev": true, "license": "MIT" }, + "node_modules/dom-accessibility-api": { + "version": "0.5.16", + "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz", + "integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==", + "dev": true, + "license": "MIT" + }, "node_modules/dom-helpers": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-5.2.1.tgz", @@ -6000,6 +6658,13 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/es-module-lexer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-2.0.0.tgz", + "integrity": "sha512-5POEcUuZybH7IdmGsD8wlf0AI55wMecM9rVBTI/qEAy2c1kTOm3DjFYjrBdI2K3BaJjJYfYFeRtM0t9ssnRuxw==", + "dev": true, + "license": "MIT" + }, "node_modules/esbuild": { "version": "0.27.2", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz", @@ -6250,6 +6915,16 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/estree-walker": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", + "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, "node_modules/esutils": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", @@ -6266,6 +6941,16 @@ "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", "license": "MIT" }, + "node_modules/expect-type": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz", + 
"integrity": "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -6697,6 +7382,19 @@ "hermes-estree": "0.25.1" } }, + "node_modules/html-encoding-sniffer": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz", + "integrity": "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@exodus/bytes": "^1.6.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, "node_modules/html-url-attributes": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz", @@ -6717,6 +7415,43 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/iceberg-js": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/iceberg-js/-/iceberg-js-0.8.1.tgz", + "integrity": "sha512-1dhVQZXhcHje7798IVM+xoo/1ZdVfzOMIc8/rgVSijRK38EDqOJoGula9N/8ZI5RD8QTxNQtK/Gozpr+qUqRRA==", + "license": "MIT", + "engines": { + "node": ">=20.0.0" + } + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -6897,6 +7632,13 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==", + "dev": true, + "license": "MIT" + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -6934,6 +7676,60 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/jsdom": { + "version": "28.1.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-28.1.0.tgz", + "integrity": "sha512-0+MoQNYyr2rBHqO1xilltfDjV9G7ymYGlAUazgcDLQaUf8JDHbuGwsxN6U9qWaElZ4w1B2r7yEGIL3GdeW3Rug==", + "dev": true, + "license": "MIT", + "dependencies": { + "@acemir/cssom": "^0.9.31", + "@asamuzakjp/dom-selector": "^6.8.1", + "@bramus/specificity": "^2.4.2", + "@exodus/bytes": "^1.11.0", + "cssstyle": "^6.0.1", + "data-urls": "^7.0.0", + "decimal.js": "^10.6.0", + "html-encoding-sniffer": "^6.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.6", + "is-potential-custom-element-name": "^1.0.1", + "parse5": "^8.0.0", + "saxes": "^6.0.0", + 
"symbol-tree": "^3.2.4", + "tough-cookie": "^6.0.0", + "undici": "^7.21.0", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^8.0.1", + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^16.0.0", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "canvas": "^3.0.0" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsdom/node_modules/parse5": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.0.tgz", + "integrity": "sha512-9m4m5GSgXjL4AjumKzq1Fgfp3Z8rsvjRNbnkVwfu2ImRqE5D0LnY2QfDen18FSY9C573YU5XxSapdHZTZ2WolA==", + "dev": true, + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/jsesc": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", @@ -7121,6 +7917,16 @@ "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, + "node_modules/lz-string": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", + "integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==", + "dev": true, + "license": "MIT", + "bin": { + "lz-string": "bin/bin.js" + } + }, "node_modules/magic-string": { "version": "0.30.21", "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", @@ -7435,6 +8241,13 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/mdn-data": { + "version": "2.27.1", + "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.27.1.tgz", + "integrity": "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ==", + "dev": true, + "license": "CC0-1.0" + }, "node_modules/merge2": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", @@ -8138,6 +8951,17 @@ "node": ">= 6" } }, + "node_modules/obug": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz", + "integrity": "sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==", + "dev": true, + "funding": [ + "https://github.com/sponsors/sxzz", + "https://opencollective.com/debug" + ], + "license": "MIT" + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -8265,6 +9089,13 @@ "dev": true, "license": "MIT" }, + "node_modules/pathe": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", + "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", + "dev": true, + "license": "MIT" + }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", @@ -8480,6 +9311,41 @@ "node": ">= 0.8.0" } }, + "node_modules/pretty-format": { + "version": "27.5.1", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz", + "integrity": "sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1", + "ansi-styles": "^5.0.0", + "react-is": "^17.0.1" + }, + "engines": { + "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" + } + }, + "node_modules/pretty-format/node_modules/ansi-styles": { + "version": "5.2.0", + 
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/pretty-format/node_modules/react-is": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", + "integrity": "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==", + "dev": true, + "license": "MIT" + }, "node_modules/prop-types": { "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", @@ -9008,6 +9874,16 @@ "integrity": "sha512-4ZJgIB9EG9fQE41mOJCRHMmnxDTKHWawQoJWZyUbZuj680wVyogu2ihnj8Edqm7vh2mo/TWHyEZpn2kqeDvS7w==", "license": "Apache-2.0" }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/resolve": { "version": "1.22.11", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.11.tgz", @@ -9119,6 +9995,19 @@ "queue-microtask": "^1.2.2" } }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "dev": true, + "license": "ISC", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, "node_modules/scheduler": { "version": "0.27.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", @@ -9164,6 +10053,13 @@ "node": ">=8" } }, + "node_modules/siginfo": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", + "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", + "dev": true, + "license": "ISC" + }, "node_modules/sonner": { "version": "2.0.7", "resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz", @@ -9194,6 +10090,20 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/stackback": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", + "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", + "dev": true, + "license": "MIT" + }, + "node_modules/std-env": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/std-env/-/std-env-4.0.0.tgz", + "integrity": "sha512-zUMPtQ/HBY3/50VbpkupYHbRroTRZJPRLvreamgErJVys0ceuzMkD44J/QjqhHjOzK42GQ3QZIeFG1OYfOtKqQ==", + "dev": true, + "license": "MIT" + }, "node_modules/streamdown": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/streamdown/-/streamdown-2.4.0.tgz", @@ -9315,6 +10225,13 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", + "dev": true, + "license": "MIT" + }, "node_modules/tailwind-merge": { "version": "3.4.0", 
"resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.4.0.tgz", @@ -9403,6 +10320,23 @@ "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", "license": "MIT" }, + "node_modules/tinybench": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", + "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", + "dev": true, + "license": "MIT" + }, + "node_modules/tinyexec": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-1.0.4.tgz", + "integrity": "sha512-u9r3uZC0bdpGOXtlxUIdwf9pkmvhqJdrVCH9fapQtgy/OeTTMZ1nqH7agtvEfmGui6e1XxjcdrlxvxJvc3sMqw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", @@ -9420,6 +10354,36 @@ "url": "https://github.com/sponsors/SuperchupuDev" } }, + "node_modules/tinyrainbow": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-3.1.0.tgz", + "integrity": "sha512-Bf+ILmBgretUrdJxzXM0SgXLZ3XfiaUuOj/IKQHuTXip+05Xn+uyEYdVg0kYDipTBcLrCVyUzAPz7QmArb0mmw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/tldts": { + "version": "7.0.28", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-7.0.28.tgz", + "integrity": "sha512-+Zg3vWhRUv8B1maGSTFdev9mjoo8Etn2Ayfs4cnjlD3CsGkxXX4QyW3j2WJ0wdjYcYmy7Lx2RDsZMhgCWafKIw==", + "dev": true, + "license": "MIT", + "dependencies": { + "tldts-core": "^7.0.28" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "7.0.28", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-7.0.28.tgz", + "integrity": "sha512-7W5Efjhsc3chVdFhqtaU0KtK32J37Zcr9RKtID54nG+tIpcY79CQK/veYPODxtD/LJ4Lue66jvrQzIX2Z2/pUQ==", + "dev": true, + "license": "MIT" + }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ -9433,6 +10397,32 @@ "node": ">=8.0" } }, + "node_modules/tough-cookie": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-6.0.1.tgz", + "integrity": "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "tldts": "^7.0.5" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tr46": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz", + "integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==", + "dev": true, + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=20" + } + }, "node_modules/trim-lines": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", @@ -9541,11 +10531,20 @@ "typescript": ">=4.8.4 <6.0.0" } }, + "node_modules/undici": { + "version": "7.24.7", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.24.7.tgz", + "integrity": "sha512-H/nlJ/h0ggGC+uRL3ovD+G0i4bqhvsDOpbDv7At5eFLlj2b41L8QliGbnl2H7SnDiYhENphh1tQFJZf+MyfLsQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/undici-types": { "version": "7.16.0", "resolved": 
"https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "dev": true, "license": "MIT" }, "node_modules/unicode-canonical-property-names-ecmascript": { @@ -9932,6 +10931,101 @@ } } }, + "node_modules/vitest": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/vitest/-/vitest-4.1.2.tgz", + "integrity": "sha512-xjR1dMTVHlFLh98JE3i/f/WePqJsah4A0FK9cc8Ehp9Udk0AZk6ccpIZhh1qJ/yxVWRZ+Q54ocnD8TXmkhspGg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/expect": "4.1.2", + "@vitest/mocker": "4.1.2", + "@vitest/pretty-format": "4.1.2", + "@vitest/runner": "4.1.2", + "@vitest/snapshot": "4.1.2", + "@vitest/spy": "4.1.2", + "@vitest/utils": "4.1.2", + "es-module-lexer": "^2.0.0", + "expect-type": "^1.3.0", + "magic-string": "^0.30.21", + "obug": "^2.1.1", + "pathe": "^2.0.3", + "picomatch": "^4.0.3", + "std-env": "^4.0.0-rc.1", + "tinybench": "^2.9.0", + "tinyexec": "^1.0.2", + "tinyglobby": "^0.2.15", + "tinyrainbow": "^3.1.0", + "vite": "^6.0.0 || ^7.0.0 || ^8.0.0", + "why-is-node-running": "^2.3.0" + }, + "bin": { + "vitest": "vitest.mjs" + }, + "engines": { + "node": "^20.0.0 || ^22.0.0 || >=24.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "@edge-runtime/vm": "*", + "@opentelemetry/api": "^1.9.0", + "@types/node": "^20.0.0 || ^22.0.0 || >=24.0.0", + "@vitest/browser-playwright": "4.1.2", + "@vitest/browser-preview": "4.1.2", + "@vitest/browser-webdriverio": "4.1.2", + "@vitest/ui": "4.1.2", + "happy-dom": "*", + "jsdom": "*", + "vite": "^6.0.0 || ^7.0.0 || ^8.0.0" + }, + "peerDependenciesMeta": { + "@edge-runtime/vm": { + "optional": true + }, + "@opentelemetry/api": { + "optional": true + }, + "@types/node": { + "optional": true + }, + "@vitest/browser-playwright": { + "optional": true + }, + "@vitest/browser-preview": { + "optional": true + }, + "@vitest/browser-webdriverio": { + "optional": true + }, + "@vitest/ui": { + "optional": true + }, + "happy-dom": { + "optional": true + }, + "jsdom": { + "optional": true + }, + "vite": { + "optional": false + } + } + }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "dev": true, + "license": "MIT", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/web-namespaces": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz", @@ -9942,6 +11036,41 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/webidl-conversions": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz", + "integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=20" + } + }, + "node_modules/whatwg-mimetype": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", + "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20" + } + }, + "node_modules/whatwg-url": { + "version": 
"16.0.1", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.1.tgz", + "integrity": "sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@exodus/bytes": "^1.11.0", + "tr46": "^6.0.0", + "webidl-conversions": "^8.0.1" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -9958,6 +11087,23 @@ "node": ">= 8" } }, + "node_modules/why-is-node-running": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", + "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", + "dev": true, + "license": "MIT", + "dependencies": { + "siginfo": "^2.0.0", + "stackback": "0.0.2" + }, + "bin": { + "why-is-node-running": "cli.js" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/word-wrap": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", @@ -9968,6 +11114,44 @@ "node": ">=0.10.0" } }, + "node_modules/ws": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", + "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "dev": true, + "license": "MIT" + }, "node_modules/yallist": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", diff --git a/frontend/app/package.json b/frontend/app/package.json index 52199cd30..1e247d29d 100644 --- a/frontend/app/package.json +++ b/frontend/app/package.json @@ -7,7 +7,8 @@ "dev": "vite", "build": "tsc -b && vite build", "lint": "eslint .", - "preview": "vite preview" + "preview": "vite preview", + "test": "vitest run" }, "dependencies": { "@hookform/resolvers": "^5.2.2", @@ -55,6 +56,7 @@ "react-resizable-panels": "^4.2.2", "react-router-dom": "^7.13.0", "recharts": "^2.15.4", + "@supabase/supabase-js": "^2.49.8", "sonner": "^2.0.7", "streamdown": "^2.4.0", "tailwind-merge": "^3.4.0", @@ -64,6 +66,7 @@ }, "devDependencies": { "@eslint/js": "^9.39.1", + "@testing-library/react": "^16.3.2", "@types/node": "^24.10.1", "@types/react": "^19.2.5", "@types/react-dom": "^19.2.3", @@ -73,6 +76,7 @@ "eslint-plugin-react-hooks": "^7.0.1", "eslint-plugin-react-refresh": "^0.4.24", "globals": "^16.5.0", + "jsdom": "^28.1.0", "kimi-plugin-inspect-react": "^1.0.3", "postcss": "^8.5.6", "tailwindcss": "^3.4.19", @@ -80,6 +84,7 @@ "tw-animate-css": "^1.4.0", "typescript": "~5.9.3", 
"typescript-eslint": "^8.46.4", - "vite": "^7.2.4" + "vite": "^7.2.4", + "vitest": "^4.1.2" } } diff --git a/frontend/app/src/api/client.ts b/frontend/app/src/api/client.ts index 2dd5c8c56..894171688 100644 --- a/frontend/app/src/api/client.ts +++ b/frontend/app/src/api/client.ts @@ -11,7 +11,10 @@ import type { LeaseStatus, ThreadDetail, ThreadSummary, - SandboxChannelFilesResult, + ThreadPermissions, + ThreadPermissionRules, + PermissionRuleBehavior, + AskUserAnswer, SandboxFileResult, SandboxFilesListResult, SandboxUploadResult, @@ -31,21 +34,11 @@ export async function request(url: string, init?: RequestInit): Promise { return (await response.json()) as T; } -function toThreads(payload: unknown): ThreadSummary[] { - if (payload && typeof payload === "object" && Array.isArray((payload as { threads?: unknown }).threads)) { - return (payload as { threads: ThreadSummary[] }).threads; - } - if (Array.isArray(payload)) { - return payload as ThreadSummary[]; - } - throw new Error("Unexpected /api/threads response shape"); -} - // --- Thread API --- export async function listThreads(): Promise { - const payload = await request("/api/threads"); - return toThreads(payload); + const payload = await request<{ threads: ThreadSummary[] }>("/api/threads"); + return payload.threads; } export interface CreateThreadOptions { @@ -68,7 +61,10 @@ export async function createThread(opts: CreateThreadOptions): Promise("/api/threads", { method: "POST", body: JSON.stringify(body) }); } -export async function getMainThread(memberId: string, signal?: AbortSignal): Promise { +export async function getDefaultThread(memberId: string, signal?: AbortSignal): Promise { + // @@@default-thread-wire-legacy - frontend now treats this as a template -> + // default-thread resolver, but the backend endpoint name stays `/threads/main` + // until the route contract is renamed in a later slice. 
const payload = await request<{ thread: ThreadSummary | null }>("/api/threads/main", { method: "POST", body: JSON.stringify({ member_id: memberId }), @@ -99,26 +95,55 @@ export async function getThread(threadId: string): Promise { return request(`/api/threads/${encodeURIComponent(threadId)}`); } -export async function getThreadRuntime(threadId: string): Promise { - return request(`/api/threads/${encodeURIComponent(threadId)}/runtime`); +export async function getThreadPermissions(threadId: string, signal?: AbortSignal): Promise { + return request(`/api/threads/${encodeURIComponent(threadId)}/permissions`, { signal }); } -export async function sendMessage(threadId: string, message: string): Promise<{ status: string; routing: string }> { - return request(`/api/threads/${encodeURIComponent(threadId)}/messages`, { +export async function resolveThreadPermission( + threadId: string, + requestId: string, + decision: "allow" | "deny", + message?: string, + answers?: AskUserAnswer[], + annotations?: Record, +): Promise<{ ok: boolean; thread_id: string; request_id: string }> { + return request(`/api/threads/${encodeURIComponent(threadId)}/permissions/${encodeURIComponent(requestId)}/resolve`, { method: "POST", - body: JSON.stringify({ message }), + body: JSON.stringify({ decision, message, answers, annotations }), }); } -export async function queueMessage(threadId: string, message: string): Promise { - await request(`/api/threads/${encodeURIComponent(threadId)}/queue`, { +export async function addThreadPermissionRule( + threadId: string, + behavior: PermissionRuleBehavior, + toolName: string, +): Promise<{ ok: boolean; thread_id: string; scope: string; rules: ThreadPermissionRules; managed_only: boolean }> { + return request(`/api/threads/${encodeURIComponent(threadId)}/permissions/rules`, { method: "POST", - body: JSON.stringify({ message }), + body: JSON.stringify({ behavior, tool_name: toolName }), }); } -export async function getQueue(threadId: string): Promise<{ messages: Array<{ id: number; content: string; created_at: string }> }> { - return request(`/api/threads/${encodeURIComponent(threadId)}/queue`); +export async function removeThreadPermissionRule( + threadId: string, + behavior: PermissionRuleBehavior, + toolName: string, +): Promise<{ ok: boolean; thread_id: string; scope: string; rules: ThreadPermissionRules; managed_only: boolean }> { + return request( + `/api/threads/${encodeURIComponent(threadId)}/permissions/rules/${encodeURIComponent(behavior)}/${encodeURIComponent(toolName)}`, + { method: "DELETE" }, + ); +} + +export async function getThreadRuntime(threadId: string): Promise { + return request(`/api/threads/${encodeURIComponent(threadId)}/runtime`); +} + +export async function sendMessage(threadId: string, message: string): Promise<{ status: string; routing: string }> { + return request(`/api/threads/${encodeURIComponent(threadId)}/messages`, { + method: "POST", + body: JSON.stringify({ message }), + }); } // --- Sandbox API --- @@ -163,32 +188,6 @@ export async function listMyLeases(signal?: AbortSignal): Promise { - await request(`/api/threads/${encodeURIComponent(threadId)}/sandbox/pause`, { method: "POST" }); -} - -export async function resumeThreadSandbox(threadId: string): Promise { - await request(`/api/threads/${encodeURIComponent(threadId)}/sandbox/resume`, { method: "POST" }); -} - -export async function destroyThreadSandbox(threadId: string): Promise { - await request(`/api/threads/${encodeURIComponent(threadId)}/sandbox`, { method: "DELETE" }); -} - -export async 
function pauseSandboxSession(sessionId: string, provider: string): Promise { - await request( - `/api/sandbox/sessions/${encodeURIComponent(sessionId)}/pause?provider=${encodeURIComponent(provider)}`, - { method: "POST" }, - ); -} - -export async function resumeSandboxSession(sessionId: string, provider: string): Promise { - await request( - `/api/sandbox/sessions/${encodeURIComponent(sessionId)}/resume?provider=${encodeURIComponent(provider)}`, - { method: "POST" }, - ); -} - export async function destroySandboxSession(sessionId: string, provider: string): Promise { await request( `/api/sandbox/sessions/${encodeURIComponent(sessionId)}?provider=${encodeURIComponent(provider)}`, @@ -206,8 +205,16 @@ export async function getThreadTerminal(threadId: string): Promise { - return request(`/api/threads/${encodeURIComponent(threadId)}/lease`); +export async function getThreadLease(threadId: string): Promise { + const response = await authFetch(`/api/threads/${encodeURIComponent(threadId)}/lease`); + if (response.status === 404) { + return null; + } + if (!response.ok) { + const body = await response.text(); + throw new Error(`API ${response.status}: ${body || response.statusText}`); + } + return (await response.json()) as LeaseStatus; } // --- Sandbox Files API --- @@ -225,12 +232,6 @@ export async function readSandboxFile(threadId: string, path: string): Promise { - return request(`${sandboxFilesBase(threadId)}/channel-files`); -} - export async function uploadSandboxFile( threadId: string, opts: { file: File; path?: string }, @@ -261,11 +262,6 @@ export function getSandboxDownloadUrl( // --- Settings API --- -export async function listSandboxConfigs(): Promise>> { - const payload = await request<{ sandboxes: Record> }>("/api/settings/sandboxes"); - return payload.sandboxes; -} - export async function saveSandboxConfig(name: string, config: Record): Promise { await request("/api/settings/sandboxes", { method: "POST", @@ -275,10 +271,6 @@ export async function saveSandboxConfig(name: string, config: Record> { - return request("/api/settings/observation"); -} - export async function saveObservationConfig( active: string | null, config?: Record, @@ -309,9 +301,8 @@ export interface InviteCode { } export async function fetchInviteCodes(): Promise { - const payload = await request<{ codes: InviteCode[] } | InviteCode[]>("/api/invite-codes"); - if (Array.isArray(payload)) return payload; - return (payload as { codes: InviteCode[] }).codes; + const payload = await request<{ codes: InviteCode[] }>("/api/invite-codes"); + return payload.codes; } export async function generateInviteCode(expiresDays = 7): Promise { diff --git a/frontend/app/src/api/types.ts b/frontend/app/src/api/types.ts index 08d990935..7aa8548cb 100644 --- a/frontend/app/src/api/types.ts +++ b/frontend/app/src/api/types.ts @@ -28,11 +28,12 @@ export interface ThreadSummary { preview?: string; updated_at?: string; running?: boolean; + /** Template entry id for this thread; actor identity still lives in `thread_id`. */ member_id?: string; + /** Template-facing secondary label; child threads should prefer `sidebar_label` when present. */ member_name?: string; - /** Canonical thread/entity display name. Main: {member}. Child: {member} · 分身N */ - entity_name?: string; branch_index?: number; + /** Canonical actor-facing label for sidebar/header surfaces. 
*/ sidebar_label?: string | null; avatar_url?: string; is_main?: boolean; @@ -45,6 +46,49 @@ export interface ThreadDetail { sandbox: SandboxInfo | null; } +export interface PermissionRequest { + request_id: string; + thread_id: string; + tool_name: string; + args: Record; + message?: string | null; +} + +export interface AskUserQuestionOption { + label: string; + description: string; + preview?: string | null; +} + +export interface AskUserQuestionPrompt { + header: string; + question: string; + options: AskUserQuestionOption[]; + multiSelect?: boolean; +} + +export interface AskUserAnswer { + header?: string; + question?: string; + selected_options: string[]; + free_text?: string | null; +} + +export type PermissionRuleBehavior = "allow" | "deny" | "ask"; + +export interface ThreadPermissionRules { + allow: string[]; + deny: string[]; + ask: string[]; +} + +export interface ThreadPermissions { + thread_id: string; + requests: PermissionRequest[]; + session_rules: ThreadPermissionRules; + managed_only: boolean; +} + export interface SandboxType { name: string; provider?: string; @@ -109,7 +153,9 @@ export interface UserLeaseSummary { cwd?: string | null; thread_ids: string[]; agents: Array<{ + /** Template entry bound to the lease; not an actor thread id. */ member_id: string; + /** Template-facing label for the lease summary card. */ member_name: string; avatar_url?: string | null; }>; @@ -200,6 +246,11 @@ export interface UserMessage { timestamp: number; /** Backend-computed: is this message visible to thread owner? */ showing?: boolean; + ask_user_question_answered?: { + questions: AskUserQuestionPrompt[]; + answers: AskUserAnswer[]; + annotations?: Record; + }; senderName?: string; senderAvatarUrl?: string; attachments?: string[]; @@ -219,6 +270,7 @@ export interface StreamStatus { state: { state: string; flags: Record }; tokens: { total_tokens: number; input_tokens: number; output_tokens: number; cost: number }; context: { message_count: number; estimated_tokens: number; usage_percent: number; near_limit: boolean }; + model?: string; current_tool?: string; last_seq?: number; run_start_seq?: number; @@ -278,35 +330,29 @@ export interface SandboxFileResult { size: number; } -// --- Entity Chat types --- +// --- Chat types --- -export interface ChatEntity { +export interface ChatMember { id: string; + /** Current chat-facing display label for this participant. */ name: string; type: string; avatar_url?: string; owner_name?: string | null; + /** Template-facing auxiliary label when this chat member is thread-backed. */ member_name?: string | null; + /** Actor thread backing this participant when applicable. 
*/ thread_id?: string | null; is_main?: boolean | null; branch_index?: number | null; } -export interface ChatSummary { - id: string; - title: string | null; - entities: ChatEntity[]; - last_message?: { content: string; sender_name: string; created_at: number }; - unread_count: number; - has_mention: boolean; -} - export interface ChatDetail { id: string; title: string | null; status: string; created_at: number; - entities: ChatEntity[]; + entities: ChatMember[]; } export interface ChatMessage { @@ -319,29 +365,6 @@ export interface ChatMessage { created_at: number; } -export interface TaskAgentRequest { - subagent_type: string; - prompt: string; - description?: string; - model?: string; - max_turns?: number; -} - -// @@@channel-kind - string union used directly as a selector, not an object -export type SandboxChannelKind = "upload" | "download"; - -export interface SandboxChannelFileEntry { - relative_path: string; - size_bytes: number; - updated_at: string; -} - -export interface SandboxChannelFilesResult { - thread_id: string; - channel: SandboxChannelKind; - entries: SandboxChannelFileEntry[]; -} - export interface SandboxUploadResult { thread_id: string; relative_path: string; diff --git a/frontend/app/src/components/ChatArea.test.tsx b/frontend/app/src/components/ChatArea.test.tsx new file mode 100644 index 000000000..6c4350157 --- /dev/null +++ b/frontend/app/src/components/ChatArea.test.tsx @@ -0,0 +1,185 @@ +// @vitest-environment jsdom + +import { afterEach, describe, expect, it } from "vitest"; +import { cleanup, fireEvent, render, screen } from "@testing-library/react"; + +import ChatArea from "./ChatArea"; + +afterEach(() => { + cleanup(); +}); + +describe("ChatArea", () => { + it("does not render hidden user entries", () => { + render( + {}", + timestamp: Date.now(), + showing: false, + }, + ]} + runtimeStatus={null} + loading={false} + />, + ); + + expect(screen.queryByText(/ask_user_question_answers/i)).toBeNull(); + }); + + it("renders AskUserQuestion inline inside the assistant turn", () => { + render( + undefined, + onSubmit: () => undefined, + selectionKeyForIndex: (index) => String(index), + }} + />, + ); + + expect(screen.getByText("等待回答")).toBeTruthy(); + expect(screen.getByText("选择一个方向")).toBeTruthy(); + expect(screen.getByRole("button", { name: "提交回答" })).toBeTruthy(); + }); + + it("anchors hidden ask-user answers back onto the original assistant turn", () => { + render( + \n{"questions":[{"header":"选择一个方向","question":"你希望我问什么?","options":[{"label":"A","description":"简单问题"},{"label":"B","description":"工作问题"}]}],"answers":[{"header":"选择一个方向","question":"你希望我问什么?","selected_options":["B"]}]}\n', + timestamp: Date.now() + 1, + showing: false, + }, + ]} + runtimeStatus={null} + loading={false} + />, + ); + + expect(screen.queryByText(/ask_user_question_answers/i)).toBeNull(); + expect(screen.getByText(/已回答 · 选择一个方向:B/)).toBeTruthy(); + expect(screen.queryByText("你希望我问什么?")).toBeNull(); + + fireEvent.click(screen.getByRole("button", { name: "查看已回答详情" })); + + expect(screen.getByText("你希望我问什么?")).toBeTruthy(); + expect(screen.getByText("B")).toBeTruthy(); + }); + + it("prefers explicit answered payload metadata over parsing hidden content", () => { + render( + , + ); + + expect(screen.getByText(/已回答 · 选择一个方向:A/)).toBeTruthy(); + }); +}); diff --git a/frontend/app/src/components/ChatArea.tsx b/frontend/app/src/components/ChatArea.tsx index b203acdf2..b385c580f 100644 --- a/frontend/app/src/components/ChatArea.tsx +++ b/frontend/app/src/components/ChatArea.tsx @@ -1,5 +1,7 
@@ import type { AssistantTurn, ChatEntry, NoticeMessage, StreamStatus } from "../api"; import { useStickyScroll } from "../hooks/use-sticky-scroll"; +import type { AskUserQuestionPendingState } from "../pages/ask-user-question"; +import { parseAskUserQuestionAnswerPayload } from "../pages/ask-user-question"; import { AssistantBlock } from "./chat-area/AssistantBlock"; import { ChatSkeleton } from "./chat-area/ChatSkeleton"; import { NoticeBubble } from "./chat-area/NoticeBubble"; @@ -15,10 +17,47 @@ interface ChatAreaProps { agentAvatarUrl?: string; userName?: string; userAvatarUrl?: string; + askUserQuestion?: AskUserQuestionPendingState; } -export default function ChatArea({ entries, runtimeStatus, loading, onFocusAgent, onTaskNoticeClick, agentName, agentAvatarUrl, userName, userAvatarUrl }: ChatAreaProps) { +function hasAskUserQuestionTool(entry: AssistantTurn): boolean { + return entry.segments.some((segment) => segment.type === "tool" && segment.step.name === "AskUserQuestion"); +} + +export default function ChatArea({ entries, runtimeStatus, loading, onFocusAgent, onTaskNoticeClick, agentName, agentAvatarUrl, userName, userAvatarUrl, askUserQuestion }: ChatAreaProps) { const containerRef = useStickyScroll(); + const askUserQuestionDisplays = new Map< + string, + | { mode: "pending"; pending: AskUserQuestionPendingState } + | { + mode: "answered"; + answered: NonNullable>; + } + >(); + + let lastAskAssistantId: string | null = null; + for (const entry of entries) { + if (entry.role === "assistant" && hasAskUserQuestionTool(entry as AssistantTurn)) { + lastAskAssistantId = entry.id; + continue; + } + if (entry.role === "user" && "showing" in entry && entry.showing === false) { + const answered = entry.ask_user_question_answered ?? parseAskUserQuestionAnswerPayload(entry.content); + if (answered && lastAskAssistantId) { + askUserQuestionDisplays.set(lastAskAssistantId, { mode: "answered", answered }); + lastAskAssistantId = null; + } + } + } + + if (askUserQuestion) { + const pendingAssistant = [...entries] + .reverse() + .find((entry): entry is AssistantTurn => entry.role === "assistant" && hasAskUserQuestionTool(entry as AssistantTurn)); + if (pendingAssistant) { + askUserQuestionDisplays.set(pendingAssistant.id, { mode: "pending", pending: askUserQuestion }); + } + } return (
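For review, a worked example of the anchoring pass added above. The `Entry` shape and the literals are invented stand-ins; the rule itself — a hidden (`showing === false`) answered user entry attaches to the most recent assistant turn that carried an `AskUserQuestion` tool call — is the one this hunk implements.

```ts
// Standalone sketch, not the component code: same anchoring rule, minimal types.
type Entry =
  | { id: string; role: "assistant"; asksUserQuestion: boolean }
  | { id: string; role: "user"; showing: boolean; answered?: unknown };

function anchorAnswers(entries: Entry[]): Map<string, unknown> {
  const displays = new Map<string, unknown>();
  let lastAskAssistantId: string | null = null;
  for (const entry of entries) {
    if (entry.role === "assistant" && entry.asksUserQuestion) {
      lastAskAssistantId = entry.id; // remember the asking turn
      continue;
    }
    if (entry.role === "user" && !entry.showing && entry.answered && lastAskAssistantId) {
      displays.set(lastAskAssistantId, { mode: "answered", answered: entry.answered });
      lastAskAssistantId = null; // one answer per asking turn
    }
  }
  return displays;
}

// anchorAnswers([
//   { id: "a1", role: "assistant", asksUserQuestion: true },
//   { id: "u1", role: "user", showing: false, answered: { answers: [] } },
// ]) yields Map { "a1" => { mode: "answered", answered: { answers: [] } } },
// while the hidden "u1" entry itself renders nothing (isHidden short-circuits).
```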
@@ -28,23 +67,21 @@ export default function ChatArea({ entries, runtimeStatus, loading, onFocusAgent
{entries.map((entry) => { const isHidden = "showing" in entry && entry.showing === false; + if (isHidden) return null; if (entry.role === "notice") { return ; } if (entry.role === "user") { return ( -
- {isHidden && entry.senderName && ( -
{entry.senderName}
- )} - +
+
); } const assistantEntry = entry as AssistantTurn; const isStreamingThis = assistantEntry.streaming === true; return ( -
+
); diff --git a/frontend/app/src/components/ComputerPanel.tsx b/frontend/app/src/components/ComputerPanel.tsx deleted file mode 100644 index 5a9f92065..000000000 --- a/frontend/app/src/components/ComputerPanel.tsx +++ /dev/null @@ -1,3 +0,0 @@ -// Re-export from refactored module -export { default } from "./computer-panel"; -export type { ComputerPanelProps } from "./computer-panel"; diff --git a/frontend/app/src/components/CreateMemberDialog.tsx b/frontend/app/src/components/CreateMemberDialog.tsx index fff6bfb34..58c6c401f 100644 --- a/frontend/app/src/components/CreateMemberDialog.tsx +++ b/frontend/app/src/components/CreateMemberDialog.tsx @@ -29,7 +29,7 @@ export default function CreateMemberDialog({ open, onOpenChange }: Props) { onOpenChange(false); setName(""); setDescription(""); - navigate(`/members/${member.id}`); + navigate(`/contacts/agents/${member.id}`); } catch (e) { toast.error("创建失败,请重试"); } diff --git a/frontend/app/src/components/FileBrowser.tsx b/frontend/app/src/components/FileBrowser.tsx deleted file mode 100644 index 4cef7086a..000000000 --- a/frontend/app/src/components/FileBrowser.tsx +++ /dev/null @@ -1,101 +0,0 @@ -import { useState } from 'react'; -import { authFetch } from '@/store/auth-store'; -import { useFileList } from '@/hooks/useFileList'; -import { MoreVertical } from 'lucide-react'; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuTrigger, -} from '@/components/ui/dropdown-menu'; -import { Button } from '@/components/ui/button'; -import { - AlertDialog, - AlertDialogAction, - AlertDialogCancel, - AlertDialogContent, - AlertDialogDescription, - AlertDialogFooter, - AlertDialogHeader, - AlertDialogTitle, -} from '@/components/ui/alert-dialog'; - -interface FileBrowserProps { - threadId: string; -} - -export function FileBrowser({ threadId }: FileBrowserProps) { - const { files, loading, error, refetch } = useFileList(threadId); - const [deleteTarget, setDeleteTarget] = useState(null); - const [deleting, setDeleting] = useState(false); - - const handleDownload = (path: string) => { - const url = `/api/threads/${threadId}/files/download?path=${encodeURIComponent(path)}`; - window.open(url, '_blank'); - }; - - const handleDelete = async () => { - if (!deleteTarget) return; - setDeleting(true); - try { - const res = await authFetch( - `/api/threads/${threadId}/files/files?path=${encodeURIComponent(deleteTarget)}`, - { method: 'DELETE' } - ); - if (!res.ok) throw new Error('Failed to delete file'); - await refetch(); - } catch (e) { - alert(e instanceof Error ? e.message : 'Failed to delete file'); - } finally { - setDeleting(false); - setDeleteTarget(null); - } - }; - - if (loading) return
加载文件中...
; - if (error) return
错误:{error}
; - if (files.length === 0) return
暂无已上传文件
; - - return ( - <> -
- {files.map((file) => ( -
- {file.relative_path} -
- {(file.size_bytes / 1024).toFixed(1)} KB - - - - - - handleDownload(file.relative_path)}>下载 - setDeleteTarget(file.relative_path)} disabled={deleting}>删除 - - -
-
- ))} -
- - setDeleteTarget(null)}> - - - 删除文件? - - 确定要删除 "{deleteTarget}" 吗?此操作无法撤销。 - - - - 取消 - - {deleting ? '删除中...' : '删除'} - - - - - - ); -} diff --git a/frontend/app/src/components/Header.tsx b/frontend/app/src/components/Header.tsx index 9273f8c7b..1d850dbaf 100644 --- a/frontend/app/src/components/Header.tsx +++ b/frontend/app/src/components/Header.tsx @@ -1,4 +1,4 @@ -import { ChevronLeft, PanelLeft, Pause, Play } from "lucide-react"; +import { ChevronLeft, PanelLeft } from "lucide-react"; import { useNavigate } from "react-router-dom"; import type { SandboxInfo } from "../api"; import { useIsMobile } from "../hooks/use-mobile"; @@ -22,8 +22,6 @@ interface HeaderProps { sandboxInfo: SandboxInfo | null; currentModel?: string; onToggleSidebar: () => void; - onPauseSandbox: () => void; - onResumeSandbox: () => void; onModelChange?: (model: string) => void; } @@ -33,8 +31,6 @@ export default function Header({ sandboxInfo, currentModel = "leon:medium", onToggleSidebar, - onPauseSandbox, - onResumeSandbox, onModelChange, }: HeaderProps) { const isMobile = useIsMobile(); @@ -52,7 +48,7 @@ export default function Header({
       {isMobile ? (
-        <Button onClick={onPauseSandbox}>
-          <Pause />
-        </Button>
       )}
-      {hasRemote && sandboxInfo?.status === "paused" && (
-        <Button onClick={onResumeSandbox}>
-          <Play />
-        </Button>
-      )}
       );
diff --git a/frontend/app/src/components/LibraryEditor.tsx b/frontend/app/src/components/LibraryEditor.tsx
deleted file mode 100644
index 33c269af0..000000000
--- a/frontend/app/src/components/LibraryEditor.tsx
+++ /dev/null
@@ -1,145 +0,0 @@
-import { useState, useEffect } from "react";
-import { X, Save, Tag, Users, Calendar, FileText } from "lucide-react";
-import { Button } from "@/components/ui/button";
-import { Input } from "@/components/ui/input";
-import { toast } from "sonner";
-import { useAppStore } from "@/store/app-store";
-import { formatDistanceToNow } from "date-fns";
-import { zhCN } from "date-fns/locale";
-import type { ResourceItem } from "@/store/types";
-
-interface Props {
-  item: ResourceItem | null;
-  type: "skill" | "mcp" | "agent";
-  onClose: () => void;
-  onCreated?: (item: ResourceItem) => void;
-}
-
-export default function LibraryEditor({ item, type, onClose, onCreated }: Props) {
-  const fetchResourceContent = useAppStore(s => s.fetchResourceContent);
-  const updateResourceContent = useAppStore(s => s.updateResourceContent);
-  const updateResource = useAppStore(s => s.updateResource);
-  const addResource = useAppStore(s => s.addResource);
-  const getResourceUsedBy = useAppStore(s => s.getResourceUsedBy);
-
-  const isNew = item === null;
-
-  const [name, setName] = useState("");
-  const [content, setContent] = useState("");
-  const [savedContent, setSavedContent] = useState("");
-  const [loading, setLoading] = useState(!isNew);
-  const [saving, setSaving] = useState(false);
-  const [desc, setDesc] = useState("");
-
-  // Load existing item data
-  useEffect(() => {
-    if (!item) {
-      setName(""); setDesc("");
-      setContent(""); setSavedContent("");
-      setLoading(false);
-      return;
-    }
-    setName(item.name);
-    setDesc(item.desc);
-    setLoading(true);
-    fetchResourceContent(type, item.id)
-      .then(c => { setContent(c); setSavedContent(c); })
-      .catch(() => { setContent(""); setSavedContent(""); })
-      .finally(() => setLoading(false));
-  }, [item?.id, type, fetchResourceContent]);
-
-  const savedMeta = item ? { name: item.name, desc: item.desc } : null;
-  const contentDirty = content !== savedContent;
-  const metaDirty = isNew
-    ? name.trim().length > 0
-    : (desc !== savedMeta!.desc);
-  const dirty = contentDirty || metaDirty;
-  const canSave = isNew ? name.trim().length > 0 : dirty;
-
-  const usedByMembers = item ? getResourceUsedBy(type, item.name) : [];
-  const updatedText = item?.updated_at
-    ? formatDistanceToNow(new Date(item.updated_at), { addSuffix: true, locale: zhCN })
-    : "";
-
-  const handleSave = async () => {
-    setSaving(true);
-    try {
-      if (isNew) {
-        const created = await addResource(type, name.trim(), desc.trim());
-        if (content.trim()) await updateResourceContent(type, created.id, content);
-        toast.success(`${name.trim()} 已创建`);
-        onCreated?.(created);
-      } else {
-        if (metaDirty) await updateResource(type, item.id, { desc });
-        if (contentDirty) await updateResourceContent(type, item.id, content);
-        setSavedContent(content);
-        toast.success("已保存");
-      }
-    } catch { toast.error(isNew ? "创建失败" : "保存失败"); }
-    finally { setSaving(false); }
-  };
-
-  const typeLabel = type === "skill" ? "Skill" : type === "mcp" ? "MCP" : "Agent";
-  const fileHint = type === "skill" ? "SKILL.md" : type === "agent" ? `${item?.id || "new"}.md` : ".mcp.json";
-
-  return (
-    <div>
-      {/* Header */}
-      <div>
-        {isNew ? (
-          <Input value={name} onChange={e => setName(e.target.value)} autoFocus />
-        ) : (
-          <div>
-            <FileText />
-            <span>{item.name}</span>
-          </div>
-        )}
-        <div>
-          <Button onClick={handleSave} disabled={!canSave || saving}><Save /></Button>
-          <Button onClick={onClose}><X /></Button>
-        </div>
-      </div>
-
-      {/* Meta section */}
-      <div>
-        {!isNew && (
-          <div>
-            <Tag />
-            <span>{typeLabel}</span>
-            <Users />
-            <span>{usedByMembers.length ? usedByMembers.join(", ") : "未被使用"}</span>
-            {updatedText && <span><Calendar /> {updatedText}</span>}
-          </div>
-        )}
-        <Input value={desc} onChange={e => setDesc(e.target.value)} />
-      </div>
-
-      {/* Content editor */}
-      <div>
-        <div>
-          <FileText />
-          <span>{fileHint}</span>
-        </div>
-        {loading ? (
-          <div>
-            <div>加载中...</div>
-          </div>
-        ) : (
-