From 23f3b162f309e914d25a94d0d8383a5657e632f6 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 15:25:37 +0800 Subject: [PATCH 01/87] chore: open resource observability split workstream From c447a6d234d56076e412a19eb864077e9993f14f Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 15:52:04 +0800 Subject: [PATCH 02/87] docs: require playwright trace proofs for resources split --- ...2026-04-06-resource-observability-split.md | 347 ++++++++++++++++++ ...-06-resource-observability-split-design.md | 133 +++++++ 2 files changed, 480 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-06-resource-observability-split.md create mode 100644 docs/superpowers/specs/2026-04-06-resource-observability-split-design.md diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md new file mode 100644 index 000000000..8265ffa4b --- /dev/null +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -0,0 +1,347 @@ +# Resource Observability Split Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Separate global monitor resources from user-visible product resources while moving the monitor/resource truth chain onto Supabase-backed wiring honestly enough that the system is not pretending local SQLite is still the only source of truth. + +**Architecture:** The implementation is split into two reviewable cuts. Cut A handles sandbox truth-source rewiring so lease/terminal/chat-session construction stops hardcoding SQLite-only repo creation. Cut B moves monitor/resource reads onto the shared storage abstraction, keeps `/api/monitor/resources` global, and introduces `/api/resources/*` for the product contract. + +**Tech Stack:** Python, FastAPI, Supabase-backed storage providers, existing storage contract/container abstractions, pytest, ruff + +--- + +### Task 1: Lock Storage Abstraction For Monitor Reads + +**Files:** +- Modify: `storage/contracts.py` +- Modify: `storage/container.py` +- Modify: `backend/web/core/storage_factory.py` +- Test: `tests/Unit/storage/test_storage_container.py` + +- [ ] **Step 1: Write the failing test** + +```python +def test_storage_container_builds_sandbox_monitor_repo_with_supabase(fake_supabase_client): + container = StorageContainer(strategy="supabase", supabase_client=fake_supabase_client) + + repo = container.sandbox_monitor_repo() + + assert repo.__class__.__name__ == "SupabaseSandboxMonitorRepo" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `uv run pytest -q tests/Unit/storage/test_storage_container.py -k sandbox_monitor_repo` +Expected: FAIL because `StorageContainer` has no `sandbox_monitor_repo()` and no `SandboxMonitorRepo` contract. + +- [ ] **Step 3: Write minimal implementation** + +```python +class SandboxMonitorRepo(Protocol): + def query_threads(self, *, thread_id: str | None = None) -> list[dict[str, Any]]: ... + def query_thread_summary(self, thread_id: str) -> dict[str, Any] | None: ... + def query_thread_sessions(self, thread_id: str) -> list[dict[str, Any]]: ... + def query_leases(self) -> list[dict[str, Any]]: ... + def list_leases_with_threads(self) -> list[dict[str, Any]]: ... + def query_lease(self, lease_id: str) -> dict[str, Any] | None: ... + def query_lease_threads(self, lease_id: str) -> list[dict[str, Any]]: ... + def query_lease_events(self, lease_id: str) -> list[dict[str, Any]]: ... + def query_diverged(self) -> list[dict[str, Any]]: ... + def query_events(self, limit: int = 100) -> list[dict[str, Any]]: ... + def query_event(self, event_id: str) -> dict[str, Any] | None: ... + def count_rows(self, table_names: list[str]) -> dict[str, int]: ... + def list_sessions_with_leases(self) -> list[dict[str, Any]]: ... + def list_probe_targets(self) -> list[dict[str, Any]]: ... + def query_lease_instance_id(self, lease_id: str) -> str | None: ... + def close(self) -> None: ... +``` + +```python +_REPO_REGISTRY["sandbox_monitor_repo"] = ( + "storage.providers.supabase.sandbox_monitor_repo", + "SupabaseSandboxMonitorRepo", +) +``` + +```python +def sandbox_monitor_repo(self) -> SandboxMonitorRepo: + return self._build_repo("sandbox_monitor_repo", self._sqlite_sandbox_monitor_repo) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `uv run pytest -q tests/Unit/storage/test_storage_container.py -k sandbox_monitor_repo` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add storage/contracts.py storage/container.py backend/web/core/storage_factory.py tests/Unit/storage/test_storage_container.py +git commit -m "refactor: move sandbox monitor repo into storage container" +``` + +### Task 2: Make Sandbox Repo Construction Strategy-Aware + +**Files:** +- Modify: `backend/web/core/storage_factory.py` +- Modify: `sandbox/manager.py` +- Modify: `sandbox/chat_session.py` +- Modify: `backend/web/utils/helpers.py` +- Modify: `backend/web/services/file_channel_service.py` +- Modify: `backend/web/services/activity_tracker.py` +- Modify: `backend/web/routers/threads.py` +- Modify: `backend/web/routers/webhooks.py` +- Test: `tests/Unit/backend/web/core/test_storage_factory.py` + +- [ ] **Step 1: Write the failing test** + +```python +def test_make_lease_repo_uses_supabase_when_strategy_is_supabase(monkeypatch, fake_supabase_client): + monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase") + monkeypatch.setenv("LEON_SUPABASE_CLIENT_FACTORY", "tests.support.fake_supabase:create_client") + + repo = make_lease_repo() + + assert repo.__class__.__name__ == "SupabaseLeaseRepo" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `uv run pytest -q tests/Unit/backend/web/core/test_storage_factory.py -k 'make_lease_repo or make_terminal_repo or make_chat_session_repo'` +Expected: FAIL because these factories do not exist. + +- [ ] **Step 3: Write minimal implementation** + +```python +def make_lease_repo(db_path: Any = None) -> Any: + if _strategy() == "supabase": + from storage.providers.supabase.lease_repo import SupabaseLeaseRepo + return SupabaseLeaseRepo(client=_supabase_client()) + from storage.providers.sqlite.lease_repo import SQLiteLeaseRepo + return SQLiteLeaseRepo(db_path=db_path) +``` + +```python +def make_terminal_repo(db_path: Any = None) -> Any: + if _strategy() == "supabase": + from storage.providers.supabase.terminal_repo import SupabaseTerminalRepo + return SupabaseTerminalRepo(client=_supabase_client()) + from storage.providers.sqlite.terminal_repo import SQLiteTerminalRepo + return SQLiteTerminalRepo(db_path=db_path) +``` + +```python +def make_chat_session_repo(db_path: Any = None) -> Any: + if _strategy() == "supabase": + from storage.providers.supabase.chat_session_repo import SupabaseChatSessionRepo + return SupabaseChatSessionRepo(client=_supabase_client()) + from storage.providers.sqlite.chat_session_repo import SQLiteChatSessionRepo + return SQLiteChatSessionRepo(db_path=db_path) +``` + +```python +self.terminal_store = make_terminal_repo(db_path=self.db_path) +self.lease_store = make_lease_repo(db_path=self.db_path) +self.session_manager = ChatSessionManager( + provider=provider, + db_path=self.db_path, + default_policy=ChatSessionPolicy(), + chat_session_repo=make_chat_session_repo(db_path=self.db_path), +) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `uv run pytest -q tests/Unit/backend/web/core/test_storage_factory.py -k 'make_lease_repo or make_terminal_repo or make_chat_session_repo'` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add backend/web/core/storage_factory.py sandbox/manager.py sandbox/chat_session.py backend/web/utils/helpers.py backend/web/services/file_channel_service.py backend/web/services/activity_tracker.py backend/web/routers/threads.py backend/web/routers/webhooks.py tests/Unit/backend/web/core/test_storage_factory.py +git commit -m "refactor: route sandbox repo construction through storage strategy" +``` + +### Task 3: Split Global Monitor Routes From Product Resource Routes + +**Files:** +- Create: `backend/web/routers/resources.py` +- Modify: `backend/web/routers/monitor.py` +- Modify: `backend/web/core/lifespan.py` +- Modify: `backend/web/services/monitor_service.py` +- Modify: `backend/web/services/resource_service.py` +- Modify: `backend/web/services/sandbox_service.py` +- Test: `tests/Integration/test_monitor_resources_route.py` +- Test: `tests/Integration/test_resources_route.py` + +- [ ] **Step 1: Write the failing test** + +```python +def test_resources_overview_route_is_not_served_from_monitor_prefix(client): + response = client.get("/api/resources/overview") + + assert response.status_code == 200 +``` + +```python +def test_monitor_resources_route_remains_available_for_global_view(client): + response = client.get("/api/monitor/resources") + + assert response.status_code == 200 +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `uv run pytest -q tests/Integration/test_resources_route.py tests/Integration/test_monitor_resources_route.py` +Expected: FAIL because `/api/resources/overview` does not exist. + +- [ ] **Step 3: Write minimal implementation** + +```python +router = APIRouter(prefix="/api/resources", tags=["resources"]) + +@router.get("/overview") +def get_resources_overview(request: Request, current_user=Depends(require_current_user)): + return list_resource_providers(request.app.state, current_user_id=current_user.user_id) +``` + +```python +monitor_repo = request.app.state.storage_container.sandbox_monitor_repo() +``` + +```python +app.include_router(resources_router) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `uv run pytest -q tests/Integration/test_resources_route.py tests/Integration/test_monitor_resources_route.py` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add backend/web/routers/resources.py backend/web/routers/monitor.py backend/web/core/lifespan.py backend/web/services/monitor_service.py backend/web/services/resource_service.py backend/web/services/sandbox_service.py tests/Integration/test_resources_route.py tests/Integration/test_monitor_resources_route.py +git commit -m "feat: split global monitor resources from product resources api" +``` + +### Task 4: Rewire Frontend Resource Consumer Minimally + +**Files:** +- Modify: `frontend/app/src/pages/resources/api.ts` +- Modify: `frontend/app/src/pages/ResourcesPage.tsx` +- Modify: `frontend/app/src/pages/resources/ProviderCard.tsx` +- Test: `frontend/app/src/pages/resources/api.test.ts` +- Test: Playwright CLI product trace on `/resources` + +- [ ] **Step 1: Write the failing test** + +```ts +it("fetches overview from /api/resources/overview", async () => { + await fetchResourcesOverview(); + expect(fetch).toHaveBeenCalledWith("/api/resources/overview", expect.anything()); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd frontend/app && npm test -- api.test.ts` +Expected: FAIL because the client still calls `/api/monitor/resources`. + +- [ ] **Step 3: Write minimal implementation** + +```ts +export async function fetchResourcesOverview() { + return requestJson("/api/resources/overview"); +} +``` + +```tsx +
+``` + +```tsx +

资源

+``` + +```tsx +... +``` + +```tsx +{totalSessions} 会话 +``` + +```tsx + + ))} +
+ + )} +
+ + + +
+ + + + + {traceView === 'conversation' ? ( +
+ messages: {conversationTail.length} + loading: {conversationLoading ? 'yes' : 'no'} +
+ ) : ( +
+ assistant: {traceStats.assistant} + tool: {traceStats.tool} + runtime: {traceStats.runtime} + loading: {traceLoading ? 'yes' : 'no'} +
+ )} + {traceError &&
Trace load failed: {traceError}
} + {conversationError &&
Conversation load failed: {conversationError}
} +
+ {traceView === 'conversation' ? ( + <> + {conversationTail.map((message, idx) => ( + + ))} + {conversationTail.length === 0 &&
No conversation messages yet.
} + + ) : traceView === 'events' ? ( + <> + {visibleTrace.map((item, idx) => ( + + ))} + {visibleTrace.length === 0 &&
No trace events for this filter.
} + + ) : ( + <> + {traceSteps.map((step) => ( + + ))} + {traceSteps.length === 0 &&
No trace events for this filter.
} + + )} +
+ + {showRawTable && traceView !== 'conversation' && ( +
+ Raw trace table + + + + + + + + + + + + + + {traceTail.slice().reverse().map((item, idx) => ( + + + + + + + + + + ))} + +
StepActorEventSummaryRunWhenPayload
{item.seq || '-'}{item.actor}{item.event_type}{item.summary}{shortId(item.run_id)}{item.created_ago || '-'} +
+ view +
{JSON.stringify(item.payload, null, 2)}
+
+
+
+ )} + + ); +} + +// Page: Session Detail +function SessionDetailPage() { + const { sessionId } = useParams(); + const [data, setData] = React.useState(null); + const [error, setError] = React.useState(null); + + React.useEffect(() => { + if (!sessionId) return; + setError(null); + fetchAPI(`/session/${sessionId}`) + .then((payload) => setData(payload)) + .catch((e) => setError(e.message)); + }, [sessionId]); + + if (error) return
Session load failed: {error}
; + if (!data) return
Loading...
; + + return ( +
+ +

Session: {data.session_id.slice(0, 8)}

+ +
+
Thread: {data.thread_id.slice(0, 8)}
+
Status: {data.info.status}
+
Provider: {data.info.provider || '-'}
+
Started: {data.info.started_ago}
+
Last Active: {data.info.last_active_ago}
+
Ended: {data.info.ended_ago || '-'}
+
); } @@ -439,17 +1350,597 @@ function EventDetailPage() { ); } +// Page: Evaluation +function EvaluationPage() { + const location = useLocation(); + const [dataset, setDataset] = React.useState('SWE-bench/SWE-bench_Lite'); + const [split, setSplit] = React.useState('test'); + const [startIdx, setStartIdx] = React.useState('0'); + const [sliceCount, setSliceCount] = React.useState('10'); + const [promptProfile, setPromptProfile] = React.useState('heuristic'); + const [timeoutSec, setTimeoutSec] = React.useState('180'); + const [recursionLimit, setRecursionLimit] = React.useState('256'); + const [sandbox, setSandbox] = React.useState('local'); + const [runStatus, setRunStatus] = React.useState<'idle' | 'starting' | 'submitted' | 'error'>('idle'); + const [evaluationId, setEvaluationId] = React.useState(''); + const [runError, setRunError] = React.useState(null); + const [evaluations, setEvaluations] = React.useState([]); + const [evalOffset, setEvalOffset] = React.useState(0); + const [evalLimit] = React.useState(30); + const [evalPagination, setEvalPagination] = React.useState(null); + const [runsLoading, setRunsLoading] = React.useState(false); + const [composerOpen, setComposerOpen] = React.useState(false); + + const loadEvaluations = React.useCallback(async () => { + setRunsLoading(true); + try { + const payload = await fetchAPI(`/evaluations?limit=${evalLimit}&offset=${evalOffset}`); + setEvaluations(Array.isArray(payload?.items) ? payload.items : []); + setEvalPagination(payload?.pagination || null); + } catch (e: any) { + setRunError(e?.message || String(e)); + } finally { + setRunsLoading(false); + } + }, [evalLimit, evalOffset]); + + React.useEffect(() => { + void loadEvaluations(); + const timer = window.setInterval(() => { + void loadEvaluations(); + }, 2500); + return () => window.clearInterval(timer); + }, [loadEvaluations]); + + async function handleStart() { + if (runStatus === 'starting') return; + setRunError(null); + setEvaluationId(''); + setRunStatus('starting'); + + try { + const payload = await fetchJSON('/api/monitor/evaluations', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + dataset, + split, + start: Number(startIdx), + count: Number(sliceCount), + prompt_profile: promptProfile, + timeout_sec: Number(timeoutSec), + recursion_limit: Number(recursionLimit), + sandbox, + arm: 'monitor', + }), + }); + const nextEvalId = String(payload?.evaluation_id || ''); + if (!nextEvalId) throw new Error('create evaluation returned empty evaluation_id'); + setEvaluationId(nextEvalId); + setRunStatus('submitted'); + setComposerOpen(false); + await loadEvaluations(); + } catch (e: any) { + setRunStatus('error'); + setRunError(e?.message || String(e)); + } + } + + const currentEval = evaluations.find((item: any) => item.evaluation_id === evaluationId); + const submissionPreview = { + dataset, + split, + start: Number(startIdx || '0'), + count: Number(sliceCount || '0'), + prompt_profile: promptProfile, + timeout_sec: Number(timeoutSec || '0'), + recursion_limit: Number(recursionLimit || '0'), + sandbox, + arm: 'monitor', + }; + const parameterReference = [ + ['Dataset', 'Benchmark source', 'Lite for fast iteration, Verified for strict runs'], + ['Split', 'Data partition', 'Use test for formal comparison'], + ['Start / Slice', 'Case range', 'Run small slices first, then scale up'], + ['Prompt Profile', 'Prompt strategy', 'Compare baseline vs heuristic in A/B'], + ['Timeout(s)', 'Per-case wall clock limit', '180~300 for initial runs'], + ['Recursion', 'Agent iteration budget', '256 default, raise to 512 for hard tasks'], + ['Sandbox', 'Execution provider', 'Use local for quick checks, daytona for infra parity'], + ]; + const statusReference = [ + ['queued', 'Job is persisted and waiting for executor slots.'], + ['running', 'At least one thread is active and writing status updates.'], + ['provisional', 'Artifacts are incomplete (missing eval summary or eval error). Score is not final.'], + ['completed', 'Runner finished and artifacts were written.'], + ['completed_with_errors', 'Runner finished, but summary reports failed items/errors.'], + ['error', 'Runner failed; open detail page to inspect stderr and trace.'], + ]; + const currentProgress = currentEval ? evalProgress(currentEval) : null; + + React.useEffect(() => { + window.scrollTo({ top: 0, left: 0, behavior: 'auto' }); + }, []); + React.useEffect(() => { + // @@@evaluation-query-open - allow deterministic screenshot/review entry to open config panel via ?new=1. + const query = new URLSearchParams(location.search); + setComposerOpen(query.get('new') === '1'); + }, [location.search]); + + return ( +
+

Evaluation

+

One evaluation contains many threads. Start jobs from config panel, track durable progress in list, then drill into thread trace.

+ +
+
+

1. Submit

+

Open config, choose scope/profile/sandbox, then submit one batch run.

+
+
+

2. Track

+

List auto-refreshes every 2.5s and survives reload. Status is backend-persisted.

+
+
+

3. Inspect

+

Open evaluation detail to jump to per-thread trace and tool-call timeline.

+
+
+ +
+
+

Current Submission

+

Latest evaluation submitted from this page.

+
evaluation: {evaluationId || '-'}
+

status: {currentEval?.status || runStatus}

+ {currentEval && currentProgress && ( +
+
phase: {String(currentEval.status || '-').toUpperCase()}
+
+
+
+
+ {formatProgressSummary(currentProgress)} +
+
+ )} + {runError &&
run error: {runError}
} + {evaluationId && ( +

+ open evaluation detail +

+ )} +
+ +
+

Start New Evaluation

+

Open a focused config panel. After submit, track progress in the evaluation list below.

+ +
+
+ +
+
+

Evaluations ({evalPagination?.total ?? evaluations.length})

+ +
+

+ Auto refresh: 2.5s {runsLoading ? '| loading...' : ''} + {' '}| page {evalPagination?.page ?? 1} +

+

Evaluation = one batch run. Progress shows total/completed/started-or-running/pending. Click Evaluation ID for detail trace and thread links.

+ + + + + + + + + + + + + + + {evaluations.map((item: any) => ( + + + + + + + + + + + ))} + {evaluations.length === 0 && ( + + + + )} + +
EvaluationDatasetRangeProfile / SandboxStatusProgressScoreUpdated
{shortId(item.evaluation_id, 14)}{item.dataset}{item.start_idx}..{item.start_idx + item.slice_count - 1}{item.prompt_profile || '-'} / {item.sandbox || '-'} + {(() => { + // @@@publishable-preferred - publishable is the canonical release gate; score_gate stays as compatibility fallback. + const publishable = item.score?.publishable ?? (item.score?.score_gate === 'final'); + return ( + <> +
{String(item.status || '-').toUpperCase()}
+
publishable: {publishable ? 'TRUE' : 'FALSE'}
+ + ); + })()} +
+ {(() => { + const p = evalProgress(item); + return ( +
+
+
+
+
{formatProgressSummary(p)}
+
+ ); + })()} +
+ {(item.score?.publishable ?? (item.score?.score_gate === 'final')) ? ( + <> +
R {formatResolvedScore(item)}
+
C {formatPct(item.score?.completed_rate_pct)} | T {formatPct(item.score?.tool_call_thread_rate_pct)}
+ + ) : ( + <> +
R PROVISIONAL
+
C - | T -
+ + )} +
{item.updated_ago || '-'}
No evaluations yet.
+
+ +

+ offset={evalPagination?.offset ?? 0} | limit={evalPagination?.limit ?? evalLimit} | total={evalPagination?.total ?? evaluations.length} +

+ +
+
+ +
+
+

Status Guide

+
    + {statusReference.map((row) => ( +
  • {row[0]}: {row[1]}
  • + ))} +
+
+
+

Field Guide

+
    + {parameterReference.slice(0, 4).map((row) => ( +
  • {row[0]}: {row[1]}
  • + ))} +
+
+
+ + {composerOpen && ( + // @@@evaluation-composer-modal - keep config editing in a fixed layer to avoid "tail jump" in long list pages. +
setComposerOpen(false)}> +
e.stopPropagation()}> +
+

New Evaluation Config

+ +
+

Configure run scope, profile and runtime, then submit.

+ +
+
+

Run Scope

+
+
+ + +

Benchmark source. Lite is faster; Verified is stricter and slower.

+
+
+ + +

Dataset partition. Use test for formal comparison.

+
+
+ + setStartIdx(e.target.value)} /> +

Starting index inside the selected split.

+
+
+ + +

How many items to run in this evaluation batch.

+
+
+
+ +
+

Agent Profile

+
+
+ + +

Prompt strategy passed to runner. Used for A/B profile comparison.

+
+
+ + setRecursionLimit(e.target.value)} /> +

Agent recursion/iteration budget per item.

+
+
+
+ +
+

Runtime

+
+
+ + setTimeoutSec(e.target.value)} /> +

Per-item wall-clock timeout in seconds.

+
+
+ + +

Execution environment provider for this run.

+
+
+
+ +
+
+ + +
+

Submits config to backend and starts an evaluation job.

+
+
+ +
+ Submission Preview +
{JSON.stringify(submissionPreview, null, 2)}
+
+ +
+ Parameter Reference + + + + + + + + + + {parameterReference.map((row) => ( + + + + + + ))} + +
FieldMeaningRecommendation
{row[0]}{row[1]}{row[2]}
+
+
+
+ )} +
+ ); +} + +function EvaluationDetailPage() { + const { evaluationId } = useParams(); + const [data, setData] = React.useState(null); + + React.useEffect(() => { + fetchAPI(`/evaluation/${evaluationId}`).then(setData); + }, [evaluationId]); + + if (!data) return
Loading...
; + const detailProgress = evalProgress({ + threads_done: data.info?.threads_done ?? 0, + threads_running: data.info?.threads_running ?? 0, + slice_count: data.info?.slice_count ?? data.info?.threads_total ?? 0, + progress_source: data.info?.progress_source ?? 'thread_rows', + }); + const threadStateLabel = detailProgress.mode === 'checkpoint_estimate' ? 'started' : 'running'; + const scoreGate = String(data.info?.score?.score_gate || 'provisional'); + const publishable = Boolean(data.info?.score?.publishable ?? (scoreGate === 'final')); + const scoreFinal = publishable; + const summaryReady = !!data.info?.score?.eval_summary_path; + + return ( +
+ +

Evaluation: {shortId(data.evaluation_id, 14)}

+

+ {data.info.status} | dataset={data.info.dataset} | {threadStateLabel}={data.info.threads_running}/{data.info.threads_total} + {' '}| gate={scoreGate} + {' '}| publishable={String(publishable)} + {' '}| score={scoreFinal ? `${data.info.score?.resolved_instances ?? 0}/${data.info.score?.total_instances ?? 0} (${formatPct(data.info.score?.primary_score_pct)})` : 'PROVISIONAL'} +

+
+
phase: {String(data.info.status || '-').toUpperCase()}
+
+
+
+
+ {formatProgressSummary(detailProgress)} +
+
+ +
+
Split: {data.info.split}
+
Start: {data.info.start_idx}
+
Count: {data.info.slice_count}
+
Profile: {data.info.prompt_profile}
+
Timeout: {data.info.timeout_sec}s
+
Recursion: {data.info.recursion_limit}
+
Score Gate: {scoreGate}
+
Publishable: {String(publishable)}
+
Summary: {summaryReady ? 'ready' : 'missing'}
+ {scoreFinal ? ( + <> +
Resolved: {data.info.score?.resolved_instances ?? 0}/{data.info.score?.total_instances ?? 0}
+
Resolved Rate: {formatPct(data.info.score?.resolved_rate_pct)}
+
Completed: {data.info.score?.completed_instances ?? 0}/{data.info.score?.total_instances ?? 0}
+
Completed Rate: {formatPct(data.info.score?.completed_rate_pct)}
+
Non-empty Patch: {data.info.score?.non_empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
+
Non-empty Rate: {formatPct(data.info.score?.non_empty_patch_rate_pct)}
+
Empty Patch: {data.info.score?.empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
+
Errors: {data.info.score?.error_instances ?? 0}
+
Trace Active: {data.info.score?.active_trace_threads ?? 0}/{data.info.score?.total_instances ?? 0}
+
Tool-call Threads: {data.info.score?.tool_call_threads ?? 0}/{data.info.score?.total_instances ?? 0}
+
Tool-call Coverage: {formatPct(data.info.score?.tool_call_thread_rate_pct)}
+
Tool Calls Total: {data.info.score?.tool_calls_total ?? 0}
+
Avg Tool Calls(active): {data.info.score?.avg_tool_calls_per_active_thread ?? '-'}
+
Recursion Cap Hits: {data.info.score?.recursion_cap_hits ?? 0}{data.info.score?.recursion_limit ? ` / cap ${data.info.score.recursion_limit}` : ''}
+ + ) : ( + <> +
Final Score: blocked (provisional)
+
Block Reason: {data.info.score?.manifest_eval_error ? 'manifest_eval_error' : 'missing_eval_summary'}
+ + )} +
Run Dir: {data.info.score?.run_dir || '-'}
+
+ +
+

{data.threads.title} ({data.threads.count})

+ + + + + + + + + + + + + + {data.threads.items.map((item: any) => ( + + + + + + + + + + ))} + {data.threads.items.length === 0 && ( + + + + )} + +
#ThreadSessionRunEventsStatusStart
{item.item_index} + + {evalThreadLabel(item.thread_id, data.evaluation_id)} + + + {item.session?.session_url ? ( + {shortId(item.session.session_id)} + ) : '-'} + {item.run?.run_id ? shortId(item.run.run_id, 12) : '-'}{item.run?.event_count ?? 0}{item.status}{item.start_idx}
No threads in this evaluation.
+
+
+ ); +} + // Layout: Top navigation +function ScrollToTopOnRouteChange() { + const { pathname } = useLocation(); + React.useEffect(() => { + // @@@history-scroll-restore-disable - browser may restore stale scroll offsets and make user land at page tail. + const prev = window.history.scrollRestoration; + window.history.scrollRestoration = 'manual'; + return () => { + window.history.scrollRestoration = prev; + }; + }, []); + React.useEffect(() => { + // @@@route-scroll-reset - switch tabs/details should always start from top to avoid "tail landing" confusion. + window.scrollTo({ top: 0, left: 0, behavior: 'auto' }); + }, [pathname]); + return null; +} + function Layout({ children }: { children: React.ReactNode }) { return (
@@ -463,16 +1954,20 @@ function Layout({ children }: { children: React.ReactNode }) { export default function App() { return ( + } /> } /> } /> + } /> } /> } /> } /> } /> } /> + } /> + } /> diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 0b767eade..bb6c8ad20 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -253,11 +253,264 @@ section li { color: #e0e0e0; } -/* Loading */ -div:has(> :only-child:is(div:contains("Loading"))) { +.trace-summary { + white-space: pre-wrap; + word-break: break-word; + max-width: 56ch; +} + +.trace-actor { + display: inline-block; + padding: 0.15rem 0.45rem; + border-radius: 999px; + font-size: 0.75rem; + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.trace-assistant { + background: #1f3a5a; + color: #8dc3ff; +} + +.trace-tool { + background: #2d3f24; + color: #a9e684; +} + +.trace-runtime { + background: #4b3d1f; + color: #f2c56b; +} + +.trace-details summary { + cursor: pointer; + color: #8db9ff; +} + +.trace-payload { + margin-top: 0.5rem; + max-height: 220px; + overflow: auto; + padding: 0.75rem; +} + +.trace-toolbar { + margin: 0.8rem 0; display: flex; - justify-content: center; + justify-content: flex-start; + flex-wrap: wrap; align-items: center; - min-height: 200px; - color: #888; + gap: 1rem; +} + +.trace-run-select { + display: flex; + align-items: center; + gap: 0.4rem; +} + +.trace-run-select select { + border: 1px solid #2e3e57; + background: #101721; + color: #dbe9f7; + border-radius: 6px; + padding: 0.3rem 0.45rem; +} + +.trace-filters { + display: flex; + gap: 0.4rem; +} + +.trace-view-switch { + display: flex; + gap: 0.4rem; +} + +.trace-filter-btn { + border: 1px solid #2e3e57; + background: #1a2432; + color: #9ec2ef; + border-radius: 6px; + padding: 0.28rem 0.62rem; + cursor: pointer; +} + +.trace-filter-btn.is-active { + background: #2a4f7a; + color: #e8f3ff; + border-color: #4d85bf; +} + +.trace-raw-toggle { + color: #9aa7b6; + font-size: 0.9rem; + display: flex; + align-items: center; + gap: 0.35rem; +} + +.trace-metrics { + display: flex; + gap: 1rem; + color: #91a4b8; + font-size: 0.9rem; +} + +.trace-timeline { + margin-top: 0.8rem; + display: flex; + flex-direction: column; + gap: 0.8rem; +} + +.trace-card { + border: 1px solid #2a2f36; + background: #12161c; + border-radius: 10px; + padding: 0.7rem 0.8rem; +} + +.trace-card-assistant { + border-left: 4px solid #4f7fd8; +} + +.trace-card-tool { + border-left: 4px solid #5f9446; +} + +.trace-card-runtime { + border-left: 4px solid #a07932; +} + +.trace-card-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 0.6rem; + gap: 0.8rem; +} + +.trace-card-meta { + display: flex; + align-items: center; + gap: 0.42rem; +} + +.trace-step { + color: #89a4c0; + font-family: 'SF Mono', Monaco, monospace; + font-size: 0.85rem; +} + +.trace-event { + color: #ccd6e0; + font-size: 0.85rem; + font-family: 'SF Mono', Monaco, monospace; +} + +.trace-run-id { + color: #8395aa; +} + +.trace-block-wrap { + display: flex; + flex-direction: column; + gap: 0.35rem; +} + +.trace-label { + color: #8ea0b4; + font-size: 0.78rem; + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.trace-block { + background: #0c1014; + border: 1px solid #1f2732; + border-radius: 7px; + padding: 0.55rem 0.65rem; + font-family: 'SF Mono', Monaco, monospace; + font-size: 0.82rem; + color: #dae5f2; + white-space: pre-wrap; + word-break: break-word; + max-height: 300px; + overflow: auto; +} + +.trace-output { + max-height: 460px; +} + +.trace-assistant-text { + max-height: 340px; +} + +.trace-command { + color: #bde59d; +} + +.trace-empty { + border: 1px dashed #33404f; + color: #95a4b4; + border-radius: 8px; + padding: 1rem; +} + +.trace-raw-table { + margin-top: 1rem; +} + +.trace-step-card { + border: 1px solid #2a2f36; + background: #0f141b; + border-left: 4px solid #4f7fd8; + border-radius: 10px; + padding: 0.8rem; +} + +.conversation-card { + border: 1px solid #30363d; + background: #0d1117; + border-radius: 10px; + padding: 0.8rem; +} + +.trace-step-header { + display: flex; + justify-content: space-between; + gap: 0.8rem; + margin-bottom: 0.55rem; +} + +.trace-step-meta { + display: flex; + align-items: center; + gap: 0.6rem; +} + +.trace-step-index { + color: #e7f1ff; + font-weight: 600; +} + +.trace-step-block { + display: flex; + flex-direction: column; + gap: 0.28rem; + margin: 0.4rem 0 0.65rem; +} + +.trace-raw-item { + margin-top: 0.5rem; +} + +.trace-raw-item-title { + display: flex; + gap: 0.45rem; + color: #9eb3c9; + font-size: 0.82rem; } diff --git a/pyproject.toml b/pyproject.toml index 58e77e574..5c4e01120 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,11 +59,11 @@ docs = ["pymupdf>=1.24.0", "python-pptx>=1.0.0"] sandbox = ["wuying-agentbay-sdk>=0.10.0"] e2b = ["e2b>=2.13.0"] daytona = ["daytona-sdk>=0.139.0,<0.140.0", "python-socks>=2.7.0"] -eval = ["httpx-sse>=0.4.0"] +eval = ["httpx-sse>=0.4.0", "datasets>=4.8.4", "swebench>=4.1.0", "socksio>=1.0.0"] langfuse = ["langfuse>=3.0.0"] langsmith = ["langsmith>=0.1.0"] otel = ["opentelemetry-api>=1.20.0", "opentelemetry-sdk>=1.20.0", "opentelemetry-exporter-otlp>=1.20.0"] -all = ["pymupdf>=1.24.0", "python-pptx>=1.0.0", "wuying-agentbay-sdk>=0.10.0", "e2b>=2.13.0", "daytona-sdk>=0.139.0,<0.140.0", "python-socks>=2.7.0", "httpx-sse>=0.4.0", "langfuse>=3.0.0", "langsmith>=0.1.0"] +all = ["pymupdf>=1.24.0", "python-pptx>=1.0.0", "wuying-agentbay-sdk>=0.10.0", "e2b>=2.13.0", "daytona-sdk>=0.139.0,<0.140.0", "python-socks>=2.7.0", "httpx-sse>=0.4.0", "datasets>=4.8.4", "swebench>=4.1.0", "socksio>=1.0.0", "langfuse>=3.0.0", "langsmith>=0.1.0"] [project.urls] Homepage = "https://github.com/Ju-Yi-AI-Lab/leonai" From 435d8a099f9ed092ae106abade73f4d153f9c20b Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 3 Apr 2026 20:11:52 +0800 Subject: [PATCH 06/87] feat(monitor): add trace runs page and flexible ports --- backend/web/monitor.py | 104 +++++ frontend/monitor/src/App.tsx | 116 ++++- frontend/monitor/vite.config.ts | 8 +- uv.lock | 734 +++++++++++++++++++++++++++++++- 4 files changed, 950 insertions(+), 12 deletions(-) diff --git a/backend/web/monitor.py b/backend/web/monitor.py index 99eb8754d..e1451911d 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -16,6 +16,7 @@ from pathlib import Path from subprocess import PIPE +from typing import Any from fastapi import APIRouter, Depends, HTTPException, Query, Request from pydantic import BaseModel, Field @@ -969,6 +970,101 @@ def load_run_candidates(thread_id: str, limit: int = 20) -> list[dict]: ] +def list_trace_runs(offset: int = 0, limit: int = 50) -> dict[str, Any]: + """List recent trace-backed runs across all threads.""" + if not RUN_EVENT_DB_PATH.exists(): + return { + "title": "Recent Traces", + "count": 0, + "items": [], + "pagination": { + "offset": offset, + "limit": limit, + "total": 0, + "page": 1, + "has_prev": False, + "has_next": False, + "prev_offset": None, + "next_offset": None, + }, + } + + with sqlite3.connect(str(RUN_EVENT_DB_PATH)) as conn: + conn.row_factory = sqlite3.Row + total_row = conn.execute( + """ + SELECT COUNT(*) AS total + FROM ( + SELECT 1 + FROM run_events + WHERE run_id NOT LIKE 'activity_%' + GROUP BY thread_id, run_id + ) + """ + ).fetchone() + total = int(total_row["total"] if total_row else 0) + rows = conn.execute( + """ + SELECT + thread_id, + run_id, + COUNT(*) AS event_count, + SUM(CASE WHEN event_type = 'tool_call' THEN 1 ELSE 0 END) AS tool_call_count, + SUM(CASE WHEN event_type = 'tool_result' THEN 1 ELSE 0 END) AS tool_result_count, + MIN(created_at) AS started_at, + MAX(created_at) AS last_event_at, + MAX(CASE WHEN event_type = 'run_done' THEN 1 ELSE 0 END) AS has_run_done + FROM run_events + WHERE run_id NOT LIKE 'activity_%' + GROUP BY thread_id, run_id + ORDER BY MAX(created_at) DESC + LIMIT ? OFFSET ? + """, + (limit, offset), + ).fetchall() + + mode_map = load_thread_mode_map([str(row["thread_id"]) for row in rows if row["thread_id"]]) + items = [] + for row in rows: + thread_id = str(row["thread_id"]) + run_id = str(row["run_id"]) + mode_info = mode_map.get(thread_id, {"thread_mode": "normal", "keep_full_trace": False}) + items.append( + { + "thread_id": thread_id, + "thread_url": f"/thread/{thread_id}?run={run_id}", + "run_id": run_id, + "event_count": int(row["event_count"] or 0), + "tool_call_count": int(row["tool_call_count"] or 0), + "tool_result_count": int(row["tool_result_count"] or 0), + "started_at": row["started_at"], + "started_ago": format_time_ago(row["started_at"]) if row["started_at"] else None, + "last_event_at": row["last_event_at"], + "last_event_ago": format_time_ago(row["last_event_at"]) if row["last_event_at"] else None, + "status": "completed" if int(row["has_run_done"] or 0) > 0 else "running", + "thread_mode": mode_info["thread_mode"], + "keep_full_trace": mode_info["keep_full_trace"], + } + ) + + page = (offset // limit) + 1 + return { + "title": "Recent Traces", + "count": len(items), + "items": items, + "pagination": { + "offset": offset, + "limit": limit, + "total": total, + "page": page, + "has_prev": offset > 0, + "has_next": (offset + len(items)) < total, + "prev_offset": max(offset - limit, 0) if offset > 0 else None, + "next_offset": (offset + limit) if (offset + len(items)) < total else None, + }, + } + + def _msg_text(content: object) -> str: if isinstance(content, str): return content @@ -1324,6 +1420,14 @@ def get_thread(thread_id: str, db: sqlite3.Connection = Depends(get_db)): } +@router.get("/traces") +def get_traces( + offset: int = Query(default=0, ge=0), + limit: int = Query(default=50, ge=1, le=200), +): + return list_trace_runs(offset=offset, limit=limit) + + @router.get("/thread/{thread_id}/conversation") async def get_thread_conversation(thread_id: str, request: Request): """Return raw serialized LangChain messages for monitor conversation view.""" diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index e291770f4..e95178e17 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -167,10 +167,114 @@ function ThreadsPage() { ); } +function TracesPage() { + const [data, setData] = React.useState(null); + const [loading, setLoading] = React.useState(false); + const [offset, setOffset] = React.useState(0); + const [limit, setLimit] = React.useState(50); + + const loadTraces = React.useCallback(async () => { + setLoading(true); + try { + const payload = await fetchAPI(`/traces?offset=${offset}&limit=${limit}`); + setData(payload); + } finally { + setLoading(false); + } + }, [offset, limit]); + + React.useEffect(() => { + void loadTraces(); + }, [loadTraces]); + + if (!data) return
Loading...
; + const pagination = data.pagination || {}; + const total = Number(pagination.total || data.count || 0); + const currentCount = Number(data.count || 0); + const from = total > 0 ? offset + 1 : 0; + const to = offset + currentCount; + const page = Number(pagination.page || 1); + + return ( +
+

{data.title}

+

Showing {from}-{to} of {total} | page {page}

+
+
+
+ + + +
+
+ Rows: + +
+
+ + + + + + + + + + + + + + + {data.items.map((item: any) => ( + + + + + + + + + + + ))} + +
ThreadRunModeEventsTool CallsStartedLast EventStatus
{item.thread_id.slice(0, 18)}{shortId(item.run_id, 12)}{item.thread_mode || 'normal'} / trace={item.keep_full_trace ? 'full' : 'latest'}{item.event_count}{item.tool_call_count} / {item.tool_result_count}{item.started_ago || '-'}{item.last_event_ago || '-'}{item.status}
+
+
+ ); +} + // Page: Thread Detail function ThreadDetailPage() { const { threadId } = useParams(); + const location = useLocation(); const [data, setData] = React.useState(null); + const initialRunId = React.useMemo(() => new URLSearchParams(location.search).get('run') || '', [location.search]); React.useEffect(() => { fetchAPI(`/thread/${threadId}`).then(setData); @@ -232,7 +336,7 @@ function ThreadDetailPage() { - +
); } @@ -757,7 +861,7 @@ function TraceStepCard({ step }: { step: TraceStep }) { ); } -function ThreadTraceSection({ threadId, autoRefreshEnabled }: { threadId: string; autoRefreshEnabled: boolean }) { +function ThreadTraceSection({ threadId, autoRefreshEnabled, initialRunId = '' }: { threadId: string; autoRefreshEnabled: boolean; initialRunId?: string }) { const [traceEvents, setTraceEvents] = React.useState([]); const [traceError, setTraceError] = React.useState(null); const [traceLoading, setTraceLoading] = React.useState(false); @@ -816,10 +920,10 @@ function ThreadTraceSection({ threadId, autoRefreshEnabled }: { threadId: string if (!threadId) return; setTraceEvents([]); setRunCandidates([]); - setSelectedRunId(''); - loadTrace(''); + setSelectedRunId(initialRunId); + loadTrace(initialRunId); loadConversation(); - }, [threadId, loadTrace, loadConversation]); + }, [threadId, initialRunId, loadTrace, loadConversation]); React.useEffect(() => { if (!selectedRunId) return; @@ -1937,6 +2041,7 @@ function Layout({ children }: { children: React.ReactNode }) {

Mycel Sandbox Monitor

Threads + Trace Leases Diverged Events @@ -1959,6 +2064,7 @@ export default function App() { } /> } /> + } /> } /> } /> } /> diff --git a/frontend/monitor/vite.config.ts b/frontend/monitor/vite.config.ts index a386a6532..a98d79886 100644 --- a/frontend/monitor/vite.config.ts +++ b/frontend/monitor/vite.config.ts @@ -11,11 +11,14 @@ function getWorktreePort(key: string, fallback: string): string { } const backendPort = process.env.LEON_BACKEND_PORT || getWorktreePort("worktree.ports.backend", "8001"); +const monitorPort = parseInt(process.env.LEON_MONITOR_PORT || "5174", 10); +const monitorPreviewPort = parseInt(process.env.LEON_MONITOR_PREVIEW_PORT || "4174", 10); export default defineConfig({ plugins: [react()], server: { - port: 5174, + host: "0.0.0.0", + port: monitorPort, strictPort: true, proxy: { "/api": { @@ -25,7 +28,8 @@ export default defineConfig({ }, }, preview: { - port: 4174, + host: "0.0.0.0", + port: monitorPreviewPort, strictPort: true, }, }); diff --git a/uv.lock b/uv.lock index 78f682840..68fbac858 100644 --- a/uv.lock +++ b/uv.lock @@ -2,8 +2,15 @@ version = 1 revision = 3 requires-python = ">=3.12" resolution-markers = [ - "python_full_version >= '3.13'", - "python_full_version < '3.13'", + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and sys_platform == 'emscripten'", + "python_full_version < '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] [[package]] @@ -357,6 +364,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, +] + [[package]] name = "bracex" version = "2.6" @@ -388,6 +408,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/80/56/60547f7801b97c67e97491dc3d9ade9fbccbd0325058fd3dfcb2f5d98d90/cattrs-26.1.0-py3-none-any.whl", hash = "sha256:d1e0804c42639494d469d08d4f26d6b9de9b8ab26b446db7b5f8c2e97f7c3096", size = 73054, upload-time = "2026-02-18T22:15:17.958Z" }, ] +[[package]] +name = "cbor2" +version = "5.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/cb/09939728be094d155b5d4ac262e39877875f5f7e36eea66beb359f647bd0/cbor2-5.9.0.tar.gz", hash = "sha256:85c7a46279ac8f226e1059275221e6b3d0e370d2bb6bd0500f9780781615bcea", size = 111231, upload-time = "2026-03-22T15:56:50.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/39/72d8a5a4b06565561ec28f4fcb41aff7bb77f51705c01f00b8254a2aca4f/cbor2-5.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1f223dffb1bcdd2764665f04c1152943d9daa4bc124a576cd8dee1cad4264313", size = 71223, upload-time = "2026-03-22T15:56:13.68Z" }, + { url = "https://files.pythonhosted.org/packages/09/fd/7ddf3d3153b54c69c3be77172b8d9aa3a9d74f62a7fbde614d53eaeed9a4/cbor2-5.9.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae6c706ac1d85a0b3cb3395308fd0c4d55e3202b4760773675957e93cdff45fc", size = 287865, upload-time = "2026-03-22T15:56:14.813Z" }, + { url = "https://files.pythonhosted.org/packages/db/9d/7ede2cc42f9bb4260492e7d29d2aab781eacbbcfb09d983de1e695077199/cbor2-5.9.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4cd43d8fc374b31643b2830910f28177a606a7bc84975a62675dd3f2e320fc7b", size = 288246, upload-time = "2026-03-22T15:56:16.113Z" }, + { url = "https://files.pythonhosted.org/packages/ce/9d/588ebc7c5bc5843f609b05fe07be8575c7dec987735b0bbc908ac9c1264a/cbor2-5.9.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4aa07b392cc3d76fb31c08a46a226b58c320d1c172ff3073e864409ced7bc50f", size = 280214, upload-time = "2026-03-22T15:56:17.519Z" }, + { url = "https://files.pythonhosted.org/packages/f7/a1/6fc8f4b15c6a27e7fbb7966c30c2b4b18c274a3221fa2f5e6235502d34bc/cbor2-5.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:971d425b3a23b75953d8853d5f9911bdeefa09d759ee3b5e6b07b5ff3cbd9073", size = 282162, upload-time = "2026-03-22T15:56:18.975Z" }, + { url = "https://files.pythonhosted.org/packages/cf/20/9a22cfe08be16ddfeef2542cf4eeed1b29f3f57ddbba0b42f7e0bb8331fd/cbor2-5.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:34a6cb15e6ab6a8eae94ad2041731cd3ef786af43a8df99f847969af5b902ee7", size = 70049, upload-time = "2026-03-22T15:56:20.502Z" }, + { url = "https://files.pythonhosted.org/packages/c6/9e/695f92d09006614034e25a9f5b10620f3b219f79c1bec3c37b7c6f27a7a9/cbor2-5.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:7d1ddc4541e7367ac58c2470cc0df847f7137167fe4f5729e2d3cc0b993d7da4", size = 65382, upload-time = "2026-03-22T15:56:21.526Z" }, + { url = "https://files.pythonhosted.org/packages/81/c5/4901e21a8afe9448fd947b11e8f383903207cd6dd0800e5f5a386838de5b/cbor2-5.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fbb06f34aa645b4deca66643bba3d400d20c15312d1fe88d429be60c1ab50f27", size = 71284, upload-time = "2026-03-22T15:56:22.836Z" }, + { url = "https://files.pythonhosted.org/packages/1b/10/df643a381aebc3f05486de4813662bc58accb640fc3275cb276a75e89694/cbor2-5.9.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac684fe195c39821fca70d18afbf748f728aefbfbf88456018d299e559b8cae0", size = 287682, upload-time = "2026-03-22T15:56:24.024Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0c/8aa6b766059ae4a0ca1ec3ff96fe3823a69a7be880dba2e249f7fbe2700b/cbor2-5.9.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a54fbb32cb828c214f7f333a707e4aec61182e7efdc06ea5d9596d3ecee624a", size = 288009, upload-time = "2026-03-22T15:56:25.305Z" }, + { url = "https://files.pythonhosted.org/packages/74/07/6236bc25c183a9cf7e8062e5dddf9eae9b0b14ebf14a58a69fe5a1e872c6/cbor2-5.9.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4753a6d1bc71054d9179557bc65740860f185095ccb401d46637fff028a5b3ec", size = 280437, upload-time = "2026-03-22T15:56:26.479Z" }, + { url = "https://files.pythonhosted.org/packages/4e/0a/84328d23c3c68874ac6497edb9b1900579a1028efa54734df3f1762bbc15/cbor2-5.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:380e534482b843e43442b87d8777a7bf9bed20cb7526f89b780c3400f617304b", size = 282247, upload-time = "2026-03-22T15:56:28.644Z" }, + { url = "https://files.pythonhosted.org/packages/9b/f6/89b4627e09d028c8e5fcaf7cb55f225c33ce6e037ec1844e65d02bcfa945/cbor2-5.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:dcf0f695873e5c94bd072d6af8698e72b8fb7f7a18f37e0bced1041b7111a6cf", size = 70089, upload-time = "2026-03-22T15:56:29.801Z" }, + { url = "https://files.pythonhosted.org/packages/e2/7c/efadcd5f0102db692490e4e206988a2f98d39a09912090db497a2b800885/cbor2-5.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:f7c9751a9611601ab326d8f5837f01379195bbf06175fb4effeb552140e7c9e8", size = 65466, upload-time = "2026-03-22T15:56:30.823Z" }, + { url = "https://files.pythonhosted.org/packages/08/7d/9ccc36d10ef96e6038e48046ebe1ce35a1e7814da0e1e204d09e6ef09b8d/cbor2-5.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23606d31ba1368bd1b6602e3020ee88fe9523ca80e8630faf6b2fc904fd84560", size = 71500, upload-time = "2026-03-22T15:56:31.876Z" }, + { url = "https://files.pythonhosted.org/packages/70/e1/a6cca2cc72e13f00030c6a649f57ae703eb2c620806ab70c40db8eab33fa/cbor2-5.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0322296b9d52f55880e300ba8ba09ecf644303b99b51138bbb1c0fb644fa7c3e", size = 286953, upload-time = "2026-03-22T15:56:33.292Z" }, + { url = "https://files.pythonhosted.org/packages/08/3c/24cd5ef488a957d90e016f200a3aad820e4c2f85edd61c9fe4523007a1ee/cbor2-5.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:422817286c1d0ce947fb2f7eca9212b39bddd7231e8b452e2d2cc52f15332dba", size = 285454, upload-time = "2026-03-22T15:56:34.703Z" }, + { url = "https://files.pythonhosted.org/packages/a4/35/dca96818494c0ba47cdd73e8d809b27fa91f8fa0ce32a068a09237687454/cbor2-5.9.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9a4907e0c3035bb8836116854ed8e56d8aef23909d601fa59706320897ec2551", size = 279441, upload-time = "2026-03-22T15:56:35.888Z" }, + { url = "https://files.pythonhosted.org/packages/a4/44/d3362378b16e53cf7e535a3f5aed8476e2109068154e24e31981ef5bde9e/cbor2-5.9.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fb7afe77f8d269e42d7c4b515c6fd14f1ccc0625379fb6829b269f493d16eddd", size = 279673, upload-time = "2026-03-22T15:56:37.08Z" }, + { url = "https://files.pythonhosted.org/packages/43/d1/3533a697e5842fff7c2f64912eb251f8dcab3a8b5d88e228d6eebc3b5021/cbor2-5.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:86baf870d4c0bfc6f79de3801f3860a84ab76d9c8b0abb7f081f2c14c38d79d3", size = 71940, upload-time = "2026-03-22T15:56:38.366Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e2/c6ba75f3fb25dfa15ab6999cc8709c821987e9ed8e375d7f58539261bcb9/cbor2-5.9.0-cp314-cp314-win_arm64.whl", hash = "sha256:7221483fad0c63afa4244624d552abf89d7dfdbc5f5edfc56fc1ff2b4b818975", size = 67639, upload-time = "2026-03-22T15:56:39.39Z" }, + { url = "https://files.pythonhosted.org/packages/42/ff/b83492b096fbef26e9cb62c1a4bf2d3cef579ea7b33138c6c37c4ae66f67/cbor2-5.9.0-py3-none-any.whl", hash = "sha256:27695cbd70c90b8de5c4a284642c2836449b14e2c2e07e3ffe0744cb7669a01b", size = 24627, upload-time = "2026-03-22T15:56:48.847Z" }, +] + [[package]] name = "certifi" version = "2026.1.4" @@ -454,6 +504,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, ] +[[package]] +name = "cfgv" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" }, +] + +[[package]] +name = "chardet" +version = "7.4.0.post2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/4b/1fe1ade6b4d33abff0224b45a8310775b04308668ad1bdef725af8e3fcaa/chardet-7.4.0.post2.tar.gz", hash = "sha256:21a6b5ca695252c03385dcfcc8b55c27907f1fe80838aa171b1ff4e356a1bb67", size = 767694, upload-time = "2026-03-29T18:07:23.19Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/24/b012c1fd362e1a25425afd9f746166976b8ba3b2d78140a39df23bba2886/chardet-7.4.0.post2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7aced16fe8098019c7c513dd92e9ee3ad29fffac757fa7de13ff8f3a8607a344", size = 854615, upload-time = "2026-03-29T18:06:52.099Z" }, + { url = "https://files.pythonhosted.org/packages/0d/01/778bcb1e162000c5b8295a25191935b0b2eaf0000096bd3fcbf782b5c8c0/chardet-7.4.0.post2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dc6829803ba71cb427dffac03a948ae828c617710bbd5f97ae3b34ab18558414", size = 838434, upload-time = "2026-03-29T18:06:54.332Z" }, + { url = "https://files.pythonhosted.org/packages/e6/6a/827065f0390160d1c74e4cbe8f68815d56daf392c1eb5027fb16d0700d75/chardet-7.4.0.post2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46659d38ba18e7c740f10a4c2edd0ef112e0322606ab2570cb8fd387954e0de9", size = 860089, upload-time = "2026-03-29T18:06:56.233Z" }, + { url = "https://files.pythonhosted.org/packages/e2/32/3abb90c7057e2cbdd711b59d99dc4dfc1a28b7da5a41971ec918f0928682/chardet-7.4.0.post2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5933289313b8cbfb0d07cf44583a2a6c7e31bffe5dcb7ebb6592825aa197d5b0", size = 869310, upload-time = "2026-03-29T18:06:57.847Z" }, + { url = "https://files.pythonhosted.org/packages/7d/e2/c0f2a96cbda065765ad33b3a8f466b279983a72a6e3035e0f5cfa54b831f/chardet-7.4.0.post2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2b99b417fac30641429829666ee7331366e797863504260aa1b18bfc2020e4e3", size = 863047, upload-time = "2026-03-29T18:06:59.427Z" }, + { url = "https://files.pythonhosted.org/packages/46/0d/0b6039f2d254698a525d9a1b00334b3262a6521adede50885f05ba714fad/chardet-7.4.0.post2-cp312-cp312-win_amd64.whl", hash = "sha256:a07dc1257fef2685dfc5182229abccd3f9b1299006a5b4d43ac7bd252faa1118", size = 924680, upload-time = "2026-03-29T18:07:00.772Z" }, + { url = "https://files.pythonhosted.org/packages/64/6f/40998484582edf32ebcbe30a51c0b33fb476aa4d22b172d4aabc3f47c5ed/chardet-7.4.0.post2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bdb9387e692dd53c837aa922f676e5ab51209895cd99b15d30c6004418e0d27", size = 854448, upload-time = "2026-03-29T18:07:02.432Z" }, + { url = "https://files.pythonhosted.org/packages/32/ed/0fc7f4be6d346049bafec134cb4d122317e8e803b42e520f8214f02d9d13/chardet-7.4.0.post2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:422ac637f5a2a8b13151245591cb0fabdf9ec1427725f0560628cb5ad4fb1462", size = 838289, upload-time = "2026-03-29T18:07:04.026Z" }, + { url = "https://files.pythonhosted.org/packages/11/7d/f22cf8861c18126b6775b4d4a95fa4141ecc4a24d87c5a225d1d5df472c1/chardet-7.4.0.post2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d52b3f15249ba877030045900d179d44552c3c37dda487462be473ec67bed2f", size = 859345, upload-time = "2026-03-29T18:07:05.563Z" }, + { url = "https://files.pythonhosted.org/packages/27/ff/0f582b7a9369bba8abb47d72c3d1d1122c351b8fb04dcac2637683072bcb/chardet-7.4.0.post2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccdfb13b4a727d3d944157c7f350c6d64630511a0ce39e37ffa5114e90f7d3a7", size = 868537, upload-time = "2026-03-29T18:07:07.093Z" }, + { url = "https://files.pythonhosted.org/packages/51/7b/226d88c86a5351dcb03cf7702f6916ab304d6ce5146a96d1636c9b4287a2/chardet-7.4.0.post2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daae5b0579e7e33adacb4722a62b540e6bec49944e081a859cb9a6a010713817", size = 862733, upload-time = "2026-03-29T18:07:08.948Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b34d768e047796f69866b88dd81f10993bb5d7421a6196799512e478dd6a/chardet-7.4.0.post2-cp313-cp313-win_amd64.whl", hash = "sha256:6c448fe2d77e329cec421b95f844b75f8c9cb744e808ecc9124b6063ca6acb5e", size = 924887, upload-time = "2026-03-29T18:07:10.381Z" }, + { url = "https://files.pythonhosted.org/packages/b2/1e/8b5d54ecc873e828e9b91cddfce6bf5a058d7bb3d64007cfbbbc872b0bda/chardet-7.4.0.post2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5862b17677f7e8fcee4e37fe641f01d30762e4b075ac37ce9584e4407896e2d9", size = 853887, upload-time = "2026-03-29T18:07:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/26/17/8c2cf762c876b04036e561d2a27df8a6305435db1cb584f71c356e319c40/chardet-7.4.0.post2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:22d05c4b7e721d5330d99ef4a6f6233a9de58ae6f2275c21a098bedd778a6cb7", size = 838555, upload-time = "2026-03-29T18:07:13.689Z" }, + { url = "https://files.pythonhosted.org/packages/3b/21/51fb8cfbcf2f1acc7c03776f4452f64ff2b9051505b38bc9e2a3941af330/chardet-7.4.0.post2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a035d407f762c21eb77069982425eb403e518dd758617aa43bf11d0d2203a1b6", size = 861305, upload-time = "2026-03-29T18:07:15.194Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b6/13cc503f45beeb1117fc9c83f294df16ebce5d75eac9f0cefb8cce4357a1/chardet-7.4.0.post2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2adfa7390e69cb5ed499b54978d31f6d476788d07d83da3426811181b7ca7682", size = 868868, upload-time = "2026-03-29T18:07:16.781Z" }, + { url = "https://files.pythonhosted.org/packages/30/ca/f1ab73f8d431c5257ad536956992513a5c135c53cf2a3dc94b8a45f83082/chardet-7.4.0.post2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2345f20ea67cdadddb778b2bc31e2defc2a85ae027931f9ad6ab84fd5d345320", size = 863417, upload-time = "2026-03-29T18:07:18.467Z" }, + { url = "https://files.pythonhosted.org/packages/1a/cc/d2918dc6d110cf585a30ee11dbdcfa56a2b2fbf16e2b4117fe8bf800f320/chardet-7.4.0.post2-cp314-cp314-win_amd64.whl", hash = "sha256:52602972d4815047cee262551bc383ab394aa145f5ca9ee10d0a53d27965882e", size = 919100, upload-time = "2026-03-29T18:07:20.312Z" }, + { url = "https://files.pythonhosted.org/packages/94/d2/22ac0b5b832bb9d2f29311dcded6c09ad0c32c23e3e53a8033aad5eb8652/chardet-7.4.0.post2-py3-none-any.whl", hash = "sha256:e0c9c6b5c296c0e5197bc8876fcc04d58a6ddfba18399e598ba353aba28b038e", size = 625322, upload-time = "2026-03-29T18:07:21.81Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.4" @@ -593,6 +679,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/d3/a7daaee544c904548e665829b51a9fa2572acb82c73ad787a8ff90273002/darabonba_core-1.0.5-py3-none-any.whl", hash = "sha256:671ab8dbc4edc2a8f88013da71646839bb8914f1259efc069353243ef52ea27c", size = 24580, upload-time = "2025-12-12T07:53:59.494Z" }, ] +[[package]] +name = "datasets" +version = "4.8.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, + { name = "filelock" }, + { name = "fsspec", extra = ["http"] }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyarrow" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/22/73e46ac7a8c25e7ef0b3bd6f10da3465021d90219a32eb0b4d2afea4c56e/datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52", size = 604382, upload-time = "2026-03-23T14:21:17.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/e5/247d094108e42ac26363ab8dc57f168840cf7c05774b40ffeb0d78868fcc/datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d", size = 526991, upload-time = "2026-03-23T14:21:15.89Z" }, +] + [[package]] name = "daytona-api-client" version = "0.139.0" @@ -705,6 +816,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178, upload-time = "2020-04-20T14:23:36.581Z" }, ] +[[package]] +name = "dill" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/81/e1/56027a71e31b02ddc53c7d65b01e68edf64dea2932122fe7746a516f75d5/dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa", size = 187315, upload-time = "2026-01-19T02:36:56.85Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/77/dc8c558f7593132cf8fefec57c4f60c83b16941c574ac5f619abb3ae7933/dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d", size = 120019, upload-time = "2026-01-19T02:36:55.663Z" }, +] + +[[package]] +name = "distlib" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, +] + [[package]] name = "distro" version = "1.9.0" @@ -714,6 +843,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] +[[package]] +name = "docker" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, +] + [[package]] name = "dockerfile-parse" version = "2.0.1" @@ -809,6 +952,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/dd/d0ee25348ac58245ee9f90b6f3cbb666bf01f69be7e0911f9851bddbda16/fastapi-0.129.0-py3-none-any.whl", hash = "sha256:b4946880e48f462692b31c083be0432275cbfb6e2274566b1be91479cc1a84ec", size = 102950, upload-time = "2026-02-12T13:54:54.528Z" }, ] +[[package]] +name = "fastcore" +version = "1.12.34" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/39/66/e2bf42b3cad563a7246cc8e61a49c88e611c4c4228244da6bb39909126ef/fastcore-1.12.34.tar.gz", hash = "sha256:24c06e40cf9444ee4cbfbb5ff331e59762c83f1f5e27a128beb90b46d95aa687", size = 94563, upload-time = "2026-04-01T09:43:17.953Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/89/bf2adbdbb90dab8b465d7a489e0a291e9153bf7b1db2b90ea4fa3c4bf16f/fastcore-1.12.34-py3-none-any.whl", hash = "sha256:917ed3559ef25cfbf3ec3327cb664cf75830832490ed1fb7be1425eb351783df", size = 98770, upload-time = "2026-04-01T09:43:16.288Z" }, +] + +[[package]] +name = "filelock" +version = "3.25.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" }, +] + [[package]] name = "frozenlist" version = "1.8.0" @@ -900,11 +1061,52 @@ wheels = [ [[package]] name = "fsspec" -version = "2026.3.0" +version = "2026.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/cf/b50ddf667c15276a9ab15a70ef5f257564de271957933ffea49d2cdbcdfb/fsspec-2026.3.0.tar.gz", hash = "sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41", size = 313547, upload-time = "2026-03-27T19:11:14.892Z" } +sdist = { url = "https://files.pythonhosted.org/packages/51/7c/f60c259dcbf4f0c47cc4ddb8f7720d2dcdc8888c8e5ad84c73ea4531cc5b/fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff", size = 313441, upload-time = "2026-02-05T21:50:53.743Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, +] + +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] + +[[package]] +name = "ghapi" +version = "1.0.13" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastcore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/09/1b88f97e8599cda096d42dac830bb2e28ddf202d71843f61bda52bbe99ce/ghapi-1.0.13.tar.gz", hash = "sha256:fb46f5e101efa33bd12a0ae7694de761eec5be1de90f48847699f1e00128f928", size = 72914, upload-time = "2026-02-28T02:21:01.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/ac/e1960ec21cfd5a0fd9b329822c04d0b5f91abb688c3b1acd7e8ff3390432/ghapi-1.0.13-py3-none-any.whl", hash = "sha256:49d7e336e5664e4d4f92b1d442dfe80f31ecccbee4370bd1d271bd63a1ccf18e", size = 71409, upload-time = "2026-02-28T02:21:00.457Z" }, +] + +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" }, +] + +[[package]] +name = "gitpython" +version = "3.1.46" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/b5/59d16470a1f0dfe8c793f9ef56fd3826093fc52b3bd96d6b9d6c26c7e27b/gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f", size = 215371, upload-time = "2026-01-01T15:37:32.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, ] [[package]] @@ -1003,6 +1205,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" }, ] +[[package]] +name = "grpclib" +version = "0.4.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h2" }, + { name = "multidict" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/28/5a2c299ec82a876a252c5919aa895a6f1d1d35c96417c5ce4a4660dc3a80/grpclib-0.4.9.tar.gz", hash = "sha256:cc589c330fa81004c6400a52a566407574498cb5b055fa927013361e21466c46", size = 84798, upload-time = "2025-12-14T22:23:14.349Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/90/b0cbbd9efcc82816c58f31a34963071aa19fb792a212a5d9caf8e0fc3097/grpclib-0.4.9-py3-none-any.whl", hash = "sha256:7762ec1c8ed94dfad597475152dd35cbd11aecaaca2f243e29702435ca24cf0e", size = 77063, upload-time = "2025-12-14T22:23:13.224Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1025,6 +1240,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, ] +[[package]] +name = "hf-xet" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, +] + [[package]] name = "hpack" version = "4.1.0" @@ -1076,6 +1323,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, ] +[[package]] +name = "huggingface-hub" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/88/bb/62c7aa86f63a05e2f9b96642fdef9b94526a23979820b09f5455deff4983/huggingface_hub-1.9.0.tar.gz", hash = "sha256:0ea5be7a56135c91797cae6ad726e38eaeb6eb4b77cefff5c9d38ba0ecf874f7", size = 750326, upload-time = "2026-04-03T08:35:55.888Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/37/0d15d16150e1829f3e90962c99f28257f6de9e526a680b4c6f5acdb54fd2/huggingface_hub-1.9.0-py3-none-any.whl", hash = "sha256:2999328c058d39fd19ab748dd09bd4da2fbaa4f4c1ddea823eab103051e14a1f", size = 637355, upload-time = "2026-04-03T08:35:53.897Z" }, +] + [[package]] name = "hyperframe" version = "6.1.0" @@ -1085,6 +1352,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, ] +[[package]] +name = "identify" +version = "2.6.18" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/46/c4/7fb4db12296cdb11893d61c92048fe617ee853f8523b9b296ac03b43757e/identify-2.6.18.tar.gz", hash = "sha256:873ac56a5e3fd63e7438a7ecbc4d91aca692eb3fefa4534db2b7913f3fc352fd", size = 99580, upload-time = "2026-03-15T18:39:50.319Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/33/92ef41c6fad0233e41d3d84ba8e8ad18d1780f1e5d99b3c683e6d7f98b63/identify-2.6.18-py2.py3-none-any.whl", hash = "sha256:8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737", size = 99394, upload-time = "2026-03-15T18:39:48.915Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -1491,6 +1767,7 @@ dependencies = [ [package.optional-dependencies] all = [ + { name = "datasets" }, { name = "daytona-sdk" }, { name = "e2b" }, { name = "httpx-sse" }, @@ -1499,6 +1776,8 @@ all = [ { name = "pymupdf" }, { name = "python-pptx" }, { name = "python-socks" }, + { name = "socksio" }, + { name = "swebench" }, { name = "wuying-agentbay-sdk" }, ] daytona = [ @@ -1513,7 +1792,10 @@ e2b = [ { name = "e2b" }, ] eval = [ + { name = "datasets" }, { name = "httpx-sse" }, + { name = "socksio" }, + { name = "swebench" }, ] langfuse = [ { name = "langfuse" }, @@ -1549,6 +1831,8 @@ dev = [ requires-dist = [ { name = "bcrypt", specifier = ">=4.0.0" }, { name = "croniter", specifier = ">=6.0.0" }, + { name = "datasets", marker = "extra == 'all'", specifier = ">=4.8.4" }, + { name = "datasets", marker = "extra == 'eval'", specifier = ">=4.8.4" }, { name = "daytona-sdk", marker = "extra == 'all'", specifier = ">=0.139.0,<0.140.0" }, { name = "daytona-sdk", marker = "extra == 'daytona'", specifier = ">=0.139.0,<0.140.0" }, { name = "duckduckgo-search", specifier = ">=8.1.1" }, @@ -1588,8 +1872,12 @@ requires-dist = [ { name = "python-socks", marker = "extra == 'daytona'", specifier = ">=2.7.0" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "rich", specifier = ">=13.0.0" }, + { name = "socksio", marker = "extra == 'all'", specifier = ">=1.0.0" }, + { name = "socksio", marker = "extra == 'eval'", specifier = ">=1.0.0" }, { name = "sse-starlette", specifier = ">=1.6.0" }, { name = "supabase", specifier = ">=2.28.3" }, + { name = "swebench", marker = "extra == 'all'", specifier = ">=4.1.0" }, + { name = "swebench", marker = "extra == 'eval'", specifier = ">=4.1.0" }, { name = "uvicorn", specifier = ">=0.30.0" }, { name = "wuying-agentbay-sdk", marker = "extra == 'all'", specifier = ">=0.10.0" }, { name = "wuying-agentbay-sdk", marker = "extra == 'sandbox'", specifier = ">=0.10.0" }, @@ -1848,6 +2136,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/0f/59204bf136d1201f8d7884cfbaf7498c5b4674e87a4c693f9bde63741ce1/mmh3-5.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:dfd51b4c56b673dfbc43d7d27ef857dd91124801e2806c69bb45585ce0fa019b", size = 40391, upload-time = "2026-03-05T15:55:56.697Z" }, ] +[[package]] +name = "modal" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "cbor2" }, + { name = "certifi" }, + { name = "click" }, + { name = "grpclib" }, + { name = "protobuf" }, + { name = "rich" }, + { name = "synchronicity" }, + { name = "toml" }, + { name = "typer" }, + { name = "types-certifi" }, + { name = "types-toml" }, + { name = "typing-extensions" }, + { name = "watchfiles" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/b2/cdc155ef06863e3ca325fb0d6ea8feb0acd9213ff7a8a32ff1adcc37e077/modal-1.4.1.tar.gz", hash = "sha256:aadbf31e82b9ace8c77de2ee4d2c431f76ee6af54a908640fae0bdee557fd9c5", size = 685664, upload-time = "2026-03-31T01:44:32.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/9d/cba0aed472b303481dc931b8dea693db8ecc1fb720308a69d4c679a69a71/modal-1.4.1-py3-none-any.whl", hash = "sha256:3befc9c4ac1b18ac4bf5bcb92aa6b7a5fa966c799d1dbf0cfc78ea075b2ab030", size = 787809, upload-time = "2026-03-31T01:44:29.691Z" }, +] + [[package]] name = "multidict" version = "6.7.1" @@ -1971,6 +2284,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/d6/d547a7004b81fa0b2aafa143b09196f6635e4105cd9d2c641fa8a4051c05/multipart-1.3.0-py3-none-any.whl", hash = "sha256:439bf4b00fd7cb2dbff08ae13f49f4f49798931ecd8d496372c63537fa19f304", size = 14938, upload-time = "2025-07-26T15:09:36.884Z" }, ] +[[package]] +name = "multiprocess" +version = "0.70.19" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = "2026-01-19T06:47:39.744Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" }, + { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, upload-time = "2026-01-19T06:47:33.711Z" }, + { url = "https://files.pythonhosted.org/packages/71/70/38998b950a97ea279e6bd657575d22d1a2047256caf707d9a10fbce4f065/multiprocess-0.70.19-py312-none-any.whl", hash = "sha256:3a56c0e85dd5025161bac5ce138dcac1e49174c7d8e74596537e729fd5c53c28", size = 150281, upload-time = "2026-01-19T06:47:35.037Z" }, + { url = "https://files.pythonhosted.org/packages/7f/74/d2c27e03cb84251dfe7249b8e82923643c6d48fa4883b9476b025e7dc7eb/multiprocess-0.70.19-py313-none-any.whl", hash = "sha256:8d5eb4ec5017ba2fab4e34a747c6d2c2b6fecfe9e7236e77988db91580ada952", size = 156414, upload-time = "2026-01-19T06:47:35.915Z" }, + { url = "https://files.pythonhosted.org/packages/a0/61/af9115673a5870fd885247e2f1b68c4f1197737da315b520a91c757a861a/multiprocess-0.70.19-py314-none-any.whl", hash = "sha256:e8cc7fbdff15c0613f0a1f1f8744bef961b0a164c0ca29bdff53e9d2d93c5e5f", size = 160318, upload-time = "2026-01-19T06:47:37.497Z" }, + { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" }, +] + [[package]] name = "nodeenv" version = "1.10.0" @@ -1980,6 +2310,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, ] +[[package]] +name = "numpy" +version = "2.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/05/32396bec30fb2263770ee910142f49c1476d08e8ad41abf8403806b520ce/numpy-2.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15716cfef24d3a9762e3acdf87e27f58dc823d1348f765bbea6bef8c639bfa1b", size = 16689272, upload-time = "2026-03-29T13:18:49.223Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f3/a983d28637bfcd763a9c7aafdb6d5c0ebf3d487d1e1459ffdb57e2f01117/numpy-2.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23cbfd4c17357c81021f21540da84ee282b9c8fba38a03b7b9d09ba6b951421e", size = 14699573, upload-time = "2026-03-29T13:18:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fd/e5ecca1e78c05106d98028114f5c00d3eddb41207686b2b7de3e477b0e22/numpy-2.4.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8b3b60bb7cba2c8c81837661c488637eee696f59a877788a396d33150c35d842", size = 5204782, upload-time = "2026-03-29T13:18:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/702a4594413c1a8632092beae8aba00f1d67947389369b3777aed783fdca/numpy-2.4.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e4a010c27ff6f210ff4c6ef34394cd61470d01014439b192ec22552ee867f2a8", size = 6552038, upload-time = "2026-03-29T13:18:57.769Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/eed308a8f56cba4d1fdf467a4fc67ef4ff4bf1c888f5fc980481890104b1/numpy-2.4.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9e75681b59ddaa5e659898085ae0eaea229d054f2ac0c7e563a62205a700121", size = 15670666, upload-time = "2026-03-29T13:19:00.341Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/0e3ecece05b7a7e87ab9fb587855548da437a061326fff64a223b6dcb78a/numpy-2.4.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:81f4a14bee47aec54f883e0cad2d73986640c1590eb9bfaaba7ad17394481e6e", size = 16645480, upload-time = "2026-03-29T13:19:03.63Z" }, + { url = "https://files.pythonhosted.org/packages/34/49/f2312c154b82a286758ee2f1743336d50651f8b5195db18cdb63675ff649/numpy-2.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:62d6b0f03b694173f9fcb1fb317f7222fd0b0b103e784c6549f5e53a27718c44", size = 17020036, upload-time = "2026-03-29T13:19:07.428Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e9/736d17bd77f1b0ec4f9901aaec129c00d59f5d84d5e79bba540ef12c2330/numpy-2.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbc356aae7adf9e6336d336b9c8111d390a05df88f1805573ebb0807bd06fd1d", size = 18368643, upload-time = "2026-03-29T13:19:10.775Z" }, + { url = "https://files.pythonhosted.org/packages/63/f6/d417977c5f519b17c8a5c3bc9e8304b0908b0e21136fe43bf628a1343914/numpy-2.4.4-cp312-cp312-win32.whl", hash = "sha256:0d35aea54ad1d420c812bfa0385c71cd7cc5bcf7c65fed95fc2cd02fe8c79827", size = 5961117, upload-time = "2026-03-29T13:19:13.464Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5b/e1deebf88ff431b01b7406ca3583ab2bbb90972bbe1c568732e49c844f7e/numpy-2.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5f0362dc928a6ecd9db58868fca5e48485205e3855957bdedea308f8672ea4a", size = 12320584, upload-time = "2026-03-29T13:19:16.155Z" }, + { url = "https://files.pythonhosted.org/packages/58/89/e4e856ac82a68c3ed64486a544977d0e7bdd18b8da75b78a577ca31c4395/numpy-2.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:846300f379b5b12cc769334464656bc882e0735d27d9726568bc932fdc49d5ec", size = 10221450, upload-time = "2026-03-29T13:19:18.994Z" }, + { url = "https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = "https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = "https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = "https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = "https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = "https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = "https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = "https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, upload-time = "2026-03-29T13:20:20.195Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = "https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, upload-time = "2026-03-29T13:20:56.912Z" }, + { url = "https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = "2026-03-29T13:20:59.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, +] + [[package]] name = "obstore" version = "0.8.2" @@ -2262,6 +2653,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pandas" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/99/b342345300f13440fe9fe385c3c481e2d9a595ee3bab4d3219247ac94e9a/pandas-3.0.2.tar.gz", hash = "sha256:f4753e73e34c8d83221ba58f232433fca2748be8b18dbca02d242ed153945043", size = 4645855, upload-time = "2026-03-31T06:48:30.816Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/b0/c20bd4d6d3f736e6bd6b55794e9cd0a617b858eaad27c8f410ea05d953b7/pandas-3.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:232a70ebb568c0c4d2db4584f338c1577d81e3af63292208d615907b698a0f18", size = 10347921, upload-time = "2026-03-31T06:46:33.36Z" }, + { url = "https://files.pythonhosted.org/packages/35/d0/4831af68ce30cc2d03c697bea8450e3225a835ef497d0d70f31b8cdde965/pandas-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:970762605cff1ca0d3f71ed4f3a769ea8f85fc8e6348f6e110b8fea7e6eb5a14", size = 9888127, upload-time = "2026-03-31T06:46:36.253Z" }, + { url = "https://files.pythonhosted.org/packages/61/a9/16ea9346e1fc4a96e2896242d9bc674764fb9049b0044c0132502f7a771e/pandas-3.0.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aff4e6f4d722e0652707d7bcb190c445fe58428500c6d16005b02401764b1b3d", size = 10399577, upload-time = "2026-03-31T06:46:39.224Z" }, + { url = "https://files.pythonhosted.org/packages/c4/a8/3a61a721472959ab0ce865ef05d10b0d6bfe27ce8801c99f33d4fa996e65/pandas-3.0.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef8b27695c3d3dc78403c9a7d5e59a62d5464a7e1123b4e0042763f7104dc74f", size = 10880030, upload-time = "2026-03-31T06:46:42.412Z" }, + { url = "https://files.pythonhosted.org/packages/da/65/7225c0ea4d6ce9cb2160a7fb7f39804871049f016e74782e5dade4d14109/pandas-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f8d68083e49e16b84734eb1a4dcae4259a75c90fb6e2251ab9a00b61120c06ab", size = 11409468, upload-time = "2026-03-31T06:46:45.2Z" }, + { url = "https://files.pythonhosted.org/packages/fa/5b/46e7c76032639f2132359b5cf4c785dd8cf9aea5ea64699eac752f02b9db/pandas-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:32cc41f310ebd4a296d93515fcac312216adfedb1894e879303987b8f1e2b97d", size = 11936381, upload-time = "2026-03-31T06:46:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/7b/8b/721a9cff6fa6a91b162eb51019c6243b82b3226c71bb6c8ef4a9bd65cbc6/pandas-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:a4785e1d6547d8427c5208b748ae2efb64659a21bd82bf440d4262d02bfa02a4", size = 9744993, upload-time = "2026-03-31T06:46:51.488Z" }, + { url = "https://files.pythonhosted.org/packages/d5/18/7f0bd34ae27b28159aa80f2a6799f47fda34f7fb938a76e20c7b7fe3b200/pandas-3.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:08504503f7101300107ecdc8df73658e4347586db5cfdadabc1592e9d7e7a0fd", size = 9056118, upload-time = "2026-03-31T06:46:54.548Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ca/3e639a1ea6fcd0617ca4e8ca45f62a74de33a56ae6cd552735470b22c8d3/pandas-3.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5918ba197c951dec132b0c5929a00c0bf05d5942f590d3c10a807f6e15a57d3", size = 10321105, upload-time = "2026-03-31T06:46:57.327Z" }, + { url = "https://files.pythonhosted.org/packages/0b/77/dbc82ff2fb0e63c6564356682bf201edff0ba16c98630d21a1fb312a8182/pandas-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d606a041c89c0a474a4702d532ab7e73a14fe35c8d427b972a625c8e46373668", size = 9864088, upload-time = "2026-03-31T06:46:59.935Z" }, + { url = "https://files.pythonhosted.org/packages/5c/2b/341f1b04bbca2e17e13cd3f08c215b70ef2c60c5356ef1e8c6857449edc7/pandas-3.0.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:710246ba0616e86891b58ab95f2495143bb2bc83ab6b06747c74216f583a6ac9", size = 10369066, upload-time = "2026-03-31T06:47:02.792Z" }, + { url = "https://files.pythonhosted.org/packages/12/c5/cbb1ffefb20a93d3f0e1fdcda699fb84976210d411b008f97f48bf6ce27e/pandas-3.0.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5d3cfe227c725b1f3dff4278b43d8c784656a42a9325b63af6b1492a8232209e", size = 10876780, upload-time = "2026-03-31T06:47:06.205Z" }, + { url = "https://files.pythonhosted.org/packages/98/fe/2249ae5e0a69bd0ddf17353d0a5d26611d70970111f5b3600cdc8be883e7/pandas-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c3b723df9087a9a9a840e263ebd9f88b64a12075d1bf2ea401a5a42f254f084d", size = 11375181, upload-time = "2026-03-31T06:47:09.383Z" }, + { url = "https://files.pythonhosted.org/packages/de/64/77a38b09e70b6464883b8d7584ab543e748e42c1b5d337a2ee088e0df741/pandas-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a3096110bf9eac0070b7208465f2740e2d8a670d5cb6530b5bb884eca495fd39", size = 11928899, upload-time = "2026-03-31T06:47:12.686Z" }, + { url = "https://files.pythonhosted.org/packages/5e/52/42855bf626868413f761addd574acc6195880ae247a5346477a4361c3acb/pandas-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:07a10f5c36512eead51bc578eb3354ad17578b22c013d89a796ab5eee90cd991", size = 9746574, upload-time = "2026-03-31T06:47:15.64Z" }, + { url = "https://files.pythonhosted.org/packages/88/39/21304ae06a25e8bf9fc820d69b29b2c495b2ae580d1e143146c309941760/pandas-3.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:5fdbfa05931071aba28b408e59226186b01eb5e92bea2ab78b65863ca3228d84", size = 9047156, upload-time = "2026-03-31T06:47:18.595Z" }, + { url = "https://files.pythonhosted.org/packages/72/20/7defa8b27d4f330a903bb68eea33be07d839c5ea6bdda54174efcec0e1d2/pandas-3.0.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:dbc20dea3b9e27d0e66d74c42b2d0c1bed9c2ffe92adea33633e3bedeb5ac235", size = 10756238, upload-time = "2026-03-31T06:47:22.012Z" }, + { url = "https://files.pythonhosted.org/packages/e9/95/49433c14862c636afc0e9b2db83ff16b3ad92959364e52b2955e44c8e94c/pandas-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b75c347eff42497452116ce05ef461822d97ce5b9ff8df6edacb8076092c855d", size = 10408520, upload-time = "2026-03-31T06:47:25.197Z" }, + { url = "https://files.pythonhosted.org/packages/3b/f8/462ad2b5881d6b8ec8e5f7ed2ea1893faa02290d13870a1600fe72ad8efc/pandas-3.0.2-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1478075142e83a5571782ad007fb201ed074bdeac7ebcc8890c71442e96adf7", size = 10324154, upload-time = "2026-03-31T06:47:28.097Z" }, + { url = "https://files.pythonhosted.org/packages/0a/65/d1e69b649cbcddda23ad6e4c40ef935340f6f652a006e5cbc3555ac8adb3/pandas-3.0.2-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5880314e69e763d4c8b27937090de570f1fb8d027059a7ada3f7f8e98bdcb677", size = 10714449, upload-time = "2026-03-31T06:47:30.85Z" }, + { url = "https://files.pythonhosted.org/packages/47/a4/85b59bc65b8190ea3689882db6cdf32a5003c0ccd5a586c30fdcc3ffc4fc/pandas-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b5329e26898896f06035241a626d7c335daa479b9bbc82be7c2742d048e41172", size = 11338475, upload-time = "2026-03-31T06:47:34.026Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c4/bc6966c6e38e5d9478b935272d124d80a589511ed1612a5d21d36f664c68/pandas-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:81526c4afd31971f8b62671442a4b2b51e0aa9acc3819c9f0f12a28b6fcf85f1", size = 11786568, upload-time = "2026-03-31T06:47:36.941Z" }, + { url = "https://files.pythonhosted.org/packages/e8/74/09298ca9740beed1d3504e073d67e128aa07e5ca5ca2824b0c674c0b8676/pandas-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:7cadd7e9a44ec13b621aec60f9150e744cfc7a3dd32924a7e2f45edff31823b0", size = 10488652, upload-time = "2026-03-31T06:47:40.612Z" }, + { url = "https://files.pythonhosted.org/packages/bb/40/c6ea527147c73b24fc15c891c3fcffe9c019793119c5742b8784a062c7db/pandas-3.0.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:db0dbfd2a6cdf3770aa60464d50333d8f3d9165b2f2671bcc299b72de5a6677b", size = 10326084, upload-time = "2026-03-31T06:47:43.834Z" }, + { url = "https://files.pythonhosted.org/packages/95/25/bdb9326c3b5455f8d4d3549fce7abcf967259de146fe2cf7a82368141948/pandas-3.0.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0555c5882688a39317179ab4a0ed41d3ebc8812ab14c69364bbee8fb7a3f6288", size = 9914146, upload-time = "2026-03-31T06:47:46.67Z" }, + { url = "https://files.pythonhosted.org/packages/8d/77/3a227ff3337aa376c60d288e1d61c5d097131d0ac71f954d90a8f369e422/pandas-3.0.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01f31a546acd5574ef77fe199bc90b55527c225c20ccda6601cf6b0fd5ed597c", size = 10444081, upload-time = "2026-03-31T06:47:49.681Z" }, + { url = "https://files.pythonhosted.org/packages/15/88/3cdd54fa279341afa10acf8d2b503556b1375245dccc9315659f795dd2e9/pandas-3.0.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:deeca1b5a931fdf0c2212c8a659ade6d3b1edc21f0914ce71ef24456ca7a6535", size = 10897535, upload-time = "2026-03-31T06:47:53.033Z" }, + { url = "https://files.pythonhosted.org/packages/06/9d/98cc7a7624f7932e40f434299260e2917b090a579d75937cb8a57b9d2de3/pandas-3.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0f48afd9bb13300ffb5a3316973324c787054ba6665cda0da3fbd67f451995db", size = 11446992, upload-time = "2026-03-31T06:47:56.193Z" }, + { url = "https://files.pythonhosted.org/packages/9a/cd/19ff605cc3760e80602e6826ddef2824d8e7050ed80f2e11c4b079741dc3/pandas-3.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6c4d8458b97a35717b62469a4ea0e85abd5ed8687277f5ccfc67f8a5126f8c53", size = 11968257, upload-time = "2026-03-31T06:47:59.137Z" }, + { url = "https://files.pythonhosted.org/packages/db/60/aba6a38de456e7341285102bede27514795c1eaa353bc0e7638b6b785356/pandas-3.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:b35d14bb5d8285d9494fe93815a9e9307c0876e10f1e8e89ac5b88f728ec8dcf", size = 9865893, upload-time = "2026-03-31T06:48:02.038Z" }, + { url = "https://files.pythonhosted.org/packages/08/71/e5ec979dd2e8a093dacb8864598c0ff59a0cee0bbcdc0bfec16a51684d4f/pandas-3.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:63d141b56ef686f7f0d714cfb8de4e320475b86bf4b620aa0b7da89af8cbdbbb", size = 9188644, upload-time = "2026-03-31T06:48:05.045Z" }, + { url = "https://files.pythonhosted.org/packages/f1/6c/7b45d85db19cae1eb524f2418ceaa9d85965dcf7b764ed151386b7c540f0/pandas-3.0.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:140f0cffb1fa2524e874dde5b477d9defe10780d8e9e220d259b2c0874c89d9d", size = 10776246, upload-time = "2026-03-31T06:48:07.789Z" }, + { url = "https://files.pythonhosted.org/packages/a8/3e/7b00648b086c106e81766f25322b48aa8dfa95b55e621dbdf2fdd413a117/pandas-3.0.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ae37e833ff4fed0ba352f6bdd8b73ba3ab3256a85e54edfd1ab51ae40cca0af8", size = 10424801, upload-time = "2026-03-31T06:48:10.897Z" }, + { url = "https://files.pythonhosted.org/packages/da/6e/558dd09a71b53b4008e7fc8a98ec6d447e9bfb63cdaeea10e5eb9b2dabe8/pandas-3.0.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d888a5c678a419a5bb41a2a93818e8ed9fd3172246555c0b37b7cc27027effd", size = 10345643, upload-time = "2026-03-31T06:48:13.7Z" }, + { url = "https://files.pythonhosted.org/packages/be/e3/921c93b4d9a280409451dc8d07b062b503bbec0531d2627e73a756e99a82/pandas-3.0.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b444dc64c079e84df91baa8bf613d58405645461cabca929d9178f2cd392398d", size = 10743641, upload-time = "2026-03-31T06:48:16.659Z" }, + { url = "https://files.pythonhosted.org/packages/56/ca/fd17286f24fa3b4d067965d8d5d7e14fe557dd4f979a0b068ac0deaf8228/pandas-3.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4544c7a54920de8eeacaa1466a6b7268ecfbc9bc64ab4dbb89c6bbe94d5e0660", size = 11361993, upload-time = "2026-03-31T06:48:19.475Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a5/2f6ed612056819de445a433ca1f2821ac3dab7f150d569a59e9cc105de1d/pandas-3.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:734be7551687c00fbd760dc0522ed974f82ad230d4a10f54bf51b80d44a08702", size = 11815274, upload-time = "2026-03-31T06:48:22.695Z" }, + { url = "https://files.pythonhosted.org/packages/00/2f/b622683e99ec3ce00b0854bac9e80868592c5b051733f2cf3a868e5fea26/pandas-3.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:57a07209bebcbcf768d2d13c9b78b852f9a15978dac41b9e6421a81ad4cdd276", size = 10888530, upload-time = "2026-03-31T06:48:25.806Z" }, + { url = "https://files.pythonhosted.org/packages/cb/2b/f8434233fab2bd66a02ec014febe4e5adced20e2693e0e90a07d118ed30e/pandas-3.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:5371b72c2d4d415d08765f32d689217a43227484e81b2305b52076e328f6f482", size = 9455341, upload-time = "2026-03-31T06:48:28.418Z" }, +] + [[package]] name = "parso" version = "0.8.6" @@ -2340,6 +2783,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload-time = "2026-01-02T09:13:12.068Z" }, ] +[[package]] +name = "platformdirs" +version = "4.9.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/56/8d4c30c8a1d07013911a8fdbd8f89440ef9f08d07a1b50ab8ca8be5a20f9/platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", size = 28737, upload-time = "2026-03-05T18:34:13.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" }, +] + [[package]] name = "playwright" version = "1.58.0" @@ -2383,6 +2835,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/5e/6eeb1d53d010d80e800204c1eee6b3d5419a6a2b985c364f56f36cf48cca/postgrest-2.28.3-py3-none-any.whl", hash = "sha256:5a44d6c6d509abdbe0f928c86f0dc31ef26bda36e0357129836ec54dfb50b083", size = 21865, upload-time = "2026-03-20T14:38:05.55Z" }, ] +[[package]] +name = "pre-commit" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload-time = "2025-12-16T21:14:33.552Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, +] + [[package]] name = "primp" version = "0.15.0" @@ -2596,6 +3064,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/c3/26b8a0908a9db249de3b4169692e1c7c19048a9bc41a4d3209cee7dbb758/psycopg_pool-3.3.0-py3-none-any.whl", hash = "sha256:2e44329155c410b5e8666372db44276a8b1ebd8c90f1c3026ebba40d4bc81063", size = 39995, upload-time = "2025-12-01T11:34:29.761Z" }, ] +[[package]] +name = "pyarrow" +version = "23.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/4b/4166bb5abbfe6f750fc60ad337c43ecf61340fa52ab386da6e8dbf9e63c4/pyarrow-23.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f", size = 34214575, upload-time = "2026-02-16T10:09:56.225Z" }, + { url = "https://files.pythonhosted.org/packages/e1/da/3f941e3734ac8088ea588b53e860baeddac8323ea40ce22e3d0baa865cc9/pyarrow-23.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:7707d2b6673f7de054e2e83d59f9e805939038eebe1763fe811ee8fa5c0cd1a7", size = 35832540, upload-time = "2026-02-16T10:10:03.428Z" }, + { url = "https://files.pythonhosted.org/packages/88/7c/3d841c366620e906d54430817531b877ba646310296df42ef697308c2705/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86ff03fb9f1a320266e0de855dee4b17da6794c595d207f89bba40d16b5c78b9", size = 44470940, upload-time = "2026-02-16T10:10:10.704Z" }, + { url = "https://files.pythonhosted.org/packages/2c/a5/da83046273d990f256cb79796a190bbf7ec999269705ddc609403f8c6b06/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:813d99f31275919c383aab17f0f455a04f5a429c261cc411b1e9a8f5e4aaaa05", size = 47586063, upload-time = "2026-02-16T10:10:17.95Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/b7d2ebcff47a514f47f9da1e74b7949138c58cfeb108cdd4ee62f43f0cf3/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bf5842f960cddd2ef757d486041d57c96483efc295a8c4a0e20e704cbbf39c67", size = 48173045, upload-time = "2026-02-16T10:10:25.363Z" }, + { url = "https://files.pythonhosted.org/packages/43/b2/b40961262213beaba6acfc88698eb773dfce32ecdf34d19291db94c2bd73/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564baf97c858ecc03ec01a41062e8f4698abc3e6e2acd79c01c2e97880a19730", size = 50621741, upload-time = "2026-02-16T10:10:33.477Z" }, + { url = "https://files.pythonhosted.org/packages/f6/70/1fdda42d65b28b078e93d75d371b2185a61da89dda4def8ba6ba41ebdeb4/pyarrow-23.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:07deae7783782ac7250989a7b2ecde9b3c343a643f82e8a4df03d93b633006f0", size = 27620678, upload-time = "2026-02-16T10:10:39.31Z" }, + { url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066, upload-time = "2026-02-16T10:10:45.487Z" }, + { url = "https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526, upload-time = "2026-02-16T10:10:52.266Z" }, + { url = "https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279, upload-time = "2026-02-16T10:11:01.557Z" }, + { url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798, upload-time = "2026-02-16T10:11:09.401Z" }, + { url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446, upload-time = "2026-02-16T10:11:17.781Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972, upload-time = "2026-02-16T10:11:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/d5/09/a532297c9591a727d67760e2e756b83905dd89adb365a7f6e9c72578bcc1/pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a", size = 27540749, upload-time = "2026-02-16T10:12:23.297Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544, upload-time = "2026-02-16T10:11:32.535Z" }, + { url = "https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911, upload-time = "2026-02-16T10:11:39.813Z" }, + { url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337, upload-time = "2026-02-16T10:11:47.764Z" }, + { url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944, upload-time = "2026-02-16T10:11:56.607Z" }, + { url = "https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269, upload-time = "2026-02-16T10:12:04.47Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794, upload-time = "2026-02-16T10:12:11.797Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a1/22df0620a9fac31d68397a75465c344e83c3dfe521f7612aea33e27ab6c0/pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8", size = 27660642, upload-time = "2026-02-16T10:12:17.746Z" }, + { url = "https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755, upload-time = "2026-02-16T10:12:32.819Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826, upload-time = "2026-02-16T10:12:38.949Z" }, + { url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859, upload-time = "2026-02-16T10:12:45.467Z" }, + { url = "https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443, upload-time = "2026-02-16T10:12:55.525Z" }, + { url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991, upload-time = "2026-02-16T10:13:04.729Z" }, + { url = "https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077, upload-time = "2026-02-16T10:13:14.147Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3f/b1da7b61cd66566a4d4c8383d376c606d1c34a906c3f1cb35c479f59d1aa/pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5", size = 28234271, upload-time = "2026-02-16T10:14:09.397Z" }, + { url = "https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692, upload-time = "2026-02-16T10:13:21.541Z" }, + { url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383, upload-time = "2026-02-16T10:13:28.63Z" }, + { url = "https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119, upload-time = "2026-02-16T10:13:35.506Z" }, + { url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199, upload-time = "2026-02-16T10:13:42.504Z" }, + { url = "https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435, upload-time = "2026-02-16T10:13:49.226Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149, upload-time = "2026-02-16T10:13:57.238Z" }, + { url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" }, +] + [[package]] name = "pycparser" version = "3.0" @@ -2935,6 +3446,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-discovery" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/88/815e53084c5079a59df912825a279f41dd2e0df82281770eadc732f5352c/python_discovery-1.2.1.tar.gz", hash = "sha256:180c4d114bff1c32462537eac5d6a332b768242b76b69c0259c7d14b1b680c9e", size = 58457, upload-time = "2026-03-26T22:30:44.496Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/0f/019d3949a40280f6193b62bc010177d4ce702d0fce424322286488569cd3/python_discovery-1.2.1-py3-none-any.whl", hash = "sha256:b6a957b24c1cd79252484d3566d1b49527581d46e789aaf43181005e56201502", size = 31674, upload-time = "2026-03-26T22:30:43.396Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.1" @@ -3310,6 +3834,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/e2/77be4fff062fa78d9b2a4dea85d14785dac5f1d0c1fb58ed52331f0ebe28/ruff-0.15.8-py3-none-win_arm64.whl", hash = "sha256:cf891fa8e3bb430c0e7fac93851a5978fc99c8fa2c053b57b118972866f8e5f2", size = 11048175, upload-time = "2026-03-26T18:40:01.06Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -3319,6 +3852,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "smmap" +version = "5.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1f/ea/49c993d6dfdd7338c9b1000a0f36817ed7ec84577ae2e52f890d1a4ff909/smmap-5.0.3.tar.gz", hash = "sha256:4d9debb8b99007ae47165abc08670bd74cb74b5227dda7f643eccc4e9eb5642c", size = 22506, upload-time = "2026-03-09T03:43:26.1Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/d4/59e74daffcb57a07668852eeeb6035af9f32cbfd7a1d2511f17d2fe6a738/smmap-5.0.3-py3-none-any.whl", hash = "sha256:c106e05d5a61449cf6ba9a1e650227ecfb141590d2a98412103ff35d89fc7b2f", size = 24390, upload-time = "2026-03-09T03:43:24.361Z" }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -3328,6 +3870,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "socksio" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/5c/48a7d9495be3d1c651198fd99dbb6ce190e2274d0f28b9051307bdec6b85/socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac", size = 19055, upload-time = "2020-04-17T15:50:34.664Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/c3/6eeb6034408dac0fa653d126c9204ade96b819c936e136c5e8a6897eee9c/socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3", size = 12763, upload-time = "2020-04-17T15:50:31.878Z" }, +] + +[[package]] +name = "soupsieve" +version = "2.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, +] + [[package]] name = "sqlite-vec" version = "0.1.6" @@ -3450,6 +4010,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a5/ca/1e720f1347a88519e3d52b6d801cd031c3a7a5df66640c5dc6e81d925057/supabase_functions-2.28.3-py3-none-any.whl", hash = "sha256:eb30578866103fed9322c54e95dd68c2f1a4b6b177e129d9369edd364637904e", size = 8801, upload-time = "2026-03-20T14:38:15.883Z" }, ] +[[package]] +name = "swebench" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "chardet" }, + { name = "datasets" }, + { name = "docker" }, + { name = "ghapi" }, + { name = "gitpython" }, + { name = "modal" }, + { name = "pre-commit" }, + { name = "python-dotenv" }, + { name = "requests" }, + { name = "rich" }, + { name = "tenacity" }, + { name = "tqdm" }, + { name = "unidiff" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/e1/c997299ad7bf088876d30398203aa1eed7dec897670dc1aa35b1d748ffcc/swebench-4.1.0.tar.gz", hash = "sha256:5aaa6a92c2db1aa64892d28a47483ca46a45a15cf1d2df673d7744f71811dc9a", size = 134341, upload-time = "2025-09-11T02:58:00.447Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/67/981d8b642ac3eac7c8a7b7832ff8b2fb74f96b28b5fcd9a8979879e5c46d/swebench-4.1.0-py3-none-any.whl", hash = "sha256:1243776f720047cc9e20a427f7a52b75c13a07abda6154fb60fe77f82ec8af57", size = 157231, upload-time = "2025-09-11T02:57:58.953Z" }, +] + +[[package]] +name = "synchronicity" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/88/11/937a34328329998fb8921684f4d1b398e1159f100e0882670e2c17a44fac/synchronicity-0.12.1.tar.gz", hash = "sha256:ec7c42b604e016ce26cdfcf71f816e87b362558820f8ab68c049f15cae909bcd", size = 58771, upload-time = "2026-03-30T22:35:25.672Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/0c/3e47bd04566e536d8c75bebaa700a0fc8f2035b682b7fb1b0dccc617ce30/synchronicity-0.12.1-py3-none-any.whl", hash = "sha256:ff6452eb0d46d9990bf038db1f476f1c140104a9a83fbd30cdb2d65ab46cc033", size = 40964, upload-time = "2026-03-30T22:35:24.818Z" }, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -3527,6 +4124,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + +[[package]] +name = "types-certifi" +version = "2021.10.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/68/943c3aeaf14624712a0357c4a67814dba5cea36d194f5c764dad7959a00c/types-certifi-2021.10.8.3.tar.gz", hash = "sha256:72cf7798d165bc0b76e1c10dd1ea3097c7063c42c21d664523b928e88b554a4f", size = 2095, upload-time = "2022-06-09T15:19:05.244Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/63/2463d89481e811f007b0e1cd0a91e52e141b47f9de724d20db7b861dcfec/types_certifi-2021.10.8.3-py3-none-any.whl", hash = "sha256:b2d1e325e69f71f7c78e5943d410e650b4707bb0ef32e4ddf3da37f54176e88a", size = 2136, upload-time = "2022-06-09T15:19:03.127Z" }, +] + +[[package]] +name = "types-toml" +version = "0.10.8.20240310" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/86/47/3e4c75042792bff8e90d7991aa5c51812cc668828cc6cce711e97f63a607/types-toml-0.10.8.20240310.tar.gz", hash = "sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331", size = 4392, upload-time = "2024-03-10T02:18:37.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/a2/d32ab58c0b216912638b140ab2170ee4b8644067c293b170e19fba340ccc/types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d", size = 4777, upload-time = "2024-03-10T02:18:36.568Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -3569,6 +4199,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" }, ] +[[package]] +name = "unidiff" +version = "0.7.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/48/81be0ac96e423a877754153699731ef439fd7b80b4c8b5425c94ed079ebd/unidiff-0.7.5.tar.gz", hash = "sha256:2e5f0162052248946b9f0970a40e9e124236bf86c82b70821143a6fc1dea2574", size = 20931, upload-time = "2023-03-10T01:05:39.185Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/54/57c411a6e8f7bd7848c8b66e4dcaffa586bf4c02e63f2280db0327a4e6eb/unidiff-0.7.5-py2.py3-none-any.whl", hash = "sha256:c93bf2265cc1ba2a520e415ab05da587370bc2a3ae9e0414329f54f0c2fc09e8", size = 14386, upload-time = "2023-03-10T01:05:36.594Z" }, +] + [[package]] name = "urllib3" version = "2.6.3" @@ -3613,6 +4252,91 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] +[[package]] +name = "virtualenv" +version = "21.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, + { name = "python-discovery" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/92/58199fe10049f9703c2666e809c4f686c54ef0a68b0f6afccf518c0b1eb9/virtualenv-21.2.0.tar.gz", hash = "sha256:1720dc3a62ef5b443092e3f499228599045d7fea4c79199770499df8becf9098", size = 5840618, upload-time = "2026-03-09T17:24:38.013Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/59/7d02447a55b2e55755011a647479041bc92a82e143f96a8195cb33bd0a1c/virtualenv-21.2.0-py3-none-any.whl", hash = "sha256:1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f", size = 5825084, upload-time = "2026-03-09T17:24:35.378Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" }, + { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" }, + { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" }, + { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" }, + { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" }, + { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" }, + { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, +] + [[package]] name = "wcmatch" version = "10.1" From f76b83fb0f03ba9eb0dd5dd343d2da234e276068 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 17:22:41 +0800 Subject: [PATCH 07/87] fix: graft compat monitor onto light ops shell --- backend/web/monitor.py | 53 ++--- frontend/monitor/src/App.tsx | 61 ++++-- frontend/monitor/src/styles.css | 237 +++++++++++++++------- tests/Unit/monitor/test_monitor_compat.py | 11 + 4 files changed, 240 insertions(+), 122 deletions(-) create mode 100644 tests/Unit/monitor/test_monitor_compat.py diff --git a/backend/web/monitor.py b/backend/web/monitor.py index e1451911d..1947b70f9 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -14,9 +14,8 @@ import uuid from datetime import datetime from pathlib import Path -from subprocess import PIPE - from typing import Any + from fastapi import APIRouter, Depends, HTTPException, Query, Request from pydantic import BaseModel, Field @@ -235,10 +234,7 @@ async def _run_evaluation_job(evaluation_id: str, payload: EvaluationCreateReque _update_evaluation_job_status( evaluation_id, "running", - ( - f"runner=direct pid={proc.pid} sandbox={payload.sandbox} run_dir={run_dir} " - f"stdout_log={stdout_path} stderr_log={stderr_path}" - ), + (f"runner=direct pid={proc.pid} sandbox={payload.sandbox} run_dir={run_dir} stdout_log={stdout_path} stderr_log={stderr_path}"), ) # @@@monitor-eval-hard-timeout-budget - wall-time must include both solve budget and harness scoring budget for batch runs. solve_budget_sec = payload.timeout_sec * payload.count @@ -246,7 +242,7 @@ async def _run_evaluation_job(evaluation_id: str, payload: EvaluationCreateReque hard_timeout_sec = solve_budget_sec + eval_budget_sec + 180 try: await asyncio.wait_for(proc.wait(), timeout=hard_timeout_sec) - except asyncio.TimeoutError: + except TimeoutError: proc.kill() await proc.wait() notes = ( @@ -281,10 +277,7 @@ async def _run_evaluation_job(evaluation_id: str, payload: EvaluationCreateReque final_status = _derive_evaluation_status("completed", score) _update_evaluation_job_status(evaluation_id, final_status, notes) except Exception as exc: - notes = ( - f"runner=direct error={exc} sandbox={payload.sandbox} run_dir={run_dir} " - f"stdout_log={stdout_path} stderr_log={stderr_path}" - ) + notes = f"runner=direct error={exc} sandbox={payload.sandbox} run_dir={run_dir} stdout_log={stdout_path} stderr_log={stderr_path}" _update_evaluation_job_status(evaluation_id, "error", notes) @@ -377,7 +370,7 @@ def _note_value(notes: str, key: str) -> str | None: prefix = f"{key}=" for token in (notes or "").split(): if token.startswith(prefix): - return token[len(prefix):] + return token[len(prefix) :] return None @@ -908,14 +901,20 @@ def _list_running_eval_checkpoint_threads() -> list[dict[str, str | None]]: seen: set[str] = set() with sqlite3.connect(str(DB_PATH)) as conn: conn.row_factory = sqlite3.Row - jobs = conn.execute( - """ - SELECT evaluation_id, status, created_at, updated_at - FROM evaluation_jobs - WHERE status = 'running' - ORDER BY created_at DESC - """ - ).fetchall() + try: + jobs = conn.execute( + """ + SELECT evaluation_id, status, created_at, updated_at + FROM evaluation_jobs + WHERE status = 'running' + ORDER BY created_at DESC + """ + ).fetchall() + except sqlite3.OperationalError as exc: + # @@@compat-monitor-missing-eval-table - transplanted monitor must still render on databases that have never created evaluation tables. + if "no such table: evaluation_jobs" in str(exc): + return [] + raise for job in jobs: for thread_id in _list_checkpoint_threads_for_evaluation(str(job["evaluation_id"])): if thread_id in seen: @@ -1257,7 +1256,8 @@ def list_threads( """ ).fetchone() session_total = int(total_row["total_threads"] if total_row else 0) - rows = db.execute(""" + rows = db.execute( + """ SELECT cs.thread_id, COUNT(DISTINCT cs.chat_session_id) as session_count, @@ -1272,7 +1272,9 @@ def list_threads( GROUP BY cs.thread_id ORDER BY MAX(cs.last_active_at) DESC LIMIT ? OFFSET ? - """, (limit, offset)).fetchall() + """, + (limit, offset), + ).fetchall() items = [] seen_thread_ids = {str(row["thread_id"]) for row in rows if row["thread_id"]} @@ -1532,7 +1534,7 @@ def list_evaluations( LIMIT ? OFFSET ? """, (limit, offset), - ).fetchall() + ).fetchall() items = [] for row in jobs: notes = row["notes"] or "" @@ -1818,7 +1820,9 @@ def get_evaluation_detail(evaluation_id: str, request: Request, db: sqlite3.Conn if session_row and session_row["last_active_at"] else None, }, - "status": "running" if running else (session_row["status"] if session_row else ("running" if status == "running" else "idle")), + "status": "running" + if running + else (session_row["status"] if session_row else ("running" if status == "running" else "idle")), "running": running, } ) @@ -1880,6 +1884,7 @@ def get_evaluation_detail(evaluation_id: str, request: Request, db: sqlite3.Conn "threads": {"title": "Evaluation Threads", "count": total, "items": thread_items}, } + @router.get("/session/{session_id}") def get_session(session_id: str, db: sqlite3.Connection = Depends(get_db)): session = db.execute( diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index e95178e17..86e336f0c 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1,5 +1,5 @@ import React from 'react'; -import { BrowserRouter, Routes, Route, Link, NavLink, useLocation, useParams } from 'react-router-dom'; +import { BrowserRouter, Routes, Route, Link, NavLink, Navigate, useLocation, useParams } from 'react-router-dom'; import './styles.css'; const API_BASE = '/api/monitor'; @@ -92,8 +92,9 @@ function ThreadsPage() { const page = Number(pagination.page || 1); return ( -
+

{data.title}

+

Global thread index. Start here to find the active run, then drill into session, lease, and trace detail.

Showing {from}-{to} of {total} | page {page}

@@ -196,8 +197,9 @@ function TracesPage() { const page = Number(pagination.page || 1); return ( -
+

{data.title}

+

Run-level trace index for debugging tool calls, checkpoints, and runtime transitions across monitored threads.

Showing {from}-{to} of {total} | page {page}

@@ -336,7 +338,11 @@ function ThreadDetailPage() {
- +
+

Live Trace

+

Conversation, event stream, and grouped steps for the selected run. Use this after locating the right session or lease above.

+ +
); } @@ -1164,18 +1170,30 @@ function SessionDetailPage() { // Page: Leases List function LeasesPage() { + const location = useLocation(); const [data, setData] = React.useState(null); + const divergedOnly = new URLSearchParams(location.search).get('diverged') === '1'; React.useEffect(() => { fetchAPI('/leases').then(setData); }, []); if (!data) return
Loading...
; + const items = divergedOnly + ? data.items.filter((item: any) => item.state_badge?.desired !== item.state_badge?.observed) + : data.items; return ( -
+

{data.title}

-

Total: {data.count}

+

Global sandbox lease table. Treat this as the infrastructure lens; filtered divergence and raw event history branch out from here.

+

Total: {items.length}{divergedOnly ? ` / ${data.count} (diverged only)` : ''}

+
+ + {divergedOnly ? 'Show all leases' : 'Only diverged leases'} + + Lease event timeline +
@@ -1189,7 +1207,7 @@ function LeasesPage() { - {data.items.map((item: any) => ( + {items.map((item: any) => ( @@ -1492,7 +1510,7 @@ function EvaluationPage() { void loadEvaluations(); const timer = window.setInterval(() => { void loadEvaluations(); - }, 2500); + }, 5000); return () => window.clearInterval(timer); }, [loadEvaluations]); @@ -1572,7 +1590,7 @@ function EvaluationPage() { return (
-

Evaluation

+

Evaluations

One evaluation contains many threads. Start jobs from config panel, track durable progress in list, then drill into thread trace.

@@ -1582,7 +1600,7 @@ function EvaluationPage() {

2. Track

-

List auto-refreshes every 2.5s and survives reload. Status is backend-persisted.

+

List auto-refreshes every 5s and survives reload. Status is backend-persisted.

3. Inspect

@@ -1632,7 +1650,7 @@ function EvaluationPage() {

- Auto refresh: 2.5s {runsLoading ? '| loading...' : ''} + Auto refresh: 5s {runsLoading ? '| loading...' : ''} {' '}| page {evalPagination?.page ?? 1}

Evaluation = one batch run. Progress shows total/completed/started-or-running/pending. Click Evaluation ID for detail trace and thread links.

@@ -2037,15 +2055,16 @@ function ScrollToTopOnRouteChange() { function Layout({ children }: { children: React.ReactNode }) { return (
-
))} + {data.sessions.items.length === 0 && ( + + + + )}
{item.lease_id} {item.provider} {s.error || '-'}
No sessions recorded for this thread.
@@ -335,6 +340,9 @@ function ThreadDetailPage() { {l.lease_id} ))} + {data.related_leases.items.length === 0 && ( +
  • No related leases for this thread.
  • + )} @@ -694,10 +702,11 @@ function conversationText(content: any): string { function ConversationTraceCard({ message, index }: { message: any; index: number }) { const msgType = String(message?.type || 'Unknown'); + const msgTypeKey = msgType.toLowerCase(); const text = conversationText(message?.content); const toolCalls = Array.isArray(message?.tool_calls) ? message.tool_calls : []; return ( -
    +
    [{index}] @@ -788,10 +797,7 @@ function TraceCard({ item }: { item: TraceItem }) {
    {item.summary}
    )} -
    +
    Raw payload
    {JSON.stringify(item.payload, null, 2)}
    diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index a982d333c..af3c90e7a 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -212,6 +212,18 @@ td { border-top: 1px solid var(--border); } +.page[data-testid="page-traces"] td, +.page[data-testid="page-threads"] td { + padding: 0.5rem 0.75rem; + font-size: 0.88rem; +} + +.page[data-testid="page-traces"] th, +.page[data-testid="page-threads"] th { + padding: 0.55rem 0.75rem; + font-size: 0.82rem; +} + tr:hover { background: #fcfcfc; } @@ -456,8 +468,15 @@ section li { } .trace-section-shell { - border-top: 1px solid var(--border); - padding-top: 0.5rem; + margin-top: 1.5rem; + background: var(--panel); + border: 1px solid var(--border); + border-radius: 12px; + padding: 1.2rem; +} + +.trace-section-shell > h2 { + margin-top: 0; } .trace-summary { @@ -508,7 +527,7 @@ section li { justify-content: flex-start; flex-wrap: wrap; align-items: center; - gap: 1rem; + gap: 0.55rem; } .trace-run-select { @@ -528,11 +547,16 @@ section li { .trace-filters { display: flex; gap: 0.4rem; + padding-left: 0.55rem; + border-left: 1px solid var(--border); } .trace-view-switch { display: flex; gap: 0.4rem; + margin-left: auto; + padding-left: 0.55rem; + border-left: 1px solid var(--border); } .trace-filter-btn { @@ -569,16 +593,20 @@ section li { margin-top: 0.8rem; display: flex; flex-direction: column; - gap: 0.8rem; + gap: 0.35rem; } .trace-card { border: 1px solid var(--border); background: var(--panel); - border-radius: 10px; + border-radius: 6px; padding: 0.7rem 0.8rem; } +.trace-timeline > :nth-child(even) { + background: var(--bg-soft); +} + .trace-card-assistant { border-left: 4px solid #4f7fd8; } @@ -644,16 +672,16 @@ section li { color: var(--text); white-space: pre-wrap; word-break: break-word; - max-height: 300px; + max-height: 160px; overflow: auto; } .trace-output { - max-height: 460px; + max-height: 220px; } .trace-assistant-text { - max-height: 340px; + max-height: 180px; } .trace-command { @@ -675,17 +703,30 @@ section li { border: 1px solid var(--border); background: var(--panel); border-left: 4px solid #4f7fd8; - border-radius: 10px; + border-radius: 6px; padding: 0.8rem; } .conversation-card { border: 1px solid var(--border); background: var(--panel-strong); - border-radius: 10px; + border-radius: 6px; padding: 0.8rem; } +.conversation-card[data-msg-type="assistant"] { + border-left: 3px solid #4f7fd8; +} + +.conversation-card[data-msg-type="tool"] { + border-left: 3px solid #5f9446; +} + +.conversation-card[data-msg-type="human"], +.conversation-card[data-msg-type="user"] { + border-left: 3px solid var(--border-strong); +} + .trace-step-header { display: flex; justify-content: space-between; @@ -722,6 +763,11 @@ section li { font-size: 0.82rem; } +.empty-list { + color: var(--text-muted); + font-style: italic; +} + .evaluation-flow, .evaluation-overview, .evaluation-notes { @@ -876,4 +922,8 @@ section li { .eval-composer-panel { padding: 1rem; } + + .trace-view-switch { + margin-left: 0; + } } From e819060cf5428e0d83fb87d5772090362020f264 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 17:43:50 +0800 Subject: [PATCH 10/87] style: refine monitor evaluation and session detail --- frontend/monitor/src/App.tsx | 95 ++++++++++++++++++++++++++------- frontend/monitor/src/styles.css | 61 +++++++++++++++++++++ 2 files changed, 136 insertions(+), 20 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 67e327877..6b27cdf84 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1154,8 +1154,20 @@ function SessionDetailPage() { .catch((e) => setError(e.message)); }, [sessionId]); - if (error) return
    Session load failed: {error}
    ; - if (!data) return
    Loading...
    ; + if (error) { + return ( +
    +
    Session load failed: {error}
    +
    + ); + } + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } return (
    @@ -1170,6 +1182,17 @@ function SessionDetailPage() {
    Last Active: {data.info.last_active_ago}
    Ended: {data.info.ended_ago || '-'}
    + +
    + + View thread trace + + {data.info.lease_id && ( + + View lease + + )} +
    ); } @@ -1920,12 +1943,29 @@ function EvaluationPage() { function EvaluationDetailPage() { const { evaluationId } = useParams(); const [data, setData] = React.useState(null); + const [error, setError] = React.useState(null); React.useEffect(() => { - fetchAPI(`/evaluation/${evaluationId}`).then(setData); + setError(null); + fetchAPI(`/evaluation/${evaluationId}`) + .then(setData) + .catch((e) => setError(e.message)); }, [evaluationId]); - if (!data) return
    Loading...
    ; + if (error) { + return ( +
    +
    Evaluation load failed: {error}
    +
    + ); + } + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } const detailProgress = evalProgress({ threads_done: data.info?.threads_done ?? 0, threads_running: data.info?.threads_running ?? 0, @@ -1942,12 +1982,18 @@ function EvaluationDetailPage() {

    Evaluation: {shortId(data.evaluation_id, 14)}

    -

    - {data.info.status} | dataset={data.info.dataset} | {threadStateLabel}={data.info.threads_running}/{data.info.threads_total} - {' '}| gate={scoreGate} - {' '}| publishable={String(publishable)} - {' '}| score={scoreFinal ? `${data.info.score?.resolved_instances ?? 0}/${data.info.score?.total_instances ?? 0} (${formatPct(data.info.score?.primary_score_pct)})` : 'PROVISIONAL'} -

    +
    + {data.info.status} + {data.info.dataset} + {threadStateLabel}={data.info.threads_running}/{data.info.threads_total} + gate={scoreGate} + + publishable={String(publishable)} + + + score={scoreFinal ? `${data.info.score?.resolved_instances ?? 0}/${data.info.score?.total_instances ?? 0} (${formatPct(data.info.score?.primary_score_pct)})` : 'PROVISIONAL'} + +
    phase: {String(data.info.status || '-').toUpperCase()}
    @@ -1958,16 +2004,24 @@ function EvaluationDetailPage() {
    -
    -
    Split: {data.info.split}
    -
    Start: {data.info.start_idx}
    -
    Count: {data.info.slice_count}
    -
    Profile: {data.info.prompt_profile}
    -
    Timeout: {data.info.timeout_sec}s
    -
    Recursion: {data.info.recursion_limit}
    -
    Score Gate: {scoreGate}
    -
    Publishable: {String(publishable)}
    -
    Summary: {summaryReady ? 'ready' : 'missing'}
    +
    +

    Config

    +
    +
    Split: {data.info.split}
    +
    Start: {data.info.start_idx}
    +
    Count: {data.info.slice_count}
    +
    Profile: {data.info.prompt_profile}
    +
    Timeout: {data.info.timeout_sec}s
    +
    Recursion: {data.info.recursion_limit}
    +
    +
    + +
    +

    Score

    +
    +
    Score Gate: {scoreGate}
    +
    Publishable: {String(publishable)}
    +
    Summary: {summaryReady ? 'ready' : 'missing'}
    {scoreFinal ? ( <>
    Resolved: {data.info.score?.resolved_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    @@ -1992,6 +2046,7 @@ function EvaluationDetailPage() { )}
    Run Dir: {data.info.score?.run_dir || '-'}
    +
    diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index af3c90e7a..77362ee3b 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -155,6 +155,18 @@ h2 { max-width: 72ch; } +.page-loading, +.page-error { + padding: 3rem 0; + text-align: center; + color: var(--text-muted); + font-size: 0.95rem; +} + +.page-error { + color: var(--danger); +} + button, select, input { @@ -212,6 +224,13 @@ td { border-top: 1px solid var(--border); } +td[colspan] { + text-align: center; + color: var(--text-muted); + font-style: italic; + padding: 2rem 1rem; +} + .page[data-testid="page-traces"] td, .page[data-testid="page-threads"] td { padding: 0.5rem 0.75rem; @@ -313,6 +332,12 @@ section li { font-weight: 500; } +.info-grid-compact { + grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); + padding: 1rem 1.2rem; + gap: 0.75rem; +} + .hint-box { background: linear-gradient(180deg, var(--panel) 0%, var(--panel-strong) 100%); border: 1px solid var(--border); @@ -768,6 +793,35 @@ section li { font-style: italic; } +.eval-summary-bar { + display: flex; + flex-wrap: wrap; + gap: 0.4rem; + margin-bottom: 1rem; +} + +.eval-summary-chip { + display: inline-block; + padding: 0.2rem 0.55rem; + border-radius: 4px; + font-size: 0.82rem; + background: var(--bg-muted); + color: var(--text-secondary); + border: 1px solid var(--border); +} + +.chip-success { + background: var(--success-soft); + color: var(--success); + border-color: transparent; +} + +.chip-warning { + background: var(--warning-soft); + color: var(--warning); + border-color: transparent; +} + .evaluation-flow, .evaluation-overview, .evaluation-notes { @@ -852,6 +906,13 @@ section li { gap: 0.5rem; } +section.eval-runtime-panel { + background: var(--panel); + border: 1px solid var(--border); + border-radius: 12px; + padding: 1rem 1.2rem; +} + .eval-progress-track { position: relative; width: 100%; From 9aeb524eca0da778b6b2575e882160720d632106 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 17:46:58 +0800 Subject: [PATCH 11/87] fix: fail loudly on missing monitor drilldowns --- frontend/monitor/src/App.tsx | 50 +++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 6b27cdf84..adbb940f5 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1263,12 +1263,29 @@ function LeasesPage() { function LeaseDetailPage() { const { leaseId } = useParams(); const [data, setData] = React.useState(null); + const [error, setError] = React.useState(null); React.useEffect(() => { - fetchAPI(`/lease/${leaseId}`).then(setData); + setError(null); + fetchAPI(`/lease/${leaseId}`) + .then(setData) + .catch((e) => setError(e.message)); }, [leaseId]); - if (!data) return
    Loading...
    ; + if (error) { + return ( +
    +
    Lease load failed: {error}
    +
    + ); + } + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } return (
    @@ -1319,6 +1336,9 @@ function LeaseDetailPage() { ))} + {data.related_threads.items.length === 0 && ( +

    No threads linked to this lease.

    + )}
    @@ -1341,6 +1361,11 @@ function LeaseDetailPage() { {e.created_ago} ))} + {data.lease_events.items.length === 0 && ( + + No events recorded for this lease. + + )}
    @@ -1452,12 +1477,29 @@ function EventsPage() { function EventDetailPage() { const { eventId } = useParams(); const [data, setData] = React.useState(null); + const [error, setError] = React.useState(null); React.useEffect(() => { - fetchAPI(`/event/${eventId}`).then(setData); + setError(null); + fetchAPI(`/event/${eventId}`) + .then(setData) + .catch((e) => setError(e.message)); }, [eventId]); - if (!data) return
    Loading...
    ; + if (error) { + return ( +
    +
    Event load failed: {error}
    +
    + ); + } + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } return (
    From c4759dc46455a39be8a63e47336a60cea298131f Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 17:54:10 +0800 Subject: [PATCH 12/87] style: clarify monitor evaluation detail state --- ...-06-resource-observability-split-design.md | 1 + frontend/monitor/src/App.tsx | 58 +++++++++++-------- frontend/monitor/src/styles.css | 6 ++ 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 7cce21d67..d0f432f3f 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -35,6 +35,7 @@ - Active continuation is `#210`, not `#209`. - `#210` uses `PR #182` as the monitor baseline by transplanting the compat monitor onto a current resource-split branch instead of building on the reduced dev monitor shell. - This branch keeps the full compat operator surface (`threads`, `traces`, `leases`, `evaluation`) and applies a bounded light-theme cleanup so operators are not dropped into a dark, overloaded console. +- Latest frontend review closeout on `#210` is intentionally narrow: `EvaluationDetailPage` now gives the primary status chip semantic warning/danger/success treatment instead of leaving status visually flatter than the secondary publishable chip, and the score-grid JSX structure was re-indented so future edits do not misread the DOM hierarchy. ## Proposal Comparison diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index adbb940f5..eeb113c19 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -2019,13 +2019,21 @@ function EvaluationDetailPage() { const publishable = Boolean(data.info?.score?.publishable ?? (scoreGate === 'final')); const scoreFinal = publishable; const summaryReady = !!data.info?.score?.eval_summary_path; + const statusToneClass = + data.info.status === 'completed' + ? 'chip-success' + : data.info.status === 'error' + ? 'chip-danger' + : data.info.status === 'provisional' || data.info.status === 'completed_with_errors' + ? 'chip-warning' + : ''; return (

    Evaluation: {shortId(data.evaluation_id, 14)}

    - {data.info.status} + {data.info.status} {data.info.dataset} {threadStateLabel}={data.info.threads_running}/{data.info.threads_total} gate={scoreGate} @@ -2064,30 +2072,30 @@ function EvaluationDetailPage() {
    Score Gate: {scoreGate}
    Publishable: {String(publishable)}
    Summary: {summaryReady ? 'ready' : 'missing'}
    - {scoreFinal ? ( - <> -
    Resolved: {data.info.score?.resolved_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Resolved Rate: {formatPct(data.info.score?.resolved_rate_pct)}
    -
    Completed: {data.info.score?.completed_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Completed Rate: {formatPct(data.info.score?.completed_rate_pct)}
    -
    Non-empty Patch: {data.info.score?.non_empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Non-empty Rate: {formatPct(data.info.score?.non_empty_patch_rate_pct)}
    -
    Empty Patch: {data.info.score?.empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Errors: {data.info.score?.error_instances ?? 0}
    -
    Trace Active: {data.info.score?.active_trace_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Tool-call Threads: {data.info.score?.tool_call_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Tool-call Coverage: {formatPct(data.info.score?.tool_call_thread_rate_pct)}
    -
    Tool Calls Total: {data.info.score?.tool_calls_total ?? 0}
    -
    Avg Tool Calls(active): {data.info.score?.avg_tool_calls_per_active_thread ?? '-'}
    -
    Recursion Cap Hits: {data.info.score?.recursion_cap_hits ?? 0}{data.info.score?.recursion_limit ? ` / cap ${data.info.score.recursion_limit}` : ''}
    - - ) : ( - <> -
    Final Score: blocked (provisional)
    -
    Block Reason: {data.info.score?.manifest_eval_error ? 'manifest_eval_error' : 'missing_eval_summary'}
    - - )} -
    Run Dir: {data.info.score?.run_dir || '-'}
    + {scoreFinal ? ( + <> +
    Resolved: {data.info.score?.resolved_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Resolved Rate: {formatPct(data.info.score?.resolved_rate_pct)}
    +
    Completed: {data.info.score?.completed_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Completed Rate: {formatPct(data.info.score?.completed_rate_pct)}
    +
    Non-empty Patch: {data.info.score?.non_empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Non-empty Rate: {formatPct(data.info.score?.non_empty_patch_rate_pct)}
    +
    Empty Patch: {data.info.score?.empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Errors: {data.info.score?.error_instances ?? 0}
    +
    Trace Active: {data.info.score?.active_trace_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Tool-call Threads: {data.info.score?.tool_call_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Tool-call Coverage: {formatPct(data.info.score?.tool_call_thread_rate_pct)}
    +
    Tool Calls Total: {data.info.score?.tool_calls_total ?? 0}
    +
    Avg Tool Calls(active): {data.info.score?.avg_tool_calls_per_active_thread ?? '-'}
    +
    Recursion Cap Hits: {data.info.score?.recursion_cap_hits ?? 0}{data.info.score?.recursion_limit ? ` / cap ${data.info.score.recursion_limit}` : ''}
    + + ) : ( + <> +
    Final Score: blocked (provisional)
    +
    Block Reason: {data.info.score?.manifest_eval_error ? 'manifest_eval_error' : 'missing_eval_summary'}
    + + )} +
    Run Dir: {data.info.score?.run_dir || '-'}
    diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 77362ee3b..5b346b325 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -822,6 +822,12 @@ section li { border-color: transparent; } +.chip-danger { + background: var(--danger-soft); + color: var(--danger); + border-color: transparent; +} + .evaluation-flow, .evaluation-overview, .evaluation-notes { From c11e1d14bc2ed21f2f7c64f372b4a327dad188d7 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 18:10:17 +0800 Subject: [PATCH 13/87] fix: make monitor thread pagination honest --- backend/web/monitor.py | 62 +++++++++++------ ...2026-04-06-resource-observability-split.md | 2 + ...-06-resource-observability-split-design.md | 45 ++++++++++++ tests/Unit/monitor/test_monitor_compat.py | 68 +++++++++++++++++++ 4 files changed, 157 insertions(+), 20 deletions(-) diff --git a/backend/web/monitor.py b/backend/web/monitor.py index 1947b70f9..57054c553 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -30,7 +30,8 @@ def get_db(): - # @@@fastapi-threadpool-sqlite - sync endpoints may execute in worker threads; disable same-thread guard for shared request-scoped connection. + # @@@fastapi-threadpool-sqlite - sync endpoints may execute in worker + # threads; disable same-thread guard for shared request-scoped connection. db = connect_sqlite(SANDBOX_DB_PATH, row_factory=sqlite3.Row, check_same_thread=False) try: yield db @@ -216,7 +217,9 @@ async def _run_evaluation_job(evaluation_id: str, payload: EvaluationCreateReque stdout_path = run_dir / "monitor_stdout.log" stderr_path = run_dir / "monitor_stderr.log" command = _build_run_slice_command(payload, evaluation_id) - # @@@monitor-eval-sandbox-env - pass sandbox selection via env so run_slice -> LeonAgent resolves non-local provider, and isolate sandbox state per evaluation run. + # @@@monitor-eval-sandbox-env - pass sandbox selection via env so + # run_slice -> LeonAgent resolves non-local provider, and isolate sandbox + # state per evaluation run. env = dict(os.environ) env["LEON_SANDBOX"] = payload.sandbox env["LEON_SANDBOX_DB_PATH"] = str(run_dir / "sandbox.db") @@ -585,8 +588,11 @@ def _load_live_eval_session_progress(evaluation_id: str, cwd: str | None, notes: idle_minutes = float(row["idle_minutes"]) if row["idle_minutes"] is not None else None if total <= 0: return None - # @@@eval-progress-live-session - when thread mapping rows are not persisted yet, use per-run sandbox session states for true running/done counts. - # @@@eval-running-freshness - treat stale "active" sessions as non-running to avoid fake-running UI after runner exits unexpectedly. + # @@@eval-progress-live-session - when thread mapping rows are not + # persisted yet, use per-run sandbox session states for true running/done + # counts. + # @@@eval-running-freshness - treat stale "active" sessions as non-running + # to avoid fake-running UI after runner exits unexpectedly. stale_after_minutes = max(2.0, (idle_ttl_sec / 60.0) + 1.0) active_recent = bool(running > 0 and idle_minutes is not None and idle_minutes <= stale_after_minutes) running_effective = running if active_recent else 0 @@ -641,7 +647,9 @@ def _load_live_eval_sessions(evaluation_id: str, cwd: str | None, notes: str) -> def _is_eval_runner_alive(evaluation_id: str, notes: str) -> bool: - # @@@eval-runner-pid-liveness - after backend restart, task map is empty; use persisted runner pid as direct liveness source before session rows appear. + # @@@eval-runner-pid-liveness - after backend restart, task map is empty; + # use persisted runner pid as direct liveness source before session rows + # appear. m = re.search(r"\bpid=(\d+)\b", notes or "") if not m: return False @@ -911,7 +919,9 @@ def _list_running_eval_checkpoint_threads() -> list[dict[str, str | None]]: """ ).fetchall() except sqlite3.OperationalError as exc: - # @@@compat-monitor-missing-eval-table - transplanted monitor must still render on databases that have never created evaluation tables. + # @@@compat-monitor-missing-eval-table - transplanted monitor must + # still render on databases that have never created evaluation + # tables. if "no such table: evaluation_jobs" in str(exc): return [] raise @@ -1154,7 +1164,8 @@ def _load_checkpoint_events(thread_id: str, limit: int) -> tuple[list[dict], dic ) counts["tool_result"] = counts.get("tool_result", 0) + 1 seq += 1 - # @@@checkpoint-trace-fallback - convert latest checkpoint messages into event-like rows so thread trace still renders when run_events are absent. + # @@@checkpoint-trace-fallback - convert latest checkpoint messages into + # event-like rows so thread trace still renders when run_events are absent. if limit > 0: events = events[-limit:] return events, counts @@ -1271,28 +1282,21 @@ def list_threads( LEFT JOIN sandbox_leases sl ON cs.lease_id = sl.lease_id GROUP BY cs.thread_id ORDER BY MAX(cs.last_active_at) DESC - LIMIT ? OFFSET ? """, - (limit, offset), ).fetchall() - items = [] seen_thread_ids = {str(row["thread_id"]) for row in rows if row["thread_id"]} checkpoint_threads = [row for row in _list_running_eval_checkpoint_threads() if row["thread_id"] not in seen_thread_ids] total = session_total + len(checkpoint_threads) - # @@@threads-pagination-mode-map - only load mode metadata for current page to keep list endpoint lightweight on large thread sets. - mode_map = load_thread_mode_map([row["thread_id"] for row in rows if row["thread_id"]]) items = [] for row in rows: - badge = make_badge(row["desired_state"], row["observed_state"]) - mode_info = mode_map.get(row["thread_id"], {"thread_mode": "normal", "keep_full_trace": False}) items.append( { "thread_id": row["thread_id"], "thread_url": f"/thread/{row['thread_id']}", - "thread_mode": mode_info["thread_mode"], - "keep_full_trace": mode_info["keep_full_trace"], + "thread_mode": "normal", + "keep_full_trace": False, "session_count": row["session_count"], "last_active": row["last_active"], "last_active_ago": format_time_ago(row["last_active"]), @@ -1302,7 +1306,7 @@ def list_threads( "provider": row["provider_name"], "instance_id": row["current_instance_id"], }, - "state_badge": badge, + "state_badge": make_badge(row["desired_state"], row["observed_state"]), } ) @@ -1335,6 +1339,18 @@ def list_threads( items.sort(key=lambda item: str(item.get("last_active") or ""), reverse=True) items = items[offset : offset + limit] + # @@@threads-pagination-mode-map - now that session threads and checkpoint threads share one sort order, + # load thread mode only for the current page instead of pre-paginating twice. + mode_map = load_thread_mode_map( + [str(item["thread_id"]) for item in items if item.get("thread_mode") != "evaluation" and item.get("thread_id")] + ) + for item in items: + if item.get("thread_mode") == "evaluation": + continue + mode_info = mode_map.get(str(item["thread_id"]), {"thread_mode": "normal", "keep_full_trace": False}) + item["thread_mode"] = mode_info["thread_mode"] + item["keep_full_trace"] = mode_info["keep_full_trace"] + page = (offset // limit) + 1 return { "title": "All Threads", @@ -1539,7 +1555,9 @@ def list_evaluations( for row in jobs: notes = row["notes"] or "" status = str(row["status"] or "pending") - # @@@monitor-eval-orphan-reconcile - if backend restarted and task map no longer tracks a running job, mark it error to avoid permanent fake-running rows. + # @@@monitor-eval-orphan-reconcile - if backend restarted and task + # map no longer tracks a running job, mark it error to avoid + # permanent fake-running rows. if status == "running" and row["evaluation_id"] not in running_jobs: if _is_eval_runner_alive(str(row["evaluation_id"]), notes): if "runner_lost_pid_alive:" not in notes: @@ -1592,7 +1610,9 @@ def list_evaluations( threads_started = running_count live_session_progress = _load_live_eval_session_progress(str(row["evaluation_id"]), row["cwd"], notes) if status == "running": - # @@@eval-live-progress-from-checkpoints - thread rows are ingested after runner exits; use live checkpoint thread ids for in-flight progress. + # @@@eval-live-progress-from-checkpoints - thread rows are + # ingested after runner exits; use live checkpoint thread ids + # for in-flight progress. running_count = max(running_count, _count_live_eval_threads(str(row["evaluation_id"]))) threads_total = max(threads_total, running_count) if live_session_progress: @@ -1829,7 +1849,9 @@ def get_evaluation_detail(evaluation_id: str, request: Request, db: sqlite3.Conn total = len(thread_items) if status == "running": - # @@@eval-live-progress-from-checkpoints - evaluation thread mappings are persisted at the end, so derive interim running count from live checkpoint data. + # @@@eval-live-progress-from-checkpoints - evaluation thread mappings + # are persisted at the end, so derive interim running count from live + # checkpoint data. checkpoint_started = _count_live_eval_threads(evaluation_id) running_count = max(running_count, checkpoint_started) total = max(total, running_count) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 59056aa33..1019c013b 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -10,6 +10,8 @@ **Execution note:** `#209` remains useful transplant material for the resource split, but active continuation moved to `#210` because the correct monitor baseline is the compat monitor from `PR #182`, not the reduced dev monitor shell. The frontend scope here stays bounded: keep the full compat operator surface, switch it to a lighter and clearer ops shell, and prove it with real Playwright traces instead of a component-only pass. +**Additional sequencing note after live operator review:** before this branch is mergeable as a monitor base, the next follow-up cuts must address four honesty seams now visible in the real UI: `D1` threads pagination contract, `D2` provisional evaluation detail as an operator surface, `D3` lease orphan/diverged regrouping, and `D4` dashboard + global resources entry. + --- ### Task 1: Lock Storage Abstraction For Monitor Reads diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index d0f432f3f..d3a471cf6 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -148,3 +148,48 @@ This design chooses option 1 in architecture, but decomposes the implementation - visible proof: monitor shell/logo plus leases table headers - trace proof: browser requests include `/api/monitor/leases` and exclude `/api/resources/*` - Small frontend testability improvements are allowed when they are selector-only changes, especially `data-testid` markers on product resource page elements and provider cards. + +## Newly Surfaced Defects And Follow-up Slices + +These are not vague “polish later” notes. They are concrete seams that now block an honest first merge of the monitor base. + +### Slice D1: Threads Pagination Honesty + +- Current defect: + - `/api/monitor/threads?offset=50&limit=50` returns `items=[]` while still reporting `total=74`, `page=2`, and `has_next=true`. + - The page therefore shows impossible copy like `Showing 51-50 of 74`. +- Root cause: + - `backend/web/monitor.py::list_threads()` paginates once in SQL, appends checkpoint-only evaluation threads, then slices again with `items[offset:offset+limit]`. +- Required outcome: + - single pagination semantic + - truthful `has_next/next_offset` + - no inverted count labels + +### Slice D2: Evaluation Provisional Operator Surface + +- Current defect: + - real provisional eval detail technically renders, but operator-facing meaning is weak enough that the page reads like “nothing is there”. +- Required outcome: + - provisional state must explain what exists now, what is still pending, where logs/artifacts live, and what the operator should do next. + - this is a backend-first surface; if new fields are needed, add them to the payload instead of making the frontend guess from free-text notes. + +### Slice D3: Lease Semantics And Regrouping + +- Current defect: + - `/leases` currently dumps raw orphan/diverged rows with minimal explanation. + - operator cannot tell whether they are seeing stale history, expected cleanup lag, or a real infrastructure problem. +- Required outcome: + - keep raw/global truth available + - add explicit categorization/regrouping for active, diverged, orphan, and historical leases + - reduce “system looks broken” confusion without hiding the raw facts + +### Slice D4: Dashboard Entry And Global Resources Surface + +- Current defect: + - monitor still drops operators straight into a list page + - monitor has no first-class global resources surface even though `/api/monitor/resources` already exists + - the current top-nav caption is redundant and should be removed +- Required outcome: + - add a dashboard landing page + - add a monitor resources entry, likely by transplanting/reusing the existing `ResourcesPage` visual structure against the global monitor contract + - keep product `/resources` on the user-scoped contract and keep monitor resources global diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index 29ccbb53f..c5359ee6a 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -3,9 +3,77 @@ from backend.web import monitor +def _bootstrap_threads_monitor_db(db_path, count: int) -> sqlite3.Connection: + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + conn.executescript( + """ + CREATE TABLE sandbox_leases ( + lease_id TEXT PRIMARY KEY, + provider_name TEXT, + desired_state TEXT, + observed_state TEXT, + current_instance_id TEXT, + created_at TEXT, + updated_at TEXT + ); + + CREATE TABLE chat_sessions ( + chat_session_id TEXT PRIMARY KEY, + thread_id TEXT, + lease_id TEXT, + status TEXT, + started_at TEXT, + last_active_at TEXT + ); + """ + ) + for idx in range(count): + hour = idx // 60 + minute = idx % 60 + conn.execute( + """ + INSERT INTO chat_sessions ( + chat_session_id, thread_id, lease_id, status, started_at, last_active_at + ) VALUES (?, ?, ?, ?, ?, ?) + """, + ( + f"sess-{idx}", + f"thread-{idx:03d}", + None, + "closed", + f"2026-04-06T{hour:02d}:{minute:02d}:00", + f"2026-04-06T{hour:02d}:{minute:02d}:30", + ), + ) + conn.commit() + return conn + + def test_list_running_eval_checkpoint_threads_returns_empty_when_eval_tables_absent(tmp_path, monkeypatch): db_path = tmp_path / "leon.db" sqlite3.connect(db_path).close() monkeypatch.setattr(monitor, "DB_PATH", db_path) assert monitor._list_running_eval_checkpoint_threads() == [] + + +def test_list_threads_second_page_is_not_sliced_empty_after_sql_pagination(tmp_path, monkeypatch): + db_path = tmp_path / "sandbox.db" + conn = _bootstrap_threads_monitor_db(db_path, count=74) + try: + monkeypatch.setattr(monitor, "_list_running_eval_checkpoint_threads", lambda: []) + monkeypatch.setattr(monitor, "load_thread_mode_map", lambda thread_ids: {}) + + payload = monitor.list_threads(offset=50, limit=50, db=conn) + finally: + conn.close() + + assert payload["count"] == 24 + assert len(payload["items"]) == 24 + assert payload["items"][0]["thread_id"] == "thread-023" + assert payload["items"][-1]["thread_id"] == "thread-000" + assert payload["pagination"]["page"] == 2 + assert payload["pagination"]["has_prev"] is True + assert payload["pagination"]["has_next"] is False + assert payload["pagination"]["next_offset"] is None From 962ac0de5dd980e1e3273f0862060eb34c90449a Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 18:28:58 +0800 Subject: [PATCH 14/87] feat: add monitor dashboard and resources surface --- backend/web/routers/monitor.py | 59 +- ...2026-04-06-resource-observability-split.md | 17 + ...-06-resource-observability-split-design.md | 73 ++ frontend/monitor/src/App.tsx | 621 ++++++++++++++++-- frontend/monitor/src/styles.css | 253 ++++++- .../test_monitor_resources_route.py | 13 + 6 files changed, 988 insertions(+), 48 deletions(-) diff --git a/backend/web/routers/monitor.py b/backend/web/routers/monitor.py index 5c9518dee..3641adae0 100644 --- a/backend/web/routers/monitor.py +++ b/backend/web/routers/monitor.py @@ -6,9 +6,9 @@ import asyncio -from fastapi import HTTPException, Query +from fastapi import HTTPException, Query, Request -from backend.web.monitor import router +from backend.web.monitor import get_db, list_evaluations, list_leases, router from backend.web.services import monitor_service from backend.web.services.resource_cache import ( get_monitor_resource_overview_snapshot, @@ -21,6 +21,61 @@ def health_snapshot(): return monitor_service.runtime_health_snapshot() +@router.get("/dashboard") +def dashboard_snapshot(request: Request): + health = monitor_service.runtime_health_snapshot() + resources = get_monitor_resource_overview_snapshot() + db_gen = get_db() + db = next(db_gen) + try: + leases = list_leases(db=db) + finally: + db_gen.close() + evaluations = list_evaluations(limit=5, offset=0, request=request) + + resource_summary = resources.get("summary") or {} + lease_items = leases.get("items") or [] + latest_eval = (evaluations.get("items") or [None])[0] + + latest_eval_summary = None + if latest_eval: + total = int(latest_eval.get("threads_total") or 0) + done = int(latest_eval.get("threads_done") or 0) + progress_pct = round((done / total) * 100, 1) if total > 0 else 0.0 + score = latest_eval.get("score") or {} + latest_eval_summary = { + "evaluation_id": latest_eval.get("evaluation_id"), + "evaluation_url": latest_eval.get("evaluation_url"), + "status": latest_eval.get("status"), + "progress_pct": progress_pct, + "threads_done": done, + "threads_total": total, + "publishable": bool(score.get("publishable")), + "primary_score_pct": score.get("primary_score_pct"), + "updated_ago": latest_eval.get("updated_ago"), + } + + return { + "snapshot_at": health.get("snapshot_at"), + "resources_summary": resource_summary, + "infra": { + "providers_active": int(resource_summary.get("active_providers") or 0), + "providers_unavailable": int(resource_summary.get("unavailable_providers") or 0), + "leases_total": int(leases.get("count") or 0), + "leases_diverged": sum(1 for item in lease_items if not bool((item.get("state_badge") or {}).get("converged"))), + "leases_orphan": sum(1 for item in lease_items if bool((item.get("thread") or {}).get("is_orphan"))), + "leases_healthy": sum(1 for item in lease_items if bool((item.get("state_badge") or {}).get("converged"))), + }, + "workload": { + "db_sessions_total": int(((health.get("db") or {}).get("counts") or {}).get("chat_sessions") or 0), + "provider_sessions_total": int(((health.get("sessions") or {}).get("total")) or 0), + "running_sessions": int(resource_summary.get("running_sessions") or 0), + "evaluations_running": sum(1 for item in (evaluations.get("items") or []) if item.get("status") == "running"), + }, + "latest_evaluation": latest_eval_summary, + } + + @router.get("/resources") def resources_overview(): return get_monitor_resource_overview_snapshot() diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 1019c013b..cf4fa1935 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -12,6 +12,23 @@ **Additional sequencing note after live operator review:** before this branch is mergeable as a monitor base, the next follow-up cuts must address four honesty seams now visible in the real UI: `D1` threads pagination contract, `D2` provisional evaluation detail as an operator surface, `D3` lease orphan/diverged regrouping, and `D4` dashboard + global resources entry. +**Current execution order after `D1`:** +- `D4` dashboard + global resources entry +- `D3` lease semantics/regrouping inside the new resources surface +- `D2` provisional evaluation operator surface + +**Live progress after latest frontend pass:** +- `D1` is done +- `D4` now has a landed phase-1: + - `/dashboard` route and `/api/monitor/dashboard` backend payload exist + - top nav is `Dashboard / Threads / Resources / Eval` + - root lands on `/dashboard` + - monitor `Resources` uses the global monitor contract and includes grouped lease triage + - evaluation tutorial/reference sections are collapsed by default +- next honest follow-up remains: + - `D3` because lease regrouping still relies on shallow frontend grouping over raw facts + - `D2` because provisional eval detail still needs a stronger operator-facing explanation and artifact/log next-step surface + --- ### Task 1: Lock Storage Abstraction For Monitor Reads diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index d3a471cf6..e3e5162c5 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -193,3 +193,76 @@ These are not vague “polish later” notes. They are concrete seams that now b - add a dashboard landing page - add a monitor resources entry, likely by transplanting/reusing the existing `ResourcesPage` visual structure against the global monitor contract - keep product `/resources` on the user-scoped contract and keep monitor resources global + +## Current IA Direction + +This is the current recommended monitor IA after the latest user review and the Chloe/CCM design pass. + +### Top-level Navigation + +- `Dashboard` +- `Threads` +- `Resources` +- `Eval` + +### Explicit removals / merges + +- remove the top-nav caption (`Global ops surface...`) +- stop defaulting `/` to `/threads`; default to `/dashboard` +- merge the current top-level `Traces` tab into the thread drill-down path instead of keeping it as a separate first-class nav destination +- replace the top-level `Leases` tab with `Resources`; lease health remains visible, but as one section inside the broader resources/infrastructure surface + +### Dashboard Shape + +- `Infra Health` + - provider availability + - diverged lease count + - orphan lease count + - links into filtered resource/lease views +- `Active Workload` + - active threads + - running sessions + - recent errors +- `Eval Snapshot` + - latest evaluation status + - progress + - publishable/final score when available + +The dashboard is a switchboard, not a full destination page. It should answer “what needs attention?” and route the operator into the right deeper surface. + +### Resources Surface + +- top section: global provider cards and provider detail, transplanted from the existing product `ResourcesPage` family where possible +- bottom section: lease health triage, grouped instead of dumped + - diverged + - orphan + - healthy/history (collapsed or de-emphasized) + +### Current D4 Phase-1 Landing + +- compat monitor now has a real `/dashboard` entry backed by `/api/monitor/dashboard` +- top-level nav is now `Dashboard / Threads / Resources / Eval` +- root route now lands on `/dashboard` +- top-nav caption has been removed +- monitor `Resources` is now a first-class page using the global monitor contract: + - `GET /api/monitor/resources` + - `POST /api/monitor/resources/refresh` + - `GET /api/monitor/leases` +- the monitor resources page now has: + - provider grid + - selected provider detail + - global session table per provider + - grouped lease health sections (`Diverged`, `Orphans`, `All leases`) +- evaluation guidance is no longer sprayed across the first screen; tutorial/reference sections are now collapsed by default behind an operator-guide `
    ` block + +### D4 Remaining Gaps + +- provider detail is now useful, but it is still lighter than the original product `ResourcesPage` family +- lease regrouping exists, but backend-side semantic categorization is still shallow and belongs to `D3` +- dashboard is currently a compact switchboard; it does not yet expose richer error drill-down or resource anomaly timelines + +### Why this IA + +- the backend already exposes `/api/monitor/resources`; the missing piece is a monitor entry surface, not another resource backend invention +- leases are one kind of infrastructure/resource truth, not a top-level product of their own +- traces are usually reached through a thread/run drill-down, so a separate top-level `Traces` tab adds noise before it adds value diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index eeb113c19..a426b80b2 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -62,6 +62,547 @@ function StateBadge({ badge }: { badge: any }) { return {text}; } +function DashboardMetric({ + label, + value, + note, + tone = 'default', +}: { + label: string; + value: React.ReactNode; + note?: React.ReactNode; + tone?: 'default' | 'warning' | 'danger' | 'success'; +}) { + return ( +
    + {label} + {value} + {note ? {note} : null} +
    + ); +} + +function DashboardPage() { + const [data, setData] = React.useState(null); + const [loading, setLoading] = React.useState(false); + const [error, setError] = React.useState(null); + + const loadDashboard = React.useCallback(async () => { + setLoading(true); + setError(null); + try { + const payload = await fetchAPI('/dashboard'); + setData(payload); + } catch (e: any) { + setError(e?.message || String(e)); + } finally { + setLoading(false); + } + }, []); + + React.useEffect(() => { + void loadDashboard(); + }, [loadDashboard]); + + if (error) { + return ( +
    +

    Dashboard

    +
    Dashboard load failed: {error}
    +
    + ); + } + + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } + + const infra = data.infra || {}; + const workload = data.workload || {}; + const latestEval = data.latest_evaluation || null; + const resourcesSummary = data.resources_summary || {}; + + return ( +
    +
    +
    +

    Dashboard

    +

    Operator landing for resource health, workload pressure, and the latest evaluation run.

    +
    + +
    + +
    +
    +
    +
    +

    Infra Health

    +

    Global provider and lease state from the monitor backend.

    +
    + + Open resources + +
    +
    + 0 ? 'warning' : 'success'} + /> + 0 ? 'warning' : 'success'} + /> + 0 ? 'danger' : 'success'} + /> +
    +
    + +
    +
    +
    +

    Active Workload

    +

    How much monitored runtime is currently alive across DB sessions, providers, and evaluations.

    +
    + + Open threads + +
    +
    + + + 0 ? 'default' : 'warning'} + /> +
    +
    + +
    +
    +
    +

    Latest Eval

    +

    Most recent evaluation known to the monitor. Use this as the fastest jump into detail.

    +
    + + {latestEval ? 'Open latest eval' : 'Open eval list'} + +
    + {latestEval ? ( +
    +
    + + {latestEval.status} + + + publishable={String(Boolean(latestEval.publishable))} + +
    +
    {latestEval.evaluation_id}
    +
    +
    +
    +
    + {latestEval.threads_done || 0}/{latestEval.threads_total || 0} threads · {formatPct(latestEval.progress_pct || 0)} · updated {latestEval.updated_ago || '-'} +
    +
    + +
    +
    + ) : ( +
    +

    No evaluation rows yet. Open Eval to submit a minimal run.

    +
    + )} +
    +
    +
    + ); +} + +function MonitorResourcesPage() { + const [resourceData, setResourceData] = React.useState(null); + const [leaseData, setLeaseData] = React.useState(null); + const [selectedId, setSelectedId] = React.useState(''); + const [loading, setLoading] = React.useState(false); + const [refreshing, setRefreshing] = React.useState(false); + const [error, setError] = React.useState(null); + + const loadResources = React.useCallback(async () => { + setLoading(true); + setError(null); + try { + const [resources, leases] = await Promise.all([ + fetchAPI('/resources'), + fetchAPI('/leases'), + ]); + setResourceData(resources); + setLeaseData(leases); + const providers = Array.isArray(resources?.providers) ? resources.providers : []; + setSelectedId((prev) => (providers.some((provider: any) => provider.id === prev) ? prev : providers[0]?.id || '')); + } catch (e: any) { + setError(e?.message || String(e)); + } finally { + setLoading(false); + } + }, []); + + const refreshNow = React.useCallback(async () => { + setRefreshing(true); + setError(null); + try { + const [resources, leases] = await Promise.all([ + fetchJSON(`${API_BASE}/resources/refresh`, { method: 'POST' }), + fetchAPI('/leases'), + ]); + setResourceData(resources); + setLeaseData(leases); + } catch (e: any) { + setError(e?.message || String(e)); + } finally { + setRefreshing(false); + } + }, []); + + React.useEffect(() => { + void loadResources(); + }, [loadResources]); + + if (error) { + return ( +
    +

    Resources

    +
    Resource load failed: {error}
    +
    + ); + } + + if (!resourceData || !leaseData) { + return ( +
    +
    Loading...
    +
    + ); + } + + const providers = Array.isArray(resourceData.providers) ? resourceData.providers : []; + const summary = resourceData.summary || {}; + const leases = Array.isArray(leaseData.items) ? leaseData.items : []; + const selectedProvider = providers.find((provider: any) => provider.id === selectedId) || providers[0] || null; + const divergedLeases = leases.filter((item: any) => item.state_badge?.desired !== item.state_badge?.observed); + const orphanLeases = leases.filter((item: any) => Boolean(item.thread?.is_orphan)); + const healthyLeases = leases.filter((item: any) => Boolean(item.state_badge?.converged)); + const refreshedAt = summary.last_refreshed_at || summary.snapshot_at; + const selectedSessions = Array.isArray(selectedProvider?.sessions) ? selectedProvider.sessions : []; + const selectedRunning = selectedSessions.filter((session: any) => session.status === 'running').length; + const selectedPaused = selectedSessions.filter((session: any) => session.status === 'paused').length; + const selectedStopped = selectedSessions.filter((session: any) => session.status === 'stopped').length; + + return ( +
    +
    +
    +

    Resources

    +

    Global provider health and lease triage. Product resources stay user-scoped; this page keeps the infra-wide lens.

    +
    + +
    + +
    + + + 0 ? 'warning' : 'success'} /> + 0 ? 'success' : 'danger'} /> +
    + +
    +
    +
    +

    Providers

    +

    Same provider surface as the product page, but backed by the global monitor contract.

    +
    +
    +
    + {providers.map((provider: any) => { + const sessions = Array.isArray(provider.sessions) ? provider.sessions : []; + const runningCount = sessions.filter((session: any) => session.status === 'running').length; + const unavailable = provider.status === 'unavailable'; + const cpuUsed = provider.cardCpu?.used; + const memoryUsed = provider.telemetry?.memory?.used; + return ( + + ); + })} +
    +
    + + {selectedProvider ? ( +
    +
    +
    +

    {selectedProvider.name}

    +

    {selectedProvider.description || 'No provider description.'}

    +
    + {selectedProvider.consoleUrl ? ( + + Open console + + ) : null} +
    +
    + + status + {selectedProvider.status} + + + running + {selectedRunning} + + + paused + {selectedPaused} + + + stopped + {selectedStopped} + +
    +
    +
    + Provider + {selectedProvider.type}{selectedProvider.vendor ? ` · ${selectedProvider.vendor}` : ''} +
    +
    + Capabilities + {Object.entries(selectedProvider.capabilities || {}).filter(([, enabled]) => Boolean(enabled)).map(([name]) => name).join(', ') || '-'} +
    +
    + CPU + {selectedProvider.telemetry?.cpu?.used == null ? '--' : `${Number(selectedProvider.telemetry.cpu.used).toFixed(1)}%`} +
    +
    + Memory + {selectedProvider.telemetry?.memory?.used == null ? '--' : `${Number(selectedProvider.telemetry.memory.used).toFixed(1)} / ${selectedProvider.telemetry?.memory?.limit ?? '--'} GB`} +
    +
    + Disk + {selectedProvider.telemetry?.disk?.used == null ? '--' : `${Number(selectedProvider.telemetry.disk.used).toFixed(1)} / ${selectedProvider.telemetry?.disk?.limit ?? '--'} GB`} +
    +
    + Reason + {selectedProvider.unavailableReason || selectedProvider.error || 'healthy'} +
    +
    +
    +
    +
    +

    Sessions ({selectedSessions.length})

    +

    Global session rows currently attached to this provider. This is the monitor-side truth surface, not the user projection.

    +
    +
    + + + + + + + + + + + + + {selectedSessions.map((session: any) => ( + + + + + + + + + ))} + {selectedSessions.length === 0 ? ( + + + + ) : null} + +
    SessionThreadLeaseMemberStatusStarted
    {shortId(session.id, 12)}{session.threadId ? {shortId(session.threadId, 12)} : '-'}{session.leaseId ? {shortId(session.leaseId, 12)} : '-'}{session.memberName || session.memberId || '-'}{session.status}{session.startedAt ? new Date(session.startedAt).toLocaleString() : '-'}
    No sessions reported for this provider.
    +
    +
    + ) : null} + +
    +
    +
    +

    Lease Health

    +

    Grouped triage surface. Diverged rows show state drift; orphan rows show leases no longer bound to a live thread.

    +
    + + Legacy flat table + +
    +
    +
    +

    Diverged ({divergedLeases.length})

    +

    Desired and observed states no longer match.

    + + + + + + + + + + + + {divergedLeases.slice(0, 8).map((item: any) => ( + + + + + + + + ))} + {divergedLeases.length === 0 ? ( + + + + ) : null} + +
    LeaseProviderThreadStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan}{item.updated_ago}
    No diverged leases.
    +
    + +
    +

    Orphans ({orphanLeases.length})

    +

    Lease rows with no active thread binding. These usually indicate cleanup debt or abandoned runtime state.

    + + + + + + + + + + + + {orphanLeases.slice(0, 8).map((item: any) => ( + + + + + + + + ))} + {orphanLeases.length === 0 ? ( + + + + ) : null} + +
    LeaseProviderInstanceStateError
    {shortId(item.lease_id, 12)}{item.provider}{shortId(item.instance_id, 12)}{item.error || '-'}
    No orphan leases.
    +
    +
    + +
    + All leases ({leases.length}) + + + + + + + + + + + + + + {leases.map((item: any) => ( + + + + + + + + + + ))} + +
    Lease IDProviderInstance IDThreadStateUpdatedError
    {item.lease_id}{item.provider}{item.instance_id?.slice(0, 12) || '-'} + {item.thread.thread_id ? ( + {item.thread.thread_id.slice(0, 8)} + ) : ( + orphan + )} + {item.updated_ago}{item.error || '-'}
    +
    +
    +
    + ); +} + // Page: Threads List function ThreadsPage() { const [data, setData] = React.useState(null); @@ -1664,21 +2205,6 @@ function EvaluationPage() {

    Evaluations

    One evaluation contains many threads. Start jobs from config panel, track durable progress in list, then drill into thread trace.

    -
    -
    -

    1. Submit

    -

    Open config, choose scope/profile/sandbox, then submit one batch run.

    -
    -
    -

    2. Track

    -

    List auto-refreshes every 5s and survives reload. Status is backend-persisted.

    -
    -
    -

    3. Inspect

    -

    Open evaluation detail to jump to per-thread trace and tool-call timeline.

    -
    -
    -

    Current Submission

    @@ -1713,6 +2239,43 @@ function EvaluationPage() {
    +
    + Operator guide +
    +
    +

    1. Submit

    +

    Open config, choose scope/profile/sandbox, then submit one batch run.

    +
    +
    +

    2. Track

    +

    List auto-refreshes every 5s and survives reload. Status is backend-persisted.

    +
    +
    +

    3. Inspect

    +

    Open evaluation detail to jump to per-thread trace and tool-call timeline.

    +
    +
    + +
    +
    +

    Status Guide

    +
      + {statusReference.map((row) => ( +
    • {row[0]}: {row[1]}
    • + ))} +
    +
    +
    +

    Field Guide

    +
      + {parameterReference.slice(0, 4).map((row) => ( +
    • {row[0]}: {row[1]}
    • + ))} +
    +
    +
    +
    +

    Evaluations ({evalPagination?.total ?? evaluations.length})

    @@ -1814,25 +2377,6 @@ function EvaluationPage() {
    -
    -
    -

    Status Guide

    -
      - {statusReference.map((row) => ( -
    • {row[0]}: {row[1]}
    • - ))} -
    -
    -
    -

    Field Guide

    -
      - {parameterReference.slice(0, 4).map((row) => ( -
    • {row[0]}: {row[1]}
    • - ))} -
    -
    -
    - {composerOpen && ( // @@@evaluation-composer-modal - keep config editing in a fixed layer to avoid "tail jump" in long list pages.
    setComposerOpen(false)}> @@ -2169,12 +2713,11 @@ function Layout({ children }: { children: React.ReactNode }) { @@ -2192,8 +2735,10 @@ export default function App() { - } /> + } /> + } /> } /> + } /> } /> } /> } /> diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 5b346b325..fe5c1e759 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -55,8 +55,7 @@ body { .top-nav-brand { display: flex; - flex-direction: column; - gap: 0.15rem; + align-items: center; } .logo { @@ -65,11 +64,6 @@ body { color: var(--text); } -.nav-caption { - font-size: 0.82rem; - color: var(--text-muted); -} - .nav-links { display: flex; gap: 0.55rem; @@ -155,6 +149,247 @@ h2 { max-width: 72ch; } +.dashboard-grid { + display: grid; + grid-template-columns: repeat(12, minmax(0, 1fr)); + gap: 1rem; +} + +.dashboard-card { + grid-column: span 4; + display: flex; + flex-direction: column; + gap: 1rem; +} + +.dashboard-card-eval { + grid-column: span 4; +} + +.dashboard-card-head h2 { + margin: 0 0 0.4rem; +} + +.dashboard-metric-grid, +.resource-summary-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); + gap: 0.75rem; +} + +.resource-summary-grid { + margin-bottom: 1.25rem; +} + +.dashboard-metric { + display: flex; + flex-direction: column; + gap: 0.18rem; + min-height: 96px; + padding: 0.95rem 1rem; + border: 1px solid var(--border); + border-radius: 14px; + background: var(--panel); +} + +.dashboard-metric-label { + font-size: 0.8rem; + text-transform: uppercase; + letter-spacing: 0.04em; + color: var(--text-muted); +} + +.dashboard-metric-value { + font-size: 1.45rem; + line-height: 1.15; + color: var(--text); +} + +.dashboard-metric-note { + font-size: 0.82rem; + color: var(--text-secondary); +} + +.dashboard-metric-warning { + background: var(--warning-soft); +} + +.dashboard-metric-danger { + background: var(--danger-soft); +} + +.dashboard-metric-success { + background: var(--success-soft); +} + +.dashboard-eval-body { + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.dashboard-eval-id { + color: var(--text-secondary); + white-space: pre-wrap; + word-break: break-word; +} + +.dashboard-eval-footer { + display: grid; + grid-template-columns: minmax(0, 1fr); +} + +.dashboard-empty { + border: 1px dashed var(--border-strong); + border-radius: 14px; + padding: 1rem; + background: var(--bg-muted); +} + +.resource-section-shell { + margin-bottom: 1.25rem; +} + +.monitor-provider-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); + gap: 0.9rem; +} + +.monitor-provider-card { + border: 1px solid var(--border); + background: linear-gradient(180deg, var(--panel) 0%, var(--panel-strong) 100%); + border-radius: 16px; + padding: 1rem; + text-align: left; + display: flex; + flex-direction: column; + gap: 0.9rem; +} + +.monitor-provider-card:hover:not(:disabled) { + border-color: var(--border-strong); + background: var(--bg-soft); +} + +.monitor-provider-card.is-selected { + border-color: rgba(37, 99, 235, 0.24); + box-shadow: inset 0 0 0 1px rgba(37, 99, 235, 0.1); +} + +.monitor-provider-card.is-unavailable { + opacity: 0.82; +} + +.monitor-provider-header { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: 0.75rem; +} + +.monitor-provider-header strong { + display: block; + font-size: 0.95rem; +} + +.monitor-provider-header p { + margin: 0.2rem 0 0; + font-size: 0.82rem; + color: var(--text-muted); +} + +.monitor-provider-metrics { + display: grid; + grid-template-columns: repeat(3, minmax(0, 1fr)); + gap: 0.55rem; +} + +.monitor-provider-metrics .dashboard-metric { + min-height: 0; + padding: 0.75rem 0.8rem; + border-radius: 12px; +} + +.provider-inline-error { + color: var(--danger); + font-size: 0.84rem; + line-height: 1.45; +} + +.resource-session-shell { + margin-top: 1rem; +} + +.resource-overview-strip { + display: flex; + gap: 0.65rem; + flex-wrap: wrap; + margin-bottom: 1rem; +} + +.resource-overview-pill { + display: inline-flex; + align-items: center; + gap: 0.45rem; + padding: 0.5rem 0.75rem; + border-radius: 999px; + border: 1px solid var(--border); + background: var(--bg-muted); + color: var(--text-secondary); +} + +.resource-overview-label { + font-size: 0.74rem; + letter-spacing: 0.04em; + text-transform: uppercase; + color: var(--text-muted); +} + +.lease-cluster-grid { + display: grid; + grid-template-columns: repeat(12, minmax(0, 1fr)); + gap: 1rem; +} + +.lease-cluster-grid > * { + grid-column: span 6; +} + +.lease-details-shell { + margin-top: 1rem; + border: 1px solid var(--border); + border-radius: 16px; + background: var(--panel); + padding: 0.9rem 1rem 1rem; +} + +.lease-details-shell summary { + cursor: pointer; + color: var(--text); + font-weight: 600; + margin-bottom: 0.9rem; +} + +.operator-notes-shell { + margin-bottom: 1.25rem; + border: 1px solid var(--border); + border-radius: 16px; + background: var(--panel); + padding: 0.85rem 1rem 1rem; +} + +.operator-notes-shell summary { + cursor: pointer; + color: var(--text); + font-weight: 600; +} + +.operator-notes-shell .evaluation-flow, +.operator-notes-shell .evaluation-notes { + margin-top: 1rem; +} + .page-loading, .page-error { padding: 3rem 0; @@ -963,10 +1198,12 @@ section.eval-runtime-panel { } @media (max-width: 1080px) { + .dashboard-card, .evaluation-flow > *, .evaluation-notes > *, .evaluation-overview > *, - .evaluation-column { + .evaluation-column, + .lease-cluster-grid > * { grid-column: span 12; } } diff --git a/tests/Integration/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py index 3d8d3c7a0..5aa5cdc9f 100644 --- a/tests/Integration/test_monitor_resources_route.py +++ b/tests/Integration/test_monitor_resources_route.py @@ -46,3 +46,16 @@ def test_monitor_health_route_smoke(): assert "snapshot_at" in payload assert "db" in payload assert "sessions" in payload + + +def test_monitor_dashboard_route_smoke(): + with TestClient(app) as client: + response = client.get("/api/monitor/dashboard") + + assert response.status_code == 200 + payload = response.json() + assert "snapshot_at" in payload + assert "resources_summary" in payload + assert "infra" in payload + assert "workload" in payload + assert "latest_evaluation" in payload From f8136f9e7687ad21b130d5155aa298b412fb1a0c Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 18:34:49 +0800 Subject: [PATCH 15/87] feat: add monitor lease semantics summary --- backend/web/monitor.py | 106 +++++++++++++++--- backend/web/routers/monitor.py | 10 +- ...2026-04-06-resource-observability-split.md | 2 +- ...-06-resource-observability-split-design.md | 18 +++ frontend/monitor/src/App.tsx | 30 ++--- .../test_monitor_resources_route.py | 12 ++ tests/Unit/monitor/test_monitor_compat.py | 78 +++++++++++++ 7 files changed, 220 insertions(+), 36 deletions(-) diff --git a/backend/web/monitor.py b/backend/web/monitor.py index 57054c553..53bf68e85 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -851,6 +851,73 @@ def make_badge(desired, observed): } +LEASE_SEMANTIC_ORDER = [ + "orphan_diverged", + "diverged", + "orphan", + "healthy", +] + +LEASE_SEMANTIC_META = { + "orphan_diverged": { + "title": "Orphaned + Diverged", + "description": "Lease lost thread binding while desired and observed state still disagree.", + }, + "diverged": { + "title": "Diverged", + "description": "Lease is still attached to a thread, but runtime state has not converged.", + }, + "orphan": { + "title": "Orphans", + "description": "Lease has no active thread binding. Usually cleanup or historical residue.", + }, + "healthy": { + "title": "Healthy", + "description": "Lease has a thread binding and desired state matches observed state.", + }, +} + + +def classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) -> dict[str, str]: + is_orphan = not bool(thread_id) + is_converged = bool(badge.get("converged")) + if is_orphan and not is_converged: + category = "orphan_diverged" + elif not is_converged: + category = "diverged" + elif is_orphan: + category = "orphan" + else: + category = "healthy" + meta = LEASE_SEMANTIC_META[category] + return { + "category": category, + "title": meta["title"], + "description": meta["description"], + } + + +def _serialize_lease_row(row: sqlite3.Row) -> dict[str, Any]: + badge = make_badge(row["desired_state"], row["observed_state"]) + semantics = classify_lease_semantics(thread_id=row["thread_id"], badge=badge) + return { + "lease_id": row["lease_id"], + "lease_url": f"/lease/{row['lease_id']}", + "provider": row["provider_name"], + "instance_id": row["current_instance_id"], + "thread": { + "thread_id": row["thread_id"], + "thread_url": f"/thread/{row['thread_id']}" if row["thread_id"] else None, + "is_orphan": not row["thread_id"], + }, + "state_badge": badge, + "semantics": semantics, + "error": row["last_error"], + "updated_at": row["updated_at"], + "updated_ago": format_time_ago(row["updated_at"]), + } + + def load_thread_mode_map(thread_ids: list[str]) -> dict[str, dict]: """Load thread mode metadata from thread_config.""" if not thread_ids or not DB_PATH.exists(): @@ -1988,27 +2055,32 @@ def list_leases(db: sqlite3.Connection = Depends(get_db)): ORDER BY sl.updated_at DESC """).fetchall() - items = [] - for row in rows: - items.append( + items = [_serialize_lease_row(row) for row in rows] + summary = {key: 0 for key in LEASE_SEMANTIC_ORDER} + for item in items: + summary[item["semantics"]["category"]] += 1 + summary["total"] = len(items) + groups = [] + for key in LEASE_SEMANTIC_ORDER: + group_items = [item for item in items if item["semantics"]["category"] == key] + meta = LEASE_SEMANTIC_META[key] + groups.append( { - "lease_id": row["lease_id"], - "lease_url": f"/lease/{row['lease_id']}", - "provider": row["provider_name"], - "instance_id": row["current_instance_id"], - "thread": { - "thread_id": row["thread_id"], - "thread_url": f"/thread/{row['thread_id']}" if row["thread_id"] else None, - "is_orphan": not row["thread_id"], - }, - "state_badge": make_badge(row["desired_state"], row["observed_state"]), - "error": row["last_error"], - "updated_at": row["updated_at"], - "updated_ago": format_time_ago(row["updated_at"]), + "key": key, + "title": meta["title"], + "description": meta["description"], + "count": len(group_items), + "items": group_items, } ) - return {"title": "All Leases", "count": len(items), "items": items} + return { + "title": "All Leases", + "count": len(items), + "summary": summary, + "groups": groups, + "items": items, + } @router.get("/lease/{lease_id}") diff --git a/backend/web/routers/monitor.py b/backend/web/routers/monitor.py index 3641adae0..143a07131 100644 --- a/backend/web/routers/monitor.py +++ b/backend/web/routers/monitor.py @@ -34,7 +34,7 @@ def dashboard_snapshot(request: Request): evaluations = list_evaluations(limit=5, offset=0, request=request) resource_summary = resources.get("summary") or {} - lease_items = leases.get("items") or [] + lease_summary = leases.get("summary") or {} latest_eval = (evaluations.get("items") or [None])[0] latest_eval_summary = None @@ -61,10 +61,10 @@ def dashboard_snapshot(request: Request): "infra": { "providers_active": int(resource_summary.get("active_providers") or 0), "providers_unavailable": int(resource_summary.get("unavailable_providers") or 0), - "leases_total": int(leases.get("count") or 0), - "leases_diverged": sum(1 for item in lease_items if not bool((item.get("state_badge") or {}).get("converged"))), - "leases_orphan": sum(1 for item in lease_items if bool((item.get("thread") or {}).get("is_orphan"))), - "leases_healthy": sum(1 for item in lease_items if bool((item.get("state_badge") or {}).get("converged"))), + "leases_total": int(lease_summary.get("total") or leases.get("count") or 0), + "leases_diverged": int(lease_summary.get("diverged") or 0) + int(lease_summary.get("orphan_diverged") or 0), + "leases_orphan": int(lease_summary.get("orphan") or 0) + int(lease_summary.get("orphan_diverged") or 0), + "leases_healthy": int(lease_summary.get("healthy") or 0), }, "workload": { "db_sessions_total": int(((health.get("db") or {}).get("counts") or {}).get("chat_sessions") or 0), diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index cf4fa1935..3d2eeb11a 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -26,7 +26,7 @@ - monitor `Resources` uses the global monitor contract and includes grouped lease triage - evaluation tutorial/reference sections are collapsed by default - next honest follow-up remains: - - `D3` because lease regrouping still relies on shallow frontend grouping over raw facts + - `D3` because lease regrouping has now moved onto a backend semantic contract, but the categories are still shallow and need stronger lifecycle meaning - `D2` because provisional eval detail still needs a stronger operator-facing explanation and artifact/log next-step surface --- diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index e3e5162c5..1473646e9 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -261,6 +261,24 @@ The dashboard is a switchboard, not a full destination page. It should answer - lease regrouping exists, but backend-side semantic categorization is still shallow and belongs to `D3` - dashboard is currently a compact switchboard; it does not yet expose richer error drill-down or resource anomaly timelines +### Current D3 Phase-1 Landing + +- `/api/monitor/leases` now returns: + - flat `items` + - `summary` + - ordered semantic `groups` +- each lease item now carries backend-owned `semantics`: + - `healthy` + - `diverged` + - `orphan` + - `orphan_diverged` +- monitor dashboard and resources page now read those backend semantics instead of recomputing lease meaning from raw `thread.is_orphan` and `desired != observed` + +### D3 Remaining Gaps + +- semantics are still inferred from current lease row + thread binding only; they do not yet account for stronger lifecycle facts such as historical cleanup windows or explicit terminal/session shutdown markers +- the legacy `/leases` flat table still exists as a drill-down/debug surface and has not been redesigned beyond consuming the new summary/category contract + ### Why this IA - the backend already exposes `/api/monitor/resources`; the missing piece is a monitor entry surface, not another resource backend invention diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index a426b80b2..37be34775 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -316,10 +316,13 @@ function MonitorResourcesPage() { const providers = Array.isArray(resourceData.providers) ? resourceData.providers : []; const summary = resourceData.summary || {}; const leases = Array.isArray(leaseData.items) ? leaseData.items : []; + const leaseSummary = leaseData.summary || {}; + const leaseGroups = Array.isArray(leaseData.groups) ? leaseData.groups : []; const selectedProvider = providers.find((provider: any) => provider.id === selectedId) || providers[0] || null; - const divergedLeases = leases.filter((item: any) => item.state_badge?.desired !== item.state_badge?.observed); - const orphanLeases = leases.filter((item: any) => Boolean(item.thread?.is_orphan)); - const healthyLeases = leases.filter((item: any) => Boolean(item.state_badge?.converged)); + const divergedLeases = (leaseGroups.find((group: any) => group.key === 'diverged')?.items || []) as any[]; + const orphanDivergedLeases = (leaseGroups.find((group: any) => group.key === 'orphan_diverged')?.items || []) as any[]; + const orphanLeases = (leaseGroups.find((group: any) => group.key === 'orphan')?.items || []) as any[]; + const healthyLeases = (leaseGroups.find((group: any) => group.key === 'healthy')?.items || []) as any[]; const refreshedAt = summary.last_refreshed_at || summary.snapshot_at; const selectedSessions = Array.isArray(selectedProvider?.sessions) ? selectedProvider.sessions : []; const selectedRunning = selectedSessions.filter((session: any) => session.status === 'running').length; @@ -341,8 +344,8 @@ function MonitorResourcesPage() {
    - 0 ? 'warning' : 'success'} /> - 0 ? 'success' : 'danger'} /> + 0 ? 'warning' : 'success'} /> + 0 ? 'success' : 'danger'} />
    @@ -491,7 +494,7 @@ function MonitorResourcesPage() {

    Lease Health

    -

    Grouped triage surface. Diverged rows show state drift; orphan rows show leases no longer bound to a live thread.

    +

    Grouped triage surface from backend lease semantics. Diverged rows show state drift; orphan rows show leases no longer bound to a live thread.

    Legacy flat table @@ -499,8 +502,8 @@ function MonitorResourcesPage() {
    -

    Diverged ({divergedLeases.length})

    -

    Desired and observed states no longer match.

    +

    Diverged ({divergedLeases.length + orphanDivergedLeases.length})

    +

    Desired and observed states no longer match, including leases that already lost thread binding.

    @@ -512,7 +515,7 @@ function MonitorResourcesPage() { - {divergedLeases.slice(0, 8).map((item: any) => ( + {[...orphanDivergedLeases, ...divergedLeases].slice(0, 8).map((item: any) => ( @@ -521,7 +524,7 @@ function MonitorResourcesPage() { ))} - {divergedLeases.length === 0 ? ( + {divergedLeases.length + orphanDivergedLeases.length === 0 ? ( @@ -1750,14 +1753,15 @@ function LeasesPage() { if (!data) return
    Loading...
    ; const items = divergedOnly - ? data.items.filter((item: any) => item.state_badge?.desired !== item.state_badge?.observed) + ? data.items.filter((item: any) => ['diverged', 'orphan_diverged'].includes(item.semantics?.category)) : data.items; + const summary = data.summary || {}; return (

    {data.title}

    -

    Global sandbox lease table. Treat this as the infrastructure lens; filtered divergence and raw event history branch out from here.

    -

    Total: {items.length}{divergedOnly ? ` / ${data.count} (diverged only)` : ''}

    +

    Global sandbox lease table. Treat this as the infrastructure lens; backend semantics now distinguish healthy, diverged, orphan, and orphan-diverged rows.

    +

    Total: {items.length}{divergedOnly ? ` / ${data.count} (diverged only)` : ''} · healthy {summary.healthy || 0} · orphan {summary.orphan || 0} · orphan+diverged {summary.orphan_diverged || 0}

    {divergedOnly ? 'Show all leases' : 'Only diverged leases'} diff --git a/tests/Integration/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py index 5aa5cdc9f..d3cf5f404 100644 --- a/tests/Integration/test_monitor_resources_route.py +++ b/tests/Integration/test_monitor_resources_route.py @@ -59,3 +59,15 @@ def test_monitor_dashboard_route_smoke(): assert "infra" in payload assert "workload" in payload assert "latest_evaluation" in payload + + +def test_monitor_leases_route_exposes_summary_and_groups(): + with TestClient(app) as client: + response = client.get("/api/monitor/leases") + + assert response.status_code == 200 + payload = response.json() + assert "summary" in payload + assert "groups" in payload + assert set(payload["summary"]).issuperset({"total", "healthy", "diverged", "orphan", "orphan_diverged"}) + assert isinstance(payload["groups"], list) diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index c5359ee6a..5cc253eed 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -77,3 +77,81 @@ def test_list_threads_second_page_is_not_sliced_empty_after_sql_pagination(tmp_p assert payload["pagination"]["has_prev"] is True assert payload["pagination"]["has_next"] is False assert payload["pagination"]["next_offset"] is None + + +def test_list_leases_exposes_semantic_groups_and_summary(tmp_path): + db_path = tmp_path / "sandbox.db" + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + conn.executescript( + """ + CREATE TABLE sandbox_leases ( + lease_id TEXT PRIMARY KEY, + provider_name TEXT, + desired_state TEXT, + observed_state TEXT, + current_instance_id TEXT, + last_error TEXT, + created_at TEXT, + updated_at TEXT + ); + + CREATE TABLE chat_sessions ( + chat_session_id TEXT PRIMARY KEY, + thread_id TEXT, + lease_id TEXT, + status TEXT, + started_at TEXT, + last_active_at TEXT + ); + """ + ) + conn.executemany( + """ + INSERT INTO sandbox_leases ( + lease_id, provider_name, desired_state, observed_state, current_instance_id, last_error, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + [ + ("lease-healthy", "local", "running", "running", "inst-1", None, "2026-04-06T00:00:00", "2026-04-06T00:10:00"), + ("lease-diverged", "local", "running", "detached", "inst-2", "drift", "2026-04-06T00:00:00", "2026-04-06T00:11:00"), + ("lease-orphan-diverged", "local", "running", "detached", "inst-3", None, "2026-04-06T00:00:00", "2026-04-06T00:12:00"), + ("lease-orphan", "local", "stopped", "stopped", "inst-4", None, "2026-04-06T00:00:00", "2026-04-06T00:13:00"), + ], + ) + conn.executemany( + """ + INSERT INTO chat_sessions ( + chat_session_id, thread_id, lease_id, status, started_at, last_active_at + ) VALUES (?, ?, ?, ?, ?, ?) + """, + [ + ("sess-1", "thread-1", "lease-healthy", "running", "2026-04-06T00:01:00", "2026-04-06T00:10:00"), + ("sess-2", "thread-2", "lease-diverged", "running", "2026-04-06T00:02:00", "2026-04-06T00:11:00"), + ], + ) + conn.commit() + + try: + payload = monitor.list_leases(db=conn) + finally: + conn.close() + + assert payload["summary"] == { + "total": 4, + "healthy": 1, + "diverged": 1, + "orphan": 1, + "orphan_diverged": 1, + } + assert [group["key"] for group in payload["groups"]] == [ + "orphan_diverged", + "diverged", + "orphan", + "healthy", + ] + by_id = {item["lease_id"]: item for item in payload["items"]} + assert by_id["lease-healthy"]["semantics"]["category"] == "healthy" + assert by_id["lease-diverged"]["semantics"]["category"] == "diverged" + assert by_id["lease-orphan-diverged"]["semantics"]["category"] == "orphan_diverged" + assert by_id["lease-orphan"]["semantics"]["category"] == "orphan" From aca9b24e72b508c9f594d800ea42ec60d31c692e Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 18:41:25 +0800 Subject: [PATCH 16/87] refactor: lift monitor lease semantics into service --- backend/web/monitor.py | 112 +----------------- backend/web/routers/monitor.py | 9 +- backend/web/services/monitor_service.py | 105 +++++++++++++--- ...-06-resource-observability-split-design.md | 1 + tests/Unit/monitor/test_monitor_compat.py | 110 +++++++++-------- 5 files changed, 149 insertions(+), 188 deletions(-) diff --git a/backend/web/monitor.py b/backend/web/monitor.py index 53bf68e85..1e82c39e5 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -851,73 +851,6 @@ def make_badge(desired, observed): } -LEASE_SEMANTIC_ORDER = [ - "orphan_diverged", - "diverged", - "orphan", - "healthy", -] - -LEASE_SEMANTIC_META = { - "orphan_diverged": { - "title": "Orphaned + Diverged", - "description": "Lease lost thread binding while desired and observed state still disagree.", - }, - "diverged": { - "title": "Diverged", - "description": "Lease is still attached to a thread, but runtime state has not converged.", - }, - "orphan": { - "title": "Orphans", - "description": "Lease has no active thread binding. Usually cleanup or historical residue.", - }, - "healthy": { - "title": "Healthy", - "description": "Lease has a thread binding and desired state matches observed state.", - }, -} - - -def classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) -> dict[str, str]: - is_orphan = not bool(thread_id) - is_converged = bool(badge.get("converged")) - if is_orphan and not is_converged: - category = "orphan_diverged" - elif not is_converged: - category = "diverged" - elif is_orphan: - category = "orphan" - else: - category = "healthy" - meta = LEASE_SEMANTIC_META[category] - return { - "category": category, - "title": meta["title"], - "description": meta["description"], - } - - -def _serialize_lease_row(row: sqlite3.Row) -> dict[str, Any]: - badge = make_badge(row["desired_state"], row["observed_state"]) - semantics = classify_lease_semantics(thread_id=row["thread_id"], badge=badge) - return { - "lease_id": row["lease_id"], - "lease_url": f"/lease/{row['lease_id']}", - "provider": row["provider_name"], - "instance_id": row["current_instance_id"], - "thread": { - "thread_id": row["thread_id"], - "thread_url": f"/thread/{row['thread_id']}" if row["thread_id"] else None, - "is_orphan": not row["thread_id"], - }, - "state_badge": badge, - "semantics": semantics, - "error": row["last_error"], - "updated_at": row["updated_at"], - "updated_ago": format_time_ago(row["updated_at"]), - } - - def load_thread_mode_map(thread_ids: list[str]) -> dict[str, dict]: """Load thread mode metadata from thread_config.""" if not thread_ids or not DB_PATH.exists(): @@ -2038,49 +1971,10 @@ def get_thread_trace(thread_id: str, run_id: str | None = None, limit: int = 200 @router.get("/leases") -def list_leases(db: sqlite3.Connection = Depends(get_db)): - rows = db.execute(""" - SELECT - sl.lease_id, - sl.provider_name, - sl.desired_state, - sl.observed_state, - sl.current_instance_id, - sl.last_error, - sl.updated_at, - MAX(cs.thread_id) as thread_id - FROM sandbox_leases sl - LEFT JOIN chat_sessions cs ON sl.lease_id = cs.lease_id - GROUP BY sl.lease_id - ORDER BY sl.updated_at DESC - """).fetchall() +def list_leases(): + from backend.web.services import monitor_service - items = [_serialize_lease_row(row) for row in rows] - summary = {key: 0 for key in LEASE_SEMANTIC_ORDER} - for item in items: - summary[item["semantics"]["category"]] += 1 - summary["total"] = len(items) - groups = [] - for key in LEASE_SEMANTIC_ORDER: - group_items = [item for item in items if item["semantics"]["category"] == key] - meta = LEASE_SEMANTIC_META[key] - groups.append( - { - "key": key, - "title": meta["title"], - "description": meta["description"], - "count": len(group_items), - "items": group_items, - } - ) - - return { - "title": "All Leases", - "count": len(items), - "summary": summary, - "groups": groups, - "items": items, - } + return monitor_service.list_leases() @router.get("/lease/{lease_id}") diff --git a/backend/web/routers/monitor.py b/backend/web/routers/monitor.py index 143a07131..12ed2cdca 100644 --- a/backend/web/routers/monitor.py +++ b/backend/web/routers/monitor.py @@ -8,7 +8,7 @@ from fastapi import HTTPException, Query, Request -from backend.web.monitor import get_db, list_evaluations, list_leases, router +from backend.web.monitor import list_evaluations, list_leases, router from backend.web.services import monitor_service from backend.web.services.resource_cache import ( get_monitor_resource_overview_snapshot, @@ -25,12 +25,7 @@ def health_snapshot(): def dashboard_snapshot(request: Request): health = monitor_service.runtime_health_snapshot() resources = get_monitor_resource_overview_snapshot() - db_gen = get_db() - db = next(db_gen) - try: - leases = list_leases(db=db) - finally: - db_gen.close() + leases = list_leases() evaluations = list_evaluations(limit=5, offset=0, request=request) resource_summary = resources.get("summary") or {} diff --git a/backend/web/services/monitor_service.py b/backend/web/services/monitor_service.py index 31f59b729..4d30d074e 100644 --- a/backend/web/services/monitor_service.py +++ b/backend/web/services/monitor_service.py @@ -75,6 +75,52 @@ def _lease_link(lease_id: str | None) -> dict[str, Any]: return {"lease_id": lease_id, "lease_url": f"/lease/{lease_id}" if lease_id else None} +LEASE_SEMANTIC_ORDER = [ + "orphan_diverged", + "diverged", + "orphan", + "healthy", +] + +LEASE_SEMANTIC_META = { + "orphan_diverged": { + "title": "Orphaned + Diverged", + "description": "Lease lost thread binding while desired and observed state still disagree.", + }, + "diverged": { + "title": "Diverged", + "description": "Lease is still attached to a thread, but runtime state has not converged.", + }, + "orphan": { + "title": "Orphans", + "description": "Lease has no active thread binding. Usually cleanup or historical residue.", + }, + "healthy": { + "title": "Healthy", + "description": "Lease has a thread binding and desired state matches observed state.", + }, +} + + +def _classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) -> dict[str, str]: + is_orphan = not bool(thread_id) + is_converged = bool(badge.get("converged")) + if is_orphan and not is_converged: + category = "orphan_diverged" + elif not is_converged: + category = "diverged" + elif is_orphan: + category = "orphan" + else: + category = "healthy" + meta = LEASE_SEMANTIC_META[category] + return { + "category": category, + "title": meta["title"], + "description": meta["description"], + } + + # --------------------------------------------------------------------------- # Mappers (private) # --------------------------------------------------------------------------- @@ -130,21 +176,50 @@ def _map_thread_detail(thread_id: str, sessions: list[dict[str, Any]]) -> dict[s def _map_leases(rows: list[dict[str, Any]]) -> dict[str, Any]: - items = [ - { - "lease_id": row["lease_id"], - "lease_url": f"/lease/{row['lease_id']}", - "provider": row["provider_name"], - "instance_id": row["current_instance_id"], - "thread": _thread_ref(row["thread_id"]), - "state_badge": _make_badge(row["desired_state"], row["observed_state"]), - "error": row["last_error"], - "updated_at": row["updated_at"], - "updated_ago": _format_time_ago(row["updated_at"]), - } - for row in rows - ] - return {"title": "All Leases", "count": len(items), "items": items} + items = [] + for row in rows: + badge = _make_badge(row["desired_state"], row["observed_state"]) + items.append( + { + "lease_id": row["lease_id"], + "lease_url": f"/lease/{row['lease_id']}", + "provider": row["provider_name"], + "instance_id": row["current_instance_id"], + "thread": _thread_ref(row["thread_id"]), + "state_badge": badge, + "semantics": _classify_lease_semantics(thread_id=row["thread_id"], badge=badge), + "error": row["last_error"], + "updated_at": row["updated_at"], + "updated_ago": _format_time_ago(row["updated_at"]), + } + ) + + summary = {key: 0 for key in LEASE_SEMANTIC_ORDER} + for item in items: + summary[item["semantics"]["category"]] += 1 + summary["total"] = len(items) + + groups = [] + for key in LEASE_SEMANTIC_ORDER: + meta = LEASE_SEMANTIC_META[key] + group_items = [item for item in items if item["semantics"]["category"] == key] + groups.append( + { + "key": key, + "title": meta["title"], + "description": meta["description"], + "count": len(group_items), + "items": group_items, + } + ) + + return { + "title": "All Leases", + "count": len(items), + "summary": summary, + "groups": groups, + "items": items, + } def _map_lease_detail( diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 1473646e9..1aadc8646 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -272,6 +272,7 @@ The dashboard is a switchboard, not a full destination page. It should answer - `diverged` - `orphan` - `orphan_diverged` +- the semantic projection now lives in `backend/web/services/monitor_service.py`, while compat monitor route code only delegates - monitor dashboard and resources page now read those backend semantics instead of recomputing lease meaning from raw `thread.is_orphan` and `desired != observed` ### D3 Remaining Gaps diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index 5cc253eed..126160c54 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -1,6 +1,7 @@ import sqlite3 from backend.web import monitor +from backend.web.services import monitor_service def _bootstrap_threads_monitor_db(db_path, count: int) -> sqlite3.Connection: @@ -79,63 +80,58 @@ def test_list_threads_second_page_is_not_sliced_empty_after_sql_pagination(tmp_p assert payload["pagination"]["next_offset"] is None -def test_list_leases_exposes_semantic_groups_and_summary(tmp_path): - db_path = tmp_path / "sandbox.db" - conn = sqlite3.connect(db_path) - conn.row_factory = sqlite3.Row - conn.executescript( - """ - CREATE TABLE sandbox_leases ( - lease_id TEXT PRIMARY KEY, - provider_name TEXT, - desired_state TEXT, - observed_state TEXT, - current_instance_id TEXT, - last_error TEXT, - created_at TEXT, - updated_at TEXT - ); - - CREATE TABLE chat_sessions ( - chat_session_id TEXT PRIMARY KEY, - thread_id TEXT, - lease_id TEXT, - status TEXT, - started_at TEXT, - last_active_at TEXT - ); - """ - ) - conn.executemany( - """ - INSERT INTO sandbox_leases ( - lease_id, provider_name, desired_state, observed_state, current_instance_id, last_error, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) - """, - [ - ("lease-healthy", "local", "running", "running", "inst-1", None, "2026-04-06T00:00:00", "2026-04-06T00:10:00"), - ("lease-diverged", "local", "running", "detached", "inst-2", "drift", "2026-04-06T00:00:00", "2026-04-06T00:11:00"), - ("lease-orphan-diverged", "local", "running", "detached", "inst-3", None, "2026-04-06T00:00:00", "2026-04-06T00:12:00"), - ("lease-orphan", "local", "stopped", "stopped", "inst-4", None, "2026-04-06T00:00:00", "2026-04-06T00:13:00"), - ], - ) - conn.executemany( - """ - INSERT INTO chat_sessions ( - chat_session_id, thread_id, lease_id, status, started_at, last_active_at - ) VALUES (?, ?, ?, ?, ?, ?) - """, - [ - ("sess-1", "thread-1", "lease-healthy", "running", "2026-04-06T00:01:00", "2026-04-06T00:10:00"), - ("sess-2", "thread-2", "lease-diverged", "running", "2026-04-06T00:02:00", "2026-04-06T00:11:00"), - ], - ) - conn.commit() - - try: - payload = monitor.list_leases(db=conn) - finally: - conn.close() +def test_list_leases_exposes_semantic_groups_and_summary(monkeypatch): + class FakeRepo: + def query_leases(self): + return [ + { + "lease_id": "lease-healthy", + "provider_name": "local", + "desired_state": "running", + "observed_state": "running", + "current_instance_id": "inst-1", + "last_error": None, + "updated_at": "2026-04-06T00:10:00", + "thread_id": "thread-1", + }, + { + "lease_id": "lease-diverged", + "provider_name": "local", + "desired_state": "running", + "observed_state": "detached", + "current_instance_id": "inst-2", + "last_error": "drift", + "updated_at": "2026-04-06T00:11:00", + "thread_id": "thread-2", + }, + { + "lease_id": "lease-orphan-diverged", + "provider_name": "local", + "desired_state": "running", + "observed_state": "detached", + "current_instance_id": "inst-3", + "last_error": None, + "updated_at": "2026-04-06T00:12:00", + "thread_id": None, + }, + { + "lease_id": "lease-orphan", + "provider_name": "local", + "desired_state": "stopped", + "observed_state": "stopped", + "current_instance_id": "inst-4", + "last_error": None, + "updated_at": "2026-04-06T00:13:00", + "thread_id": None, + }, + ] + + def close(self): + return None + + monkeypatch.setattr(monitor_service, "make_sandbox_monitor_repo", lambda: FakeRepo()) + + payload = monitor_service.list_leases() assert payload["summary"] == { "total": 4, From 3a7c798421022660c8c61c30895a77db223a88b5 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 18:50:50 +0800 Subject: [PATCH 17/87] feat: clarify provisional evaluation operator state --- backend/web/monitor.py | 9 ++ backend/web/services/monitor_service.py | 113 +++++++++++++++++ ...2026-04-06-resource-observability-split.md | 6 +- ...-06-resource-observability-split-design.md | 7 ++ frontend/monitor/src/App.tsx | 119 +++++++++++++----- frontend/monitor/src/styles.css | 111 +++++++++++++++- tests/Unit/monitor/test_monitor_compat.py | 37 ++++++ 7 files changed, 368 insertions(+), 34 deletions(-) diff --git a/backend/web/monitor.py b/backend/web/monitor.py index 1e82c39e5..70bbf062e 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -20,6 +20,7 @@ from pydantic import BaseModel, Field from backend.web.core.config import DB_PATH +from backend.web.services.monitor_service import build_evaluation_operator_surface from storage.providers.sqlite.kernel import SQLiteDBRole, connect_sqlite, resolve_role_db_path PROJECT_ROOT = Path(__file__).resolve().parents[2] @@ -1902,6 +1903,14 @@ def get_evaluation_detail(evaluation_id: str, request: Request, db: sqlite3.Conn "threads_started": threads_started, "progress_source": progress_source, "score": score, + "operator_surface": build_evaluation_operator_surface( + status=status, + notes=notes, + score=score, + threads_total=total, + threads_running=running_count, + threads_done=threads_done, + ), }, "threads": {"title": "Evaluation Threads", "count": total, "items": thread_items}, } diff --git a/backend/web/services/monitor_service.py b/backend/web/services/monitor_service.py index 4d30d074e..529c18e41 100644 --- a/backend/web/services/monitor_service.py +++ b/backend/web/services/monitor_service.py @@ -3,6 +3,7 @@ from __future__ import annotations import json +import re from datetime import UTC, datetime from typing import Any @@ -102,6 +103,16 @@ def _lease_link(lease_id: str | None) -> dict[str, Any]: } +EVAL_NOTE_KEYS = [ + "runner", + "rc", + "sandbox", + "run_dir", + "stdout_log", + "stderr_log", +] + + def _classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) -> dict[str, str]: is_orphan = not bool(thread_id) is_converged = bool(badge.get("converged")) @@ -121,6 +132,108 @@ def _classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) - } +def _extract_eval_note_value(notes: str, key: str) -> str | None: + match = re.search(rf"(?:^|[ |]){re.escape(key)}=([^ ]+)", notes) + if not match: + return None + return match.group(1).strip() + + +def build_evaluation_operator_surface( + *, + status: str, + notes: str, + score: dict[str, Any], + threads_total: int, + threads_running: int, + threads_done: int, +) -> dict[str, Any]: + extracted = {key: _extract_eval_note_value(notes, key) for key in EVAL_NOTE_KEYS} + rc_text = extracted.get("rc") + try: + rc = int(rc_text) if rc_text is not None else None + except ValueError: + rc = None + + scored = bool(score.get("scored")) + score_gate = str(score.get("score_gate") or "provisional") + artifacts = [ + {"label": "Run directory", "path": score.get("run_dir") or extracted.get("run_dir")}, + {"label": "Run manifest", "path": score.get("manifest_path")}, + {"label": "STDOUT log", "path": extracted.get("stdout_log")}, + {"label": "STDERR log", "path": extracted.get("stderr_log")}, + {"label": "Eval summary", "path": score.get("eval_summary_path")}, + {"label": "Trace summaries", "path": score.get("trace_summaries_path")}, + ] + artifacts = [item for item in artifacts if item["path"]] + + facts = [ + {"label": "Status", "value": status}, + {"label": "Score gate", "value": score_gate}, + {"label": "Threads materialized", "value": str(threads_total)}, + {"label": "Threads running", "value": str(threads_running)}, + {"label": "Threads done", "value": str(threads_done)}, + ] + runner = extracted.get("runner") + if runner: + facts.append({"label": "Runner", "value": runner}) + if rc is not None: + facts.append({"label": "Exit code", "value": str(rc)}) + + tone = "default" + headline = "Evaluation is still collecting runtime evidence." + summary = "Use the artifacts below to inspect progress and confirm whether thread rows are materializing." + next_steps = [ + "Open the run manifest to confirm the slice payload and output directory.", + "Inspect stdout/stderr before assuming the run is healthy.", + ] + + if status == "provisional" and not scored: + tone = "warning" + headline = "Evaluation is provisional. Final score is blocked." + summary = "This run has not produced the final eval summary yet, so publishable scoring is intentionally withheld." + next_steps = [ + "Check whether eval_summary_path is still missing because the run is ongoing or because the runner exited early.", + "Use stdout/stderr logs to confirm whether the solve phase actually started.", + ] + + if rc is not None and rc != 0 and threads_total == 0: + tone = "danger" + headline = "Runner exited before evaluation threads materialized." + summary = "Treat this as a bootstrap failure, not as an empty successful run. No evaluation thread rows were created." + next_steps = [ + "Inspect STDERR first to find the failing bootstrap step.", + "Use the run manifest and stdout log to confirm whether the slice was prepared before exit.", + "Re-run only after the failing dependency or model configuration is understood.", + ] + elif status == "running": + tone = "default" + headline = "Evaluation is actively running." + summary = "Thread rows and traces may lag behind the runner. Use live progress and logs before declaring drift." + next_steps = [ + "Refresh after new thread rows materialize.", + "Inspect traces only after the first active thread appears.", + ] + elif status == "completed" and scored: + tone = "success" + headline = "Evaluation finished with a publishable score surface." + summary = "Score artifacts are present. Use the thread table to drill into trace-level evidence." + next_steps = [ + "Open threads with low-quality traces and inspect tool-call detail.", + "Use the eval summary and trace summaries to compare runs.", + ] + + return { + "tone": tone, + "headline": headline, + "summary": summary, + "facts": facts, + "artifacts": artifacts, + "next_steps": next_steps, + "raw_notes": notes, + } + + # --------------------------------------------------------------------------- # Mappers (private) # --------------------------------------------------------------------------- diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 3d2eeb11a..142bcae8d 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -25,9 +25,13 @@ - root lands on `/dashboard` - monitor `Resources` uses the global monitor contract and includes grouped lease triage - evaluation tutorial/reference sections are collapsed by default +- `D2` now has a landed phase-1: + - evaluation detail payload includes backend-owned `info.operator_surface` + - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps + - redundant provisional score metadata is folded behind `Score artifacts (provisional)` instead of occupying the first screen - next honest follow-up remains: - `D3` because lease regrouping has now moved onto a backend semantic contract, but the categories are still shallow and need stronger lifecycle meaning - - `D2` because provisional eval detail still needs a stronger operator-facing explanation and artifact/log next-step surface + - `D2` because the current operator surface only hardens the provisional/bootstrap-failure case and still lacks richer lifecycle typing for other eval states --- diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 1aadc8646..6774d652f 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -172,6 +172,13 @@ These are not vague “polish later” notes. They are concrete seams that now b - Required outcome: - provisional state must explain what exists now, what is still pending, where logs/artifacts live, and what the operator should do next. - this is a backend-first surface; if new fields are needed, add them to the payload instead of making the frontend guess from free-text notes. +- Current landed phase: + - evaluation detail payload now includes `info.operator_surface`, built by a database-agnostic helper in `backend/web/services/monitor_service.py` + - the monitor eval detail page now opens with a dedicated `Operator Status` block instead of leading with a sparse provisional score grid + - the first screen now explains `runner exit before threads materialized`, surfaces `run_dir / manifest / stdout / stderr`, and gives explicit next-step guidance + - redundant provisional score metadata is still available, but collapsed behind `Score artifacts (provisional)` by default so the page reads like an operator surface instead of a failed report +- Honest boundary: + - this phase explains one real provisional failure mode clearly, but it does not yet model every evaluation lifecycle branch as a richer typed contract ### Slice D3: Lease Semantics And Regrouping diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 37be34775..73f862dcb 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -2567,6 +2567,7 @@ function EvaluationDetailPage() { const publishable = Boolean(data.info?.score?.publishable ?? (scoreGate === 'final')); const scoreFinal = publishable; const summaryReady = !!data.info?.score?.eval_summary_path; + const operator = data.info?.operator_surface || {}; const statusToneClass = data.info.status === 'completed' ? 'chip-success' @@ -2602,6 +2603,55 @@ function EvaluationDetailPage() {
    +
    +
    +
    +

    Operator Status

    +

    {operator.summary || 'Inspect the current evaluation state before judging score or trace coverage.'}

    +
    + + {operator.tone || 'default'} + +
    + {operator.headline || 'Evaluation operator surface'} +
    +
    +

    Facts

    +
    + {(operator.facts || []).map((item: any) => ( +
    + {item.label}: {item.value} +
    + ))} +
    +
    +
    +

    Artifacts

    +
      + {(operator.artifacts || []).map((item: any) => ( +
    • + {item.label}: {item.path} +
    • + ))} +
    +
    +
    +
    +

    Next Step

    +
      + {(operator.next_steps || []).map((item: string) => ( +
    1. {item}
    2. + ))} +
    +
    + {operator.raw_notes ? ( +
    + Raw runner notes +
    {operator.raw_notes}
    +
    + ) : null} +
    +

    Config

    @@ -2614,38 +2664,43 @@ function EvaluationDetailPage() {
    -
    -

    Score

    -
    -
    Score Gate: {scoreGate}
    -
    Publishable: {String(publishable)}
    -
    Summary: {summaryReady ? 'ready' : 'missing'}
    - {scoreFinal ? ( - <> -
    Resolved: {data.info.score?.resolved_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Resolved Rate: {formatPct(data.info.score?.resolved_rate_pct)}
    -
    Completed: {data.info.score?.completed_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Completed Rate: {formatPct(data.info.score?.completed_rate_pct)}
    -
    Non-empty Patch: {data.info.score?.non_empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Non-empty Rate: {formatPct(data.info.score?.non_empty_patch_rate_pct)}
    -
    Empty Patch: {data.info.score?.empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Errors: {data.info.score?.error_instances ?? 0}
    -
    Trace Active: {data.info.score?.active_trace_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Tool-call Threads: {data.info.score?.tool_call_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Tool-call Coverage: {formatPct(data.info.score?.tool_call_thread_rate_pct)}
    -
    Tool Calls Total: {data.info.score?.tool_calls_total ?? 0}
    -
    Avg Tool Calls(active): {data.info.score?.avg_tool_calls_per_active_thread ?? '-'}
    -
    Recursion Cap Hits: {data.info.score?.recursion_cap_hits ?? 0}{data.info.score?.recursion_limit ? ` / cap ${data.info.score.recursion_limit}` : ''}
    - - ) : ( - <> -
    Final Score: blocked (provisional)
    -
    Block Reason: {data.info.score?.manifest_eval_error ? 'manifest_eval_error' : 'missing_eval_summary'}
    - - )} -
    Run Dir: {data.info.score?.run_dir || '-'}
    -
    -
    + {scoreFinal ? ( +
    +

    Score

    +
    +
    Score Gate: {scoreGate}
    +
    Publishable: {String(publishable)}
    +
    Summary: {summaryReady ? 'ready' : 'missing'}
    +
    Resolved: {data.info.score?.resolved_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Resolved Rate: {formatPct(data.info.score?.resolved_rate_pct)}
    +
    Completed: {data.info.score?.completed_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Completed Rate: {formatPct(data.info.score?.completed_rate_pct)}
    +
    Non-empty Patch: {data.info.score?.non_empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Non-empty Rate: {formatPct(data.info.score?.non_empty_patch_rate_pct)}
    +
    Empty Patch: {data.info.score?.empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Errors: {data.info.score?.error_instances ?? 0}
    +
    Trace Active: {data.info.score?.active_trace_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Tool-call Threads: {data.info.score?.tool_call_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    +
    Tool-call Coverage: {formatPct(data.info.score?.tool_call_thread_rate_pct)}
    +
    Tool Calls Total: {data.info.score?.tool_calls_total ?? 0}
    +
    Avg Tool Calls(active): {data.info.score?.avg_tool_calls_per_active_thread ?? '-'}
    +
    Recursion Cap Hits: {data.info.score?.recursion_cap_hits ?? 0}{data.info.score?.recursion_limit ? ` / cap ${data.info.score.recursion_limit}` : ''}
    +
    Run Dir: {data.info.score?.run_dir || '-'}
    +
    +
    + ) : ( +
    + Score artifacts (provisional) +
    +
    Score Gate: {scoreGate}
    +
    Publishable: {String(publishable)}
    +
    Summary: {summaryReady ? 'ready' : 'missing'}
    +
    Final Score: blocked (provisional)
    +
    Block Reason: {data.info.score?.manifest_eval_error ? 'manifest_eval_error' : 'missing_eval_summary'}
    +
    Run Dir: {data.info.score?.run_dir || '-'}
    +
    +
    + )}

    {data.threads.title} ({data.threads.count})

    diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index fe5c1e759..7decc498d 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -1177,6 +1177,114 @@ section.eval-runtime-panel { white-space: normal; } +.eval-operator-shell { + margin-top: 1rem; + margin-bottom: 1.5rem; + border: 1px solid var(--border); + border-radius: 16px; + padding: 1rem 1.1rem; + background: var(--panel); + display: flex; + flex-direction: column; + gap: 0.9rem; +} + +.eval-operator-warning { + background: linear-gradient(180deg, var(--panel) 0%, var(--warning-soft) 100%); +} + +.eval-operator-danger { + background: linear-gradient(180deg, var(--panel) 0%, var(--danger-soft) 100%); +} + +.eval-operator-success { + background: linear-gradient(180deg, var(--panel) 0%, var(--success-soft) 100%); +} + +.eval-operator-hero { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: 1rem; +} + +.eval-operator-hero h2 { + margin: 0 0 0.35rem; +} + +.eval-operator-headline { + font-size: 1.02rem; + color: var(--text); +} + +.eval-operator-grid { + display: grid; + grid-template-columns: repeat(12, minmax(0, 1fr)); + gap: 1rem; +} + +.eval-operator-grid > * { + grid-column: span 6; +} + +.eval-operator-shell h3 { + margin: 0 0 0.6rem; + font-size: 0.95rem; +} + +.eval-artifact-list, +.eval-next-step-list { + margin: 0; + padding-left: 1.1rem; + display: flex; + flex-direction: column; + gap: 0.45rem; +} + +.eval-artifact-list li, +.eval-next-step-list li { + color: var(--text-secondary); +} + +.eval-raw-notes { + border-top: 1px solid var(--border); + padding-top: 0.8rem; +} + +.eval-raw-notes summary { + cursor: pointer; + color: var(--text-secondary); + font-weight: 500; +} + +.eval-raw-notes pre { + margin-top: 0.75rem; + padding: 0.85rem 0.95rem; + border-radius: 12px; + border: 1px solid var(--border); + background: rgba(255, 255, 255, 0.72); + white-space: pre-wrap; + word-break: break-word; +} + +.eval-score-details { + margin: 1rem 0 0; + border: 1px solid var(--border); + border-radius: 14px; + background: var(--panel); + padding: 0.85rem 1rem; +} + +.eval-score-details summary { + cursor: pointer; + color: var(--text-secondary); + font-weight: 500; +} + +.eval-score-details .info-grid { + margin-top: 1rem; +} + .eval-composer-backdrop { position: fixed; inset: 0; @@ -1203,7 +1311,8 @@ section.eval-runtime-panel { .evaluation-notes > *, .evaluation-overview > *, .evaluation-column, - .lease-cluster-grid > * { + .lease-cluster-grid > *, + .eval-operator-grid > * { grid-column: span 12; } } diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index 126160c54..e63c28bf8 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -151,3 +151,40 @@ def close(self): assert by_id["lease-diverged"]["semantics"]["category"] == "diverged" assert by_id["lease-orphan-diverged"]["semantics"]["category"] == "orphan_diverged" assert by_id["lease-orphan"]["semantics"]["category"] == "orphan" + + +def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_materialize(): + payload = monitor_service.build_evaluation_operator_surface( + status="provisional", + notes=( + "runner=direct rc=1 sandbox=local " + "run_dir=/tmp/eval stdout_log=/tmp/eval/out.log stderr_log=/tmp/eval/err.log" + ), + score={ + "score_gate": "provisional", + "publishable": False, + "run_dir": "/tmp/eval", + "manifest_path": "/tmp/eval/run_manifest.json", + "eval_summary_path": None, + "trace_summaries_path": None, + "scored": False, + }, + threads_total=0, + threads_running=0, + threads_done=0, + ) + + assert payload["tone"] == "danger" + assert payload["headline"] == "Runner exited before evaluation threads materialized." + assert "bootstrap failure" in payload["summary"] + assert payload["facts"][-2:] == [ + {"label": "Runner", "value": "direct"}, + {"label": "Exit code", "value": "1"}, + ] + artifact_labels = {item["label"] for item in payload["artifacts"]} + assert artifact_labels == { + "Run directory", + "Run manifest", + "STDOUT log", + "STDERR log", + } From d24b9c276ac5b16092a4b4a3b2a8c416900e9bfd Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 18:57:14 +0800 Subject: [PATCH 18/87] feat: triage monitor lease lifecycle groups --- backend/web/services/monitor_service.py | 122 +++++++++++++++++ ...2026-04-06-resource-observability-split.md | 6 +- ...-06-resource-observability-split-design.md | 7 + frontend/monitor/src/App.tsx | 128 +++++++++++++++--- .../test_monitor_resources_route.py | 5 + tests/Unit/monitor/test_monitor_compat.py | 56 ++++++++ 6 files changed, 302 insertions(+), 22 deletions(-) diff --git a/backend/web/services/monitor_service.py b/backend/web/services/monitor_service.py index 529c18e41..e711c7e92 100644 --- a/backend/web/services/monitor_service.py +++ b/backend/web/services/monitor_service.py @@ -112,6 +112,41 @@ def _lease_link(lease_id: str | None) -> dict[str, Any]: "stderr_log", ] +LEASE_TRIAGE_ORDER = [ + "active_drift", + "detached_residue", + "orphan_cleanup", + "healthy_capacity", +] + +LEASE_TRIAGE_META = { + "active_drift": { + "title": "Active Drift", + "description": "Leases whose desired and observed state still disagree recently enough to warrant active operator attention.", + "tone": "warning", + }, + "detached_residue": { + "title": "Detached Residue", + "description": ( + "Leases still marked desired=running but observed=detached long after the runtime " + "stopped moving. Usually cleanup debt, not live pressure." + ), + "tone": "danger", + }, + "orphan_cleanup": { + "title": "Orphan Cleanup", + "description": "Lease rows that have already lost thread binding and mainly represent cleanup backlog or historical residue.", + "tone": "warning", + }, + "healthy_capacity": { + "title": "Healthy Capacity", + "description": "Leases with attached thread context and converged runtime state.", + "tone": "success", + }, +} + +DETACHED_RESIDUE_THRESHOLD_HOURS = 4.0 + def _classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) -> dict[str, str]: is_orphan = not bool(thread_id) @@ -132,6 +167,61 @@ def _classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) - } +def _parse_local_timestamp(iso_timestamp: str | None) -> datetime | None: + if not iso_timestamp: + return None + cleaned = iso_timestamp + if "Z" in cleaned: + cleaned = cleaned.replace("Z", "") + if "+" in cleaned: + cleaned = cleaned.split("+")[0] + try: + return datetime.fromisoformat(cleaned) + except ValueError: + return None + + +def _hours_since(iso_timestamp: str | None) -> float | None: + dt = _parse_local_timestamp(iso_timestamp) + if dt is None: + return None + delta = datetime.now() - dt + return delta.total_seconds() / 3600 + + +def _classify_lease_triage( + *, + thread_id: str | None, + badge: dict[str, Any], + observed_state: str | None, + desired_state: str | None, + updated_at: str | None, +) -> dict[str, Any]: + observed = str(observed_state or "").strip().lower() or None + desired = str(desired_state or "").strip().lower() or None + age_hours = _hours_since(updated_at) + is_orphan = not bool(thread_id) + is_converged = bool(badge.get("converged")) + + if is_orphan: + key = "orphan_cleanup" + elif is_converged: + key = "healthy_capacity" + elif observed == "detached" and desired == "running" and age_hours is not None and age_hours >= DETACHED_RESIDUE_THRESHOLD_HOURS: + key = "detached_residue" + else: + key = "active_drift" + + meta = LEASE_TRIAGE_META[key] + return { + "category": key, + "title": meta["title"], + "description": meta["description"], + "tone": meta["tone"], + "age_hours": age_hours, + } + + def _extract_eval_note_value(notes: str, key: str) -> str | None: match = re.search(rf"(?:^|[ |]){re.escape(key)}=([^ ]+)", notes) if not match: @@ -292,6 +382,13 @@ def _map_leases(rows: list[dict[str, Any]]) -> dict[str, Any]: items = [] for row in rows: badge = _make_badge(row["desired_state"], row["observed_state"]) + triage = _classify_lease_triage( + thread_id=row["thread_id"], + badge=badge, + observed_state=row["observed_state"], + desired_state=row["desired_state"], + updated_at=row["updated_at"], + ) items.append( { "lease_id": row["lease_id"], @@ -301,6 +398,7 @@ def _map_leases(rows: list[dict[str, Any]]) -> dict[str, Any]: "thread": _thread_ref(row["thread_id"]), "state_badge": badge, "semantics": _classify_lease_semantics(thread_id=row["thread_id"], badge=badge), + "triage": triage, "error": row["last_error"], "updated_at": row["updated_at"], "updated_ago": _format_time_ago(row["updated_at"]), @@ -326,11 +424,35 @@ def _map_leases(rows: list[dict[str, Any]]) -> dict[str, Any]: } ) + triage_summary = {key: 0 for key in LEASE_TRIAGE_ORDER} + for item in items: + triage_summary[item["triage"]["category"]] += 1 + triage_summary["total"] = len(items) + + triage_groups = [] + for key in LEASE_TRIAGE_ORDER: + meta = LEASE_TRIAGE_META[key] + group_items = [item for item in items if item["triage"]["category"] == key] + triage_groups.append( + { + "key": key, + "title": meta["title"], + "description": meta["description"], + "tone": meta["tone"], + "count": len(group_items), + "items": group_items, + } + ) + return { "title": "All Leases", "count": len(items), "summary": summary, "groups": groups, + "triage": { + "summary": triage_summary, + "groups": triage_groups, + }, "items": items, } diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 142bcae8d..8557bee14 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -29,8 +29,12 @@ - evaluation detail payload includes backend-owned `info.operator_surface` - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps - redundant provisional score metadata is folded behind `Score artifacts (provisional)` instead of occupying the first screen +- `D3` now has a landed phase-2: + - `/api/monitor/leases` now adds backend-owned `triage.summary` and `triage.groups` + - triage distinguishes `active_drift`, `detached_residue`, `orphan_cleanup`, and `healthy_capacity` + - monitor `Resources` consumes that triage surface directly instead of flattening everything back into `diverged/orphan` - next honest follow-up remains: - - `D3` because lease regrouping has now moved onto a backend semantic contract, but the categories are still shallow and need stronger lifecycle meaning + - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone - `D2` because the current operator surface only hardens the provisional/bootstrap-failure case and still lacks richer lifecycle typing for other eval states --- diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 6774d652f..62be5e761 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -189,6 +189,13 @@ These are not vague “polish later” notes. They are concrete seams that now b - keep raw/global truth available - add explicit categorization/regrouping for active, diverged, orphan, and historical leases - reduce “system looks broken” confusion without hiding the raw facts +- Current landed phase: + - `/api/monitor/leases` still preserves the original `summary/groups/items` contract, but now also returns backend-owned `triage.summary` and ordered `triage.groups` + - the new `triage` layer separates `active_drift`, `detached_residue`, `orphan_cleanup`, and `healthy_capacity` + - classification is still built from existing database-agnostic fields (`desired_state`, `observed_state`, `thread_id`, `updated_at`) rather than new SQLite-specific lookups + - the monitor `Resources` page now reads that triage surface directly, so the live page can show `3 active drift + 26 detached residue` instead of one opaque `29 diverged` +- Honest boundary: + - this is still a phase-2 heuristic, not a full lifecycle model; age-based detached residue is a better operator default, but not yet a richer typed runtime contract ### Slice D4: Dashboard Entry And Global Resources Surface diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 73f862dcb..8d591eb35 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -316,13 +316,14 @@ function MonitorResourcesPage() { const providers = Array.isArray(resourceData.providers) ? resourceData.providers : []; const summary = resourceData.summary || {}; const leases = Array.isArray(leaseData.items) ? leaseData.items : []; - const leaseSummary = leaseData.summary || {}; - const leaseGroups = Array.isArray(leaseData.groups) ? leaseData.groups : []; + const leaseTriage = leaseData.triage || {}; + const triageSummary = leaseTriage.summary || {}; + const triageGroups = Array.isArray(leaseTriage.groups) ? leaseTriage.groups : []; const selectedProvider = providers.find((provider: any) => provider.id === selectedId) || providers[0] || null; - const divergedLeases = (leaseGroups.find((group: any) => group.key === 'diverged')?.items || []) as any[]; - const orphanDivergedLeases = (leaseGroups.find((group: any) => group.key === 'orphan_diverged')?.items || []) as any[]; - const orphanLeases = (leaseGroups.find((group: any) => group.key === 'orphan')?.items || []) as any[]; - const healthyLeases = (leaseGroups.find((group: any) => group.key === 'healthy')?.items || []) as any[]; + const activeDriftLeases = (triageGroups.find((group: any) => group.key === 'active_drift')?.items || []) as any[]; + const detachedResidueLeases = (triageGroups.find((group: any) => group.key === 'detached_residue')?.items || []) as any[]; + const orphanCleanupLeases = (triageGroups.find((group: any) => group.key === 'orphan_cleanup')?.items || []) as any[]; + const healthyCapacityLeases = (triageGroups.find((group: any) => group.key === 'healthy_capacity')?.items || []) as any[]; const refreshedAt = summary.last_refreshed_at || summary.snapshot_at; const selectedSessions = Array.isArray(selectedProvider?.sessions) ? selectedProvider.sessions : []; const selectedRunning = selectedSessions.filter((session: any) => session.status === 'running').length; @@ -344,8 +345,9 @@ function MonitorResourcesPage() {
    - 0 ? 'warning' : 'success'} /> - 0 ? 'success' : 'danger'} /> + 0 ? 'warning' : 'success'} /> + 0 ? 'danger' : 'success'} /> + 0 ? 'success' : 'danger'} />
    @@ -494,16 +496,34 @@ function MonitorResourcesPage() {

    Lease Health

    -

    Grouped triage surface from backend lease semantics. Diverged rows show state drift; orphan rows show leases no longer bound to a live thread.

    +

    Backend-owned lease lifecycle triage. Separate live drift from stale detached residue before assuming the whole system is on fire.

    Legacy flat table
    +
    + + active drift + {activeDriftLeases.length} + + + detached residue + {detachedResidueLeases.length} + + + orphan cleanup + {orphanCleanupLeases.length} + + + healthy + {healthyCapacityLeases.length} + +
    -

    Diverged ({divergedLeases.length + orphanDivergedLeases.length})

    -

    Desired and observed states no longer match, including leases that already lost thread binding.

    +

    Active Drift ({activeDriftLeases.length})

    +

    Recent desired/observed mismatch. These rows deserve live operator attention before they age into residue.

    {shortId(item.lease_id, 12)} {item.provider}{item.updated_ago}
    No diverged leases.
    @@ -515,7 +535,7 @@ function MonitorResourcesPage() { - {[...orphanDivergedLeases, ...divergedLeases].slice(0, 8).map((item: any) => ( + {activeDriftLeases.slice(0, 8).map((item: any) => ( @@ -524,9 +544,9 @@ function MonitorResourcesPage() { ))} - {divergedLeases.length + orphanDivergedLeases.length === 0 ? ( + {activeDriftLeases.length === 0 ? ( - + ) : null} @@ -534,8 +554,42 @@ function MonitorResourcesPage() {
    -

    Orphans ({orphanLeases.length})

    -

    Lease rows with no active thread binding. These usually indicate cleanup debt or abandoned runtime state.

    +

    Detached Residue ({detachedResidueLeases.length})

    +

    Detached rows that still want `running` long after the runtime stopped moving. Usually cleanup debt, not fresh pressure.

    +
    {shortId(item.lease_id, 12)} {item.provider}{item.updated_ago}
    No diverged leases.No active drift right now.
    + + + + + + + + + + + {detachedResidueLeases.slice(0, 8).map((item: any) => ( + + + + + + + + ))} + {detachedResidueLeases.length === 0 ? ( + + + + ) : null} + +
    LeaseProviderThreadStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan}{item.updated_ago}
    No detached residue.
    +
    +
    + +
    +
    +

    Cleanup Backlog ({orphanCleanupLeases.length})

    +

    Rows that already lost thread binding. Keep them visible for cleanup honesty, but do not confuse them with live compute pressure.

    @@ -543,22 +597,54 @@ function MonitorResourcesPage() { - + - {orphanLeases.slice(0, 8).map((item: any) => ( + {orphanCleanupLeases.slice(0, 8).map((item: any) => ( - + + + ))} + {orphanCleanupLeases.length === 0 ? ( + + + + ) : null} + +
    Provider Instance StateErrorUpdated
    {shortId(item.lease_id, 12)} {item.provider} {shortId(item.instance_id, 12)} {item.error || '-'}{item.updated_ago}
    No orphan cleanup rows.
    +
    + +
    +

    Healthy Capacity ({healthyCapacityLeases.length})

    +

    Converged lease rows still attached to thread context. Use this as the counterweight to the noisy failure buckets above.

    + + + + + + + + + + + + {healthyCapacityLeases.slice(0, 8).map((item: any) => ( + + + + + + ))} - {orphanLeases.length === 0 ? ( + {healthyCapacityLeases.length === 0 ? ( - + ) : null} diff --git a/tests/Integration/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py index d3cf5f404..cdf28e6f7 100644 --- a/tests/Integration/test_monitor_resources_route.py +++ b/tests/Integration/test_monitor_resources_route.py @@ -69,5 +69,10 @@ def test_monitor_leases_route_exposes_summary_and_groups(): payload = response.json() assert "summary" in payload assert "groups" in payload + assert "triage" in payload assert set(payload["summary"]).issuperset({"total", "healthy", "diverged", "orphan", "orphan_diverged"}) assert isinstance(payload["groups"], list) + assert set(payload["triage"]["summary"]).issuperset( + {"total", "active_drift", "detached_residue", "orphan_cleanup", "healthy_capacity"} + ) + assert isinstance(payload["triage"]["groups"], list) diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index e63c28bf8..485964dd8 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -130,6 +130,16 @@ def close(self): return None monkeypatch.setattr(monitor_service, "make_sandbox_monitor_repo", lambda: FakeRepo()) + monkeypatch.setattr( + monitor_service, + "_hours_since", + lambda iso_timestamp: { + "2026-04-06T00:10:00": 0.5, + "2026-04-06T00:11:00": 0.5, + "2026-04-06T00:12:00": 10.0, + "2026-04-06T00:13:00": 10.0, + }.get(iso_timestamp), + ) payload = monitor_service.list_leases() @@ -146,12 +156,58 @@ def close(self): "orphan", "healthy", ] + assert payload["triage"]["summary"] == { + "total": 4, + "active_drift": 1, + "detached_residue": 0, + "orphan_cleanup": 2, + "healthy_capacity": 1, + } + assert [group["key"] for group in payload["triage"]["groups"]] == [ + "active_drift", + "detached_residue", + "orphan_cleanup", + "healthy_capacity", + ] by_id = {item["lease_id"]: item for item in payload["items"]} assert by_id["lease-healthy"]["semantics"]["category"] == "healthy" + assert by_id["lease-healthy"]["triage"]["category"] == "healthy_capacity" assert by_id["lease-diverged"]["semantics"]["category"] == "diverged" + assert by_id["lease-diverged"]["triage"]["category"] == "active_drift" assert by_id["lease-orphan-diverged"]["semantics"]["category"] == "orphan_diverged" + assert by_id["lease-orphan-diverged"]["triage"]["category"] == "orphan_cleanup" assert by_id["lease-orphan"]["semantics"]["category"] == "orphan" + assert by_id["lease-orphan"]["triage"]["category"] == "orphan_cleanup" + + +def test_list_leases_marks_old_detached_running_rows_as_detached_residue(monkeypatch): + class FakeRepo: + def query_leases(self): + return [ + { + "lease_id": "lease-stale", + "provider_name": "local", + "desired_state": "running", + "observed_state": "detached", + "current_instance_id": "inst-9", + "last_error": None, + "updated_at": "2026-04-05T00:00:00", + "thread_id": "subagent-1234", + } + ] + + def close(self): + return None + + monkeypatch.setattr(monitor_service, "make_sandbox_monitor_repo", lambda: FakeRepo()) + monkeypatch.setattr(monitor_service, "_hours_since", lambda _: 24.0) + + payload = monitor_service.list_leases() + item = payload["items"][0] + assert item["semantics"]["category"] == "diverged" + assert item["triage"]["category"] == "detached_residue" + assert payload["triage"]["summary"]["detached_residue"] == 1 def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_materialize(): payload = monitor_service.build_evaluation_operator_surface( From ccfb5942e85bad848a26509139b044652dab5f01 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 19:02:53 +0800 Subject: [PATCH 19/87] feat: polish monitor provider surfaces --- ...2026-04-06-resource-observability-split.md | 4 + ...-06-resource-observability-split-design.md | 17 +- frontend/monitor/src/App.tsx | 226 +++++++++++++----- frontend/monitor/src/styles.css | 160 ++++++++++++- 4 files changed, 350 insertions(+), 57 deletions(-) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 8557bee14..996d74690 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -25,6 +25,10 @@ - root lands on `/dashboard` - monitor `Resources` uses the global monitor contract and includes grouped lease triage - evaluation tutorial/reference sections are collapsed by default +- `D4` now has a landed phase-2: + - monitor provider cards now expose a product-like status light, metric cells, capability strip, and session dots + - selected provider detail now reads like a real panel instead of a loose stats stack + - null telemetry in monitor resources no longer renders as fake `0.0` values - `D2` now has a landed phase-1: - evaluation detail payload includes backend-owned `info.operator_surface` - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 62be5e761..23c5ca42e 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -269,9 +269,24 @@ The dashboard is a switchboard, not a full destination page. It should answer - grouped lease health sections (`Diverged`, `Orphans`, `All leases`) - evaluation guidance is no longer sprayed across the first screen; tutorial/reference sections are now collapsed by default behind an operator-guide `
    ` block +### Current D4 Phase-2 Landing + +- monitor provider cards are now much closer to the product `ResourcesPage` family: + - status light in the title row + - compact metric cells instead of plain text-only stats + - capability strip + - session status dot strip + - unavailable providers still stay selectable in monitor so ops can inspect them, even though product cards disable that path +- selected provider detail is now a true panel instead of a loose stack: + - provider header + status/type context + - overview pill strip + - capability strip reused in the detail pane + - global session table kept below as the monitor-only truth surface +- monitor-side null telemetry now stays `--` instead of being accidentally coerced into `0.0`, which was misleading for unavailable providers + ### D4 Remaining Gaps -- provider detail is now useful, but it is still lighter than the original product `ResourcesPage` family +- monitor provider/detail surface is now close to the product resources page in interaction quality, but still lacks the richer sandbox-card drill-down family the product page has - lease regrouping exists, but backend-side semantic categorization is still shallow and belongs to `D3` - dashboard is currently a compact switchboard; it does not yet expose richer error drill-down or resource anomaly timelines diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 8d591eb35..40ad91a27 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -248,6 +248,99 @@ function DashboardPage() { ); } +const CAPABILITY_LABELS: Record = { + filesystem: "FS", + terminal: "TERM", + metrics: "METRICS", + screenshot: "SHOT", + web: "WEB", + process: "PROC", + hooks: "HOOKS", + mount: "MOUNT", +}; + +function formatMonitorMetric(value: any, suffix = '', digits = 1): string { + if (value == null) return '--'; + const num = Number(value); + if (!Number.isFinite(num)) return '--'; + return `${num.toFixed(digits)}${suffix}`; +} + +function ProviderStatusLight({ status }: { status: string }) { + const className = + status === 'active' + ? 'provider-status-light is-active' + : status === 'ready' + ? 'provider-status-light is-ready' + : 'provider-status-light is-unavailable'; + return
    LeaseProviderThreadStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan}{item.updated_ago}
    No orphan leases.No healthy capacity rows yet.
    + + + + + + + + + + + + + {rows.map((item: any) => ( + + + + + + + + + + ))} + +
    Lease IDProviderInstance IDThreadStateUpdatedError
    {item.lease_id}{item.provider}{item.instance_id?.slice(0, 12) || '-'} + {item.thread.thread_id ? ( + {item.thread.thread_id.slice(0, 8)} + ) : ( + orphan + )} + {item.updated_ago}{item.error || '-'}
    + ); return (

    {data.title}

    -

    Global sandbox lease table. Treat this as the infrastructure lens; backend semantics now distinguish healthy, diverged, orphan, and orphan-diverged rows.

    -

    Total: {items.length}{divergedOnly ? ` / ${data.count} (diverged only)` : ''} · healthy {summary.healthy || 0} · orphan {summary.orphan || 0} · orphan+diverged {summary.orphan_diverged || 0}

    +

    Legacy lease view, now backed by backend triage semantics. Use this when you want lease-only focus without losing the full raw table.

    +
    + + total + {items.length}{divergedOnly ? ` / ${data.count}` : ''} + + + active drift + {triageSummary.active_drift || 0} + + + detached residue + {triageSummary.detached_residue || 0} + + + cleanup + {triageSummary.orphan_cleanup || 0} + + + healthy + {triageSummary.healthy_capacity || 0} + +
    - {divergedOnly ? 'Show all leases' : 'Only diverged leases'} + {divergedOnly ? 'Show all leases' : 'Only attention buckets'} - Lease event timeline + Open resources
    - - - - - - - - - - - - - - {items.map((item: any) => ( - - - - - - - - - - ))} - -
    Lease IDProviderInstance IDThreadStateUpdatedError
    {item.lease_id}{item.provider}{item.instance_id?.slice(0, 12) || '-'} - {item.thread.thread_id ? ( - {item.thread.thread_id.slice(0, 8)} - ) : ( - orphan - )} - {item.updated_ago}{item.error || '-'}
    + {visibleGroups + .filter((group: any) => group.count > 0) + .map((group: any) => ( +
    +

    {group.title} ({group.count})

    +

    {group.description}

    + {renderLeaseTable(group.items)} +
    + ))} +
    + All leases ({items.length}) + {renderLeaseTable(items)} +
    ); } From 4718b7fb1e7c3b15c7dd4cb3d75188193823edfc Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 19:15:27 +0800 Subject: [PATCH 22/87] feat: harden monitor eval operator contract --- backend/web/services/monitor_service.py | 47 ++++++++++++- ...2026-04-06-resource-observability-split.md | 5 +- ...-06-resource-observability-split-design.md | 4 +- tests/Unit/monitor/test_monitor_compat.py | 69 +++++++++++++++++-- 4 files changed, 116 insertions(+), 9 deletions(-) diff --git a/backend/web/services/monitor_service.py b/backend/web/services/monitor_service.py index e711c7e92..697ac300c 100644 --- a/backend/web/services/monitor_service.py +++ b/backend/web/services/monitor_service.py @@ -248,14 +248,28 @@ def build_evaluation_operator_surface( scored = bool(score.get("scored")) score_gate = str(score.get("score_gate") or "provisional") artifacts = [ - {"label": "Run directory", "path": score.get("run_dir") or extracted.get("run_dir")}, + { + "label": "Run directory", + "path": score.get("run_dir") or extracted.get("run_dir"), + }, {"label": "Run manifest", "path": score.get("manifest_path")}, {"label": "STDOUT log", "path": extracted.get("stdout_log")}, {"label": "STDERR log", "path": extracted.get("stderr_log")}, {"label": "Eval summary", "path": score.get("eval_summary_path")}, {"label": "Trace summaries", "path": score.get("trace_summaries_path")}, ] - artifacts = [item for item in artifacts if item["path"]] + artifacts = [ + { + **item, + "status": "present" if item["path"] else "missing", + } + for item in artifacts + ] + artifact_summary = { + "present": sum(1 for item in artifacts if item["status"] == "present"), + "missing": sum(1 for item in artifacts if item["status"] == "missing"), + "total": len(artifacts), + } facts = [ {"label": "Status", "value": status}, @@ -270,6 +284,7 @@ def build_evaluation_operator_surface( if rc is not None: facts.append({"label": "Exit code", "value": str(rc)}) + kind = "collecting_runtime_evidence" tone = "default" headline = "Evaluation is still collecting runtime evidence." summary = "Use the artifacts below to inspect progress and confirm whether thread rows are materializing." @@ -279,6 +294,7 @@ def build_evaluation_operator_surface( ] if status == "provisional" and not scored: + kind = "provisional_waiting_for_summary" tone = "warning" headline = "Evaluation is provisional. Final score is blocked." summary = "This run has not produced the final eval summary yet, so publishable scoring is intentionally withheld." @@ -288,6 +304,7 @@ def build_evaluation_operator_surface( ] if rc is not None and rc != 0 and threads_total == 0: + kind = "bootstrap_failure" tone = "danger" headline = "Runner exited before evaluation threads materialized." summary = "Treat this as a bootstrap failure, not as an empty successful run. No evaluation thread rows were created." @@ -296,7 +313,19 @@ def build_evaluation_operator_surface( "Use the run manifest and stdout log to confirm whether the slice was prepared before exit.", "Re-run only after the failing dependency or model configuration is understood.", ] + elif status == "running" and threads_total == 0 and threads_running > 0: + kind = "running_waiting_for_threads" + tone = "default" + headline = "Evaluation is actively running while thread rows catch up." + summary = ( + "The runner is alive, but thread rows have not materialized yet. Treat this as an ingestion lag window, not as an empty run." + ) + next_steps = [ + "Refresh after the first thread row materializes.", + "Use stdout/stderr to confirm the solve loop is still advancing.", + ] elif status == "running": + kind = "running_active" tone = "default" headline = "Evaluation is actively running." summary = "Thread rows and traces may lag behind the runner. Use live progress and logs before declaring drift." @@ -304,7 +333,19 @@ def build_evaluation_operator_surface( "Refresh after new thread rows materialize.", "Inspect traces only after the first active thread appears.", ] + elif status == "completed_with_errors" and scored: + kind = "completed_with_errors" + tone = "warning" + headline = "Evaluation completed with recorded errors." + summary = ( + "Some thread rows reached completion, but at least one instance recorded an error. Treat this as reviewable but not clean." + ) + next_steps = [ + "Inspect error-bearing threads before comparing this run against cleaner baselines.", + "Use eval summary and trace summaries to isolate failing instances.", + ] elif status == "completed" and scored: + kind = "completed_publishable" tone = "success" headline = "Evaluation finished with a publishable score surface." summary = "Score artifacts are present. Use the thread table to drill into trace-level evidence." @@ -314,11 +355,13 @@ def build_evaluation_operator_surface( ] return { + "kind": kind, "tone": tone, "headline": headline, "summary": summary, "facts": facts, "artifacts": artifacts, + "artifact_summary": artifact_summary, "next_steps": next_steps, "raw_notes": notes, } diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 4444962e7..eb1f4342d 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -32,10 +32,12 @@ - `D4` now has a landed phase-3: - selected provider detail now shows a lease card grid before the raw session table - monitor keeps the raw session table for truth, but no longer forces operators to start from the noisiest surface -- `D2` now has a landed phase-1: +- `D2` now has a landed phase-2: - evaluation detail payload includes backend-owned `info.operator_surface` - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps - redundant provisional score metadata is folded behind `Score artifacts (provisional)` instead of occupying the first screen + - operator payload now includes typed lifecycle `kind` and `artifact_summary` + - all six artifact slots stay visible with explicit `present|missing` status instead of silently dropping missing files - `D3` now has a landed phase-2: - `/api/monitor/leases` now adds backend-owned `triage.summary` and `triage.groups` - triage distinguishes `active_drift`, `detached_residue`, `orphan_cleanup`, and `healthy_capacity` @@ -43,7 +45,6 @@ - legacy `/leases` also now leads with triage buckets before the collapsed raw table - next honest follow-up remains: - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone - - `D2` because the current operator surface only hardens the provisional/bootstrap-failure case and still lacks richer lifecycle typing for other eval states --- diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 16e09d5a2..bad253c05 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -177,8 +177,10 @@ These are not vague “polish later” notes. They are concrete seams that now b - the monitor eval detail page now opens with a dedicated `Operator Status` block instead of leading with a sparse provisional score grid - the first screen now explains `runner exit before threads materialized`, surfaces `run_dir / manifest / stdout / stderr`, and gives explicit next-step guidance - redundant provisional score metadata is still available, but collapsed behind `Score artifacts (provisional)` by default so the page reads like an operator surface instead of a failed report + - operator payload now also carries a typed `kind` plus `artifact_summary`, and keeps all six artifact slots (`run_dir / manifest / stdout / stderr / eval_summary / trace_summaries`) with explicit `present|missing` status instead of filtering missing ones away + - the same backend helper now distinguishes at least `bootstrap_failure`, `running_waiting_for_threads`, `running_active`, `completed_with_errors`, `completed_publishable`, and `provisional_waiting_for_summary` - Honest boundary: - - this phase explains one real provisional failure mode clearly, but it does not yet model every evaluation lifecycle branch as a richer typed contract + - this phase now covers the main eval lifecycle branches more honestly, but it is still a typed operator contract layered over compat-monitor facts rather than a deeper evaluation storage rewrite ### Slice D3: Lease Semantics And Regrouping diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index 485964dd8..f36196fe7 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -209,13 +209,11 @@ def close(self): assert item["triage"]["category"] == "detached_residue" assert payload["triage"]["summary"]["detached_residue"] == 1 + def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_materialize(): payload = monitor_service.build_evaluation_operator_surface( status="provisional", - notes=( - "runner=direct rc=1 sandbox=local " - "run_dir=/tmp/eval stdout_log=/tmp/eval/out.log stderr_log=/tmp/eval/err.log" - ), + notes=("runner=direct rc=1 sandbox=local run_dir=/tmp/eval stdout_log=/tmp/eval/out.log stderr_log=/tmp/eval/err.log"), score={ "score_gate": "provisional", "publishable": False, @@ -230,6 +228,7 @@ def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_mate threads_done=0, ) + assert payload["kind"] == "bootstrap_failure" assert payload["tone"] == "danger" assert payload["headline"] == "Runner exited before evaluation threads materialized." assert "bootstrap failure" in payload["summary"] @@ -243,4 +242,66 @@ def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_mate "Run manifest", "STDOUT log", "STDERR log", + "Eval summary", + "Trace summaries", + } + assert payload["artifact_summary"] == { + "present": 4, + "missing": 2, + "total": 6, + } + assert payload["artifacts"][0]["status"] == "present" + assert payload["artifacts"][-1]["status"] == "missing" + + +def test_build_evaluation_operator_surface_marks_running_waiting_for_threads(): + payload = monitor_service.build_evaluation_operator_surface( + status="running", + notes="runner=direct rc=0", + score={ + "score_gate": "provisional", + "publishable": False, + "run_dir": "/tmp/eval", + "manifest_path": "/tmp/eval/run_manifest.json", + "eval_summary_path": None, + "trace_summaries_path": None, + "scored": False, + }, + threads_total=0, + threads_running=2, + threads_done=0, + ) + + assert payload["kind"] == "running_waiting_for_threads" + assert payload["tone"] == "default" + assert "actively running" in payload["headline"] + assert payload["artifact_summary"]["present"] == 2 + + +def test_build_evaluation_operator_surface_marks_completed_with_errors(): + payload = monitor_service.build_evaluation_operator_surface( + status="completed_with_errors", + notes="runner=direct rc=0", + score={ + "score_gate": "final", + "publishable": True, + "run_dir": "/tmp/eval", + "manifest_path": "/tmp/eval/run_manifest.json", + "eval_summary_path": "/tmp/eval/eval_summary.json", + "trace_summaries_path": "/tmp/eval/trace_summaries.jsonl", + "scored": True, + "error_instances": 2, + }, + threads_total=10, + threads_running=0, + threads_done=10, + ) + + assert payload["kind"] == "completed_with_errors" + assert payload["tone"] == "warning" + assert "completed with recorded errors" in payload["headline"] + assert payload["artifact_summary"] == { + "present": 4, + "missing": 2, + "total": 6, } From 8d190bed340281bc350474281c0fec3d22b2fca9 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 19:23:52 +0800 Subject: [PATCH 23/87] feat: tighten monitor resources surface --- ...2026-04-06-resource-observability-split.md | 4 + ...-06-resource-observability-split-design.md | 14 +- frontend/monitor/src/App.tsx | 221 +++++++++--------- frontend/monitor/src/styles.css | 23 +- 4 files changed, 135 insertions(+), 127 deletions(-) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index eb1f4342d..2294c6b75 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -32,6 +32,10 @@ - `D4` now has a landed phase-3: - selected provider detail now shows a lease card grid before the raw session table - monitor keeps the raw session table for truth, but no longer forces operators to start from the noisiest surface +- `D4` now has a landed phase-4: + - dashboard `Diverged leases` and `Orphans` metrics now jump straight to `resources#lease-health` + - provider cards are tighter because duplicated paused/stopped footer counts were removed + - lease-health now only renders non-empty attention buckets by default and collapses healthy capacity behind a details shell - `D2` now has a landed phase-2: - evaluation detail payload includes backend-owned `info.operator_surface` - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index bad253c05..499c13730 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -296,6 +296,18 @@ The dashboard is a switchboard, not a full destination page. It should answer - keeps the full raw session table below instead of replacing it - the monitor page still does not import product frontend components directly; it mirrors the interaction shape locally so the contract boundary remains clean +### Current D4 Phase-4 Landing + +- dashboard infra metrics now deep-link directly into monitor lease-health instead of stopping at the top of the resources page +- provider cards are tighter: + - duplicated paused/stopped footer counts were removed + - unavailable/error reason now lives in the header block instead of stretching card height +- lease-health now defaults to the non-empty attention buckets: + - `active_drift` and `detached_residue` stay first-class + - `orphan_cleanup` only renders when present + - `healthy_capacity` is collapsed behind a details shell instead of competing with active failure buckets +- the net effect is not a new contract; it is a first-screen density cut so operators land on attention surfaces before passive inventory + ### D4 Remaining Gaps - monitor provider/detail surface is now close to the product resources page in interaction quality, but still lacks the richer sandbox-sheet / deep drill-down family the product page has @@ -319,7 +331,7 @@ The dashboard is a switchboard, not a full destination page. It should answer ### D3 Remaining Gaps - semantics are still inferred from current lease row + thread binding only; they do not yet account for stronger lifecycle facts such as historical cleanup windows or explicit terminal/session shutdown markers -- the legacy `/leases` flat table still exists as a drill-down/debug surface and has not been redesigned beyond consuming the new summary/category contract +- the legacy `/leases` flat table still exists as a drill-down/debug surface, though the monitor resources page now gives a better default entry by rendering only non-empty attention groups and collapsing healthy capacity ### Why this IA diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 02a67513f..0c40ef3b9 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -158,13 +158,13 @@ function DashboardPage() { /> {infra.leases_diverged || 0}} note={`${infra.leases_total || 0} total`} tone={(infra.leases_diverged || 0) > 0 ? 'warning' : 'success'} /> {infra.leases_orphan || 0}} note={`${infra.leases_healthy || 0} healthy`} tone={(infra.leases_orphan || 0) > 0 ? 'danger' : 'success'} /> @@ -501,6 +501,8 @@ function MonitorResourcesPage() { const detachedResidueLeases = (triageGroups.find((group: any) => group.key === 'detached_residue')?.items || []) as any[]; const orphanCleanupLeases = (triageGroups.find((group: any) => group.key === 'orphan_cleanup')?.items || []) as any[]; const healthyCapacityLeases = (triageGroups.find((group: any) => group.key === 'healthy_capacity')?.items || []) as any[]; + const hasPrimaryLeaseAttention = activeDriftLeases.length > 0 || detachedResidueLeases.length > 0; + const hasSecondaryLeaseAttention = orphanCleanupLeases.length > 0; const refreshedAt = summary.last_refreshed_at || summary.snapshot_at; const selectedSessions = Array.isArray(selectedProvider?.sessions) ? selectedProvider.sessions : []; const selectedLeaseGroups = groupSessionsByLease(selectedSessions); @@ -539,8 +541,6 @@ function MonitorResourcesPage() { {providers.map((provider: any) => { const sessions = Array.isArray(provider.sessions) ? provider.sessions : []; const runningCount = sessions.filter((session: any) => session.status === 'running').length; - const pausedCount = sessions.filter((session: any) => session.status === 'paused').length; - const stoppedCount = sessions.filter((session: any) => session.status === 'stopped').length; const unavailable = provider.status === 'unavailable'; const cpuUsed = provider.cardCpu?.used; const memoryUsed = provider.telemetry?.memory?.used; @@ -559,6 +559,9 @@ function MonitorResourcesPage() { {provider.name}

    {provider.type} {provider.vendor ? `· ${provider.vendor}` : ''}

    + {provider.unavailableReason || provider.error ? ( +

    {provider.unavailableReason || provider.error}

    + ) : null}
    {provider.status} @@ -572,13 +575,6 @@ function MonitorResourcesPage() {
    -
    - {pausedCount} paused - {stoppedCount} stopped -
    - {provider.unavailableReason || provider.error ? ( -

    {provider.unavailableReason || provider.error}

    - ) : null} ); })} @@ -735,108 +731,102 @@ function MonitorResourcesPage() { {healthyCapacityLeases.length}
    -
    -
    -

    Active Drift ({activeDriftLeases.length})

    -

    Recent desired/observed mismatch. These rows deserve live operator attention before they age into residue.

    - - - - - - - - - - - - {activeDriftLeases.slice(0, 8).map((item: any) => ( - - - - - - - - ))} - {activeDriftLeases.length === 0 ? ( - - - - ) : null} - -
    LeaseProviderThreadStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan}{item.updated_ago}
    No active drift right now.
    -
    - -
    -

    Detached Residue ({detachedResidueLeases.length})

    -

    Detached rows that still want `running` long after the runtime stopped moving. Usually cleanup debt, not fresh pressure.

    - - - - - - - - - - - - {detachedResidueLeases.slice(0, 8).map((item: any) => ( - - - - - - - - ))} - {detachedResidueLeases.length === 0 ? ( - - - - ) : null} - -
    LeaseProviderThreadStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan}{item.updated_ago}
    No detached residue.
    -
    -
    + {hasPrimaryLeaseAttention ? ( +
    + {activeDriftLeases.length > 0 ? ( +
    +

    Active Drift ({activeDriftLeases.length})

    +

    Recent desired/observed mismatch. These rows deserve live operator attention before they age into residue.

    + + + + + + + + + + + + {activeDriftLeases.slice(0, 8).map((item: any) => ( + + + + + + + + ))} + +
    LeaseProviderThreadStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan}{item.updated_ago}
    +
    + ) : null} + + {detachedResidueLeases.length > 0 ? ( +
    +

    Detached Residue ({detachedResidueLeases.length})

    +

    Detached rows that still want `running` long after the runtime stopped moving. Usually cleanup debt, not fresh pressure.

    + + + + + + + + + + + + {detachedResidueLeases.slice(0, 8).map((item: any) => ( + + + + + + + + ))} + +
    LeaseProviderThreadStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan}{item.updated_ago}
    +
    + ) : null} +
    + ) : null} -
    -
    -

    Cleanup Backlog ({orphanCleanupLeases.length})

    -

    Rows that already lost thread binding. Keep them visible for cleanup honesty, but do not confuse them with live compute pressure.

    - - - - - - - - - - - - {orphanCleanupLeases.slice(0, 8).map((item: any) => ( - - - - - - - - ))} - {orphanCleanupLeases.length === 0 ? ( + {hasSecondaryLeaseAttention ? ( +
    +
    +

    Cleanup Backlog ({orphanCleanupLeases.length})

    +

    Rows that already lost thread binding. Keep them visible for cleanup honesty, but do not confuse them with live compute pressure.

    +
    LeaseProviderInstanceStateUpdated
    {shortId(item.lease_id, 12)}{item.provider}{shortId(item.instance_id, 12)}{item.updated_ago}
    + - + + + + + - ) : null} - -
    No orphan cleanup rows.LeaseProviderInstanceStateUpdated
    -
    + + + {orphanCleanupLeases.slice(0, 8).map((item: any) => ( + + {shortId(item.lease_id, 12)} + {item.provider} + {shortId(item.instance_id, 12)} + + {item.updated_ago} + + ))} + + +
    +
    + ) : null} -
    -

    Healthy Capacity ({healthyCapacityLeases.length})

    -

    Converged lease rows still attached to thread context. Use this as the counterweight to the noisy failure buckets above.

    + {healthyCapacityLeases.length > 0 ? ( +
    + Healthy Capacity ({healthyCapacityLeases.length}) @@ -857,15 +847,14 @@ function MonitorResourcesPage() { ))} - {healthyCapacityLeases.length === 0 ? ( - - - - ) : null}
    {item.updated_ago}
    No healthy capacity rows yet.
    -
    -
    + + ) : null} + + {!hasPrimaryLeaseAttention && !hasSecondaryLeaseAttention && healthyCapacityLeases.length === 0 ? ( +
    No lease groups reported yet.
    + ) : null}
    All leases ({leases.length}) diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 35b2b37d6..237c5647c 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -205,6 +205,15 @@ h2 { color: var(--text); } +.dashboard-inline-link { + color: inherit; + text-decoration: none; +} + +.dashboard-inline-link:hover { + color: var(--accent); +} + .dashboard-metric-note { font-size: 0.82rem; color: var(--text-secondary); @@ -432,22 +441,16 @@ h2 { background: var(--danger); } -.provider-session-copy, -.provider-card-footer { +.provider-session-copy { font-size: 0.78rem; color: var(--text-secondary); } -.provider-card-footer { - display: flex; - gap: 0.75rem; - flex-wrap: wrap; -} - .provider-inline-error { + margin-top: 0.32rem; color: var(--danger); - font-size: 0.84rem; - line-height: 1.45; + font-size: 0.76rem; + line-height: 1.35; } .provider-detail-shell { From 9d741ef5bc0656ce321a0cb3205b4e71aa8b3672 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 19:31:37 +0800 Subject: [PATCH 24/87] feat: add monitor lease drilldown panel --- ...2026-04-06-resource-observability-split.md | 4 + ...-06-resource-observability-split-design.md | 14 +- frontend/monitor/src/App.tsx | 2364 ++++++++++++----- frontend/monitor/src/styles.css | 102 +- 4 files changed, 1871 insertions(+), 613 deletions(-) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 2294c6b75..5595dd8f9 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -36,6 +36,10 @@ - dashboard `Diverged leases` and `Orphans` metrics now jump straight to `resources#lease-health` - provider cards are tighter because duplicated paused/stopped footer counts were removed - lease-health now only renders non-empty attention buckets by default and collapses healthy capacity behind a details shell +- `D4` now has a landed phase-5: + - selected lease cards now open a dedicated `Lease Detail` panel before the full provider session table + - the panel reuses existing payload data only: lease/thread links, member, started time, and grouped session rows + - this gives monitor resources a local deep-drill layer without changing backend contracts - `D2` now has a landed phase-2: - evaluation detail payload includes backend-owned `info.operator_surface` - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 499c13730..340f5f69d 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -308,9 +308,21 @@ The dashboard is a switchboard, not a full destination page. It should answer - `healthy_capacity` is collapsed behind a details shell instead of competing with active failure buckets - the net effect is not a new contract; it is a first-screen density cut so operators land on attention surfaces before passive inventory +### Current D4 Phase-5 Landing + +- selected provider lease cards now drive a dedicated monitor-side `Lease Detail` panel before the global session truth table +- this is the smallest local equivalent of the product sandbox-sheet layer: + - click a lease group card + - inspect lease/thread quick links, member, started time, and per-session status rows + - only then fall through to the noisier full provider session table +- the interaction stays frontend-local and contract-preserving: + - no new backend fields + - no import of product sandbox components + - only existing provider/session/lease payload data is reused + ### D4 Remaining Gaps -- monitor provider/detail surface is now close to the product resources page in interaction quality, but still lacks the richer sandbox-sheet / deep drill-down family the product page has +- monitor provider/detail surface is now close to the product resources page in interaction quality, but still lacks the richer sandbox-sheet capabilities such as file browsing or per-session live metrics - lease regrouping exists, but backend-side semantic categorization is still shallow and belongs to `D3` - dashboard is currently a compact switchboard; it does not yet expose richer error drill-down or resource anomaly timelines diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 0c40ef3b9..313b6b6ca 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1,8 +1,17 @@ -import React from 'react'; -import { BrowserRouter, Routes, Route, Link, NavLink, Navigate, useLocation, useParams } from 'react-router-dom'; -import './styles.css'; - -const API_BASE = '/api/monitor'; +import React from "react"; +import { + BrowserRouter, + Routes, + Route, + Link, + NavLink, + Navigate, + useLocation, + useParams, +} from "react-router-dom"; +import "./styles.css"; + +const API_BASE = "/api/monitor"; // Utility: Fetch JSON from API async function fetchAPI(path: string) { @@ -12,7 +21,9 @@ async function fetchAPI(path: string) { try { payload = text ? JSON.parse(text) : {}; } catch { - throw new Error(`Invalid JSON from ${path} (${res.status}): ${text.slice(0, 180)}`); + throw new Error( + `Invalid JSON from ${path} (${res.status}): ${text.slice(0, 180)}`, + ); } if (!res.ok) { throw new Error(payload?.detail || `${res.status} ${res.statusText}`); @@ -27,7 +38,9 @@ async function fetchJSON(path: string, init?: RequestInit) { try { payload = text ? JSON.parse(text) : {}; } catch { - throw new Error(`Invalid JSON from ${path} (${res.status}): ${text.slice(0, 180)}`); + throw new Error( + `Invalid JSON from ${path} (${res.status}): ${text.slice(0, 180)}`, + ); } if (!res.ok) { throw new Error(payload?.detail || `${res.status} ${res.statusText}`); @@ -36,7 +49,11 @@ async function fetchJSON(path: string, init?: RequestInit) { } // Component: Breadcrumb navigation -function Breadcrumb({ items }: { items: Array<{ label: string; url: string }> }) { +function Breadcrumb({ + items, +}: { + items: Array<{ label: string; url: string }>; +}) { return (
    {items.map((item, i) => ( @@ -56,22 +73,26 @@ function StateBadge({ badge }: { badge: any }) { const tooltip = badge.hours_diverged ? `Diverged for ${badge.hours_diverged}h` : badge.converged - ? 'Converged' - : `${badge.observed} → ${badge.desired}`; + ? "Converged" + : `${badge.observed} → ${badge.desired}`; - return {text}; + return ( + + {text} + + ); } function DashboardMetric({ label, value, note, - tone = 'default', + tone = "default", }: { label: string; value: React.ReactNode; note?: React.ReactNode; - tone?: 'default' | 'warning' | 'danger' | 'success'; + tone?: "default" | "warning" | "danger" | "success"; }) { return (
    @@ -91,7 +112,7 @@ function DashboardPage() { setLoading(true); setError(null); try { - const payload = await fetchAPI('/dashboard'); + const payload = await fetchAPI("/dashboard"); setData(payload); } catch (e: any) { setError(e?.message || String(e)); @@ -131,10 +152,17 @@ function DashboardPage() {

    Dashboard

    -

    Operator landing for resource health, workload pressure, and the latest evaluation run.

    +

    + Operator landing for resource health, workload pressure, and the + latest evaluation run. +

    -
    @@ -143,7 +171,9 @@ function DashboardPage() {

    Infra Health

    -

    Global provider and lease state from the monitor backend.

    +

    + Global provider and lease state from the monitor backend. +

    Open resources @@ -154,19 +184,37 @@ function DashboardPage() { label="Providers" value={`${resourcesSummary.active_providers || 0}/${resourcesSummary.total_providers || 0}`} note={`${resourcesSummary.unavailable_providers || 0} unavailable`} - tone={(resourcesSummary.unavailable_providers || 0) > 0 ? 'warning' : 'success'} + tone={ + (resourcesSummary.unavailable_providers || 0) > 0 + ? "warning" + : "success" + } /> {infra.leases_diverged || 0}} + value={ + + {infra.leases_diverged || 0} + + } note={`${infra.leases_total || 0} total`} - tone={(infra.leases_diverged || 0) > 0 ? 'warning' : 'success'} + tone={(infra.leases_diverged || 0) > 0 ? "warning" : "success"} /> {infra.leases_orphan || 0}} + value={ + + {infra.leases_orphan || 0} + + } note={`${infra.leases_healthy || 0} healthy`} - tone={(infra.leases_orphan || 0) > 0 ? 'danger' : 'success'} + tone={(infra.leases_orphan || 0) > 0 ? "danger" : "success"} />
    @@ -175,7 +223,10 @@ function DashboardPage() {

    Active Workload

    -

    How much monitored runtime is currently alive across DB sessions, providers, and evaluations.

    +

    + How much monitored runtime is currently alive across DB + sessions, providers, and evaluations. +

    Open threads @@ -196,7 +247,9 @@ function DashboardPage() { label="Running sessions" value={workload.running_sessions || 0} note={`${workload.evaluations_running || 0} eval jobs running`} - tone={(workload.running_sessions || 0) > 0 ? 'default' : 'warning'} + tone={ + (workload.running_sessions || 0) > 0 ? "default" : "warning" + } />
    @@ -205,41 +258,70 @@ function DashboardPage() {

    Latest Eval

    -

    Most recent evaluation known to the monitor. Use this as the fastest jump into detail.

    +

    + Most recent evaluation known to the monitor. Use this as the + fastest jump into detail. +

    - - {latestEval ? 'Open latest eval' : 'Open eval list'} + + {latestEval ? "Open latest eval" : "Open eval list"}
    {latestEval ? (
    - + {latestEval.status} - + publishable={String(Boolean(latestEval.publishable))}
    -
    {latestEval.evaluation_id}
    +
    + {latestEval.evaluation_id} +
    -
    +
    - {latestEval.threads_done || 0}/{latestEval.threads_total || 0} threads · {formatPct(latestEval.progress_pct || 0)} · updated {latestEval.updated_ago || '-'} + {latestEval.threads_done || 0}/{latestEval.threads_total || 0}{" "} + threads · {formatPct(latestEval.progress_pct || 0)} · updated{" "} + {latestEval.updated_ago || "-"}
    ) : (
    -

    No evaluation rows yet. Open Eval to submit a minimal run.

    +

    + No evaluation rows yet. Open Eval to submit a minimal run. +

    )} @@ -259,20 +341,20 @@ const CAPABILITY_LABELS: Record = { mount: "MOUNT", }; -function formatMonitorMetric(value: any, suffix = '', digits = 1): string { - if (value == null) return '--'; +function formatMonitorMetric(value: any, suffix = "", digits = 1): string { + if (value == null) return "--"; const num = Number(value); - if (!Number.isFinite(num)) return '--'; + if (!Number.isFinite(num)) return "--"; return `${num.toFixed(digits)}${suffix}`; } function ProviderStatusLight({ status }: { status: string }) { const className = - status === 'active' - ? 'provider-status-light is-active' - : status === 'ready' - ? 'provider-status-light is-ready' - : 'provider-status-light is-unavailable'; + status === "active" + ? "provider-status-light is-active" + : status === "ready" + ? "provider-status-light is-ready" + : "provider-status-light is-unavailable"; return
    - - {selectedProvider.type}{selectedProvider.vendor ? ` · ${selectedProvider.vendor}` : ''} + + {selectedProvider.type} + {selectedProvider.vendor + ? ` · ${selectedProvider.vendor}` + : ""} {selectedProvider.consoleUrl ? ( - + Open console ) : null} @@ -625,27 +1046,52 @@ function MonitorResourcesPage() {
    Provider - {selectedProvider.type}{selectedProvider.vendor ? ` · ${selectedProvider.vendor}` : ''} + + {selectedProvider.type} + {selectedProvider.vendor + ? ` · ${selectedProvider.vendor}` + : ""} +
    CPU - {selectedProvider.telemetry?.cpu?.used == null ? '--' : `${Number(selectedProvider.telemetry.cpu.used).toFixed(1)}%`} + + {selectedProvider.telemetry?.cpu?.used == null + ? "--" + : `${Number(selectedProvider.telemetry.cpu.used).toFixed(1)}%`} +
    Memory - {selectedProvider.telemetry?.memory?.used == null ? '--' : `${Number(selectedProvider.telemetry.memory.used).toFixed(1)} / ${selectedProvider.telemetry?.memory?.limit ?? '--'} GB`} + + {selectedProvider.telemetry?.memory?.used == null + ? "--" + : `${Number(selectedProvider.telemetry.memory.used).toFixed(1)} / ${selectedProvider.telemetry?.memory?.limit ?? "--"} GB`} +
    Disk - {selectedProvider.telemetry?.disk?.used == null ? '--' : `${Number(selectedProvider.telemetry.disk.used).toFixed(1)} / ${selectedProvider.telemetry?.disk?.limit ?? '--'} GB`} + + {selectedProvider.telemetry?.disk?.used == null + ? "--" + : `${Number(selectedProvider.telemetry.disk.used).toFixed(1)} / ${selectedProvider.telemetry?.disk?.limit ?? "--"} GB`} +
    Running metric - {selectedProvider.telemetry?.running?.used == null ? '--' : `${selectedProvider.telemetry.running.used} / ${selectedProvider.telemetry?.running?.limit ?? '--'} ${selectedProvider.telemetry?.running?.unit || ''}`} + + {selectedProvider.telemetry?.running?.used == null + ? "--" + : `${selectedProvider.telemetry.running.used} / ${selectedProvider.telemetry?.running?.limit ?? "--"} ${selectedProvider.telemetry?.running?.unit || ""}`} +
    Reason - {selectedProvider.unavailableReason || selectedProvider.error || 'healthy'} + + {selectedProvider.unavailableReason || + selectedProvider.error || + "healthy"} +
    @@ -653,21 +1099,41 @@ function MonitorResourcesPage() {

    Leases ({selectedLeaseGroups.length})

    -

    Monitor-side lease grouping for this provider. This is the closest equivalent to the product sandbox cards, but still grounded in global monitor truth.

    +

    + Monitor-side lease grouping for this provider. This is the + closest equivalent to the product sandbox cards, but still + grounded in global monitor truth. +

    {selectedLeaseGroups.map((group: any) => ( - + setSelectedLeaseId(leaseGroupKey(group))} + /> ))} {selectedLeaseGroups.length === 0 ? ( -
    No lease groups reported for this provider.
    +
    + No lease groups reported for this provider. +
    ) : null}
    + {selectedLeaseGroup ? ( + + ) : null}

    Sessions ({selectedSessions.length})

    -

    Global session rows currently attached to this provider. This is the monitor-side truth surface, not the user projection.

    +

    + Global session rows currently attached to this provider. This + is the monitor-side truth surface, not the user projection. +

    @@ -685,11 +1151,31 @@ function MonitorResourcesPage() { {selectedSessions.map((session: any) => ( - - - + + + - + ))} {selectedSessions.length === 0 ? ( @@ -707,7 +1193,11 @@ function MonitorResourcesPage() {

    Lease Health

    -

    Backend-owned lease lifecycle triage. Separate live drift from stale detached residue before assuming the whole system is on fire.

    +

    + Backend-owned lease lifecycle triage. Separate live drift from + stale detached residue before assuming the whole system is on + fire. +

    Legacy flat table @@ -736,7 +1226,10 @@ function MonitorResourcesPage() { {activeDriftLeases.length > 0 ? (

    Active Drift ({activeDriftLeases.length})

    -

    Recent desired/observed mismatch. These rows deserve live operator attention before they age into residue.

    +

    + Recent desired/observed mismatch. These rows deserve live + operator attention before they age into residue. +

    {shortId(session.id, 12)}{session.threadId ? {shortId(session.threadId, 12)} : '-'}{session.leaseId ? {shortId(session.leaseId, 12)} : '-'}{session.memberName || session.memberId || '-'} + {session.threadId ? ( + + {shortId(session.threadId, 12)} + + ) : ( + "-" + )} + + {session.leaseId ? ( + + {shortId(session.leaseId, 12)} + + ) : ( + "-" + )} + {session.memberName || session.memberId || "-"} {session.status}{session.startedAt ? new Date(session.startedAt).toLocaleString() : '-'} + {session.startedAt + ? new Date(session.startedAt).toLocaleString() + : "-"} +
    @@ -750,10 +1243,24 @@ function MonitorResourcesPage() { {activeDriftLeases.slice(0, 8).map((item: any) => ( - + - - + + ))} @@ -765,7 +1272,10 @@ function MonitorResourcesPage() { {detachedResidueLeases.length > 0 ? (

    Detached Residue ({detachedResidueLeases.length})

    -

    Detached rows that still want `running` long after the runtime stopped moving. Usually cleanup debt, not fresh pressure.

    +

    + Detached rows that still want `running` long after the runtime + stopped moving. Usually cleanup debt, not fresh pressure. +

    {shortId(item.lease_id, 12)} + + {shortId(item.lease_id, 12)} + + {item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan} + {item.thread?.thread_id ? ( + + {shortId(item.thread.thread_id, 12)} + + ) : ( + orphan + )} + + + {item.updated_ago}
    @@ -779,10 +1289,24 @@ function MonitorResourcesPage() { {detachedResidueLeases.slice(0, 8).map((item: any) => ( - + - - + + ))} @@ -797,7 +1321,11 @@ function MonitorResourcesPage() {

    Cleanup Backlog ({orphanCleanupLeases.length})

    -

    Rows that already lost thread binding. Keep them visible for cleanup honesty, but do not confuse them with live compute pressure.

    +

    + Rows that already lost thread binding. Keep them visible for + cleanup honesty, but do not confuse them with live compute + pressure. +

    {shortId(item.lease_id, 12)} + + {shortId(item.lease_id, 12)} + + {item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan} + {item.thread?.thread_id ? ( + + {shortId(item.thread.thread_id, 12)} + + ) : ( + orphan + )} + + + {item.updated_ago}
    @@ -811,10 +1339,16 @@ function MonitorResourcesPage() { {orphanCleanupLeases.slice(0, 8).map((item: any) => ( - + - + ))} @@ -840,10 +1374,24 @@ function MonitorResourcesPage() { {healthyCapacityLeases.slice(0, 8).map((item: any) => ( - + - - + + ))} @@ -852,7 +1400,9 @@ function MonitorResourcesPage() { ) : null} - {!hasPrimaryLeaseAttention && !hasSecondaryLeaseAttention && healthyCapacityLeases.length === 0 ? ( + {!hasPrimaryLeaseAttention && + !hasSecondaryLeaseAttention && + healthyCapacityLeases.length === 0 ? (
    No lease groups reported yet.
    ) : null} @@ -873,19 +1423,27 @@ function MonitorResourcesPage() { {leases.map((item: any) => ( - + - + - + - + ))} @@ -906,7 +1464,9 @@ function ThreadsPage() { const loadThreads = React.useCallback(async () => { setLoading(true); try { - const payload = await fetchAPI(`/threads?offset=${offset}&limit=${limit}`); + const payload = await fetchAPI( + `/threads?offset=${offset}&limit=${limit}`, + ); setData(payload); } finally { setLoading(false); @@ -928,8 +1488,13 @@ function ThreadsPage() { return (

    {data.title}

    -

    Global thread index. Start here to find the active run, then drill into session, lease, and trace detail.

    -

    Showing {from}-{to} of {total} | page {page}

    +

    + Global thread index. Start here to find the active run, then drill into + session, lease, and trace detail. +

    +

    + Showing {from}-{to} of {total} | page {page} +

    @@ -947,8 +1512,12 @@ function ThreadsPage() { > Next -
    @@ -982,17 +1551,26 @@ function ThreadsPage() {
    {data.items.map((item: any) => ( - - + + + + - - ))} @@ -1033,8 +1611,13 @@ function TracesPage() { return (

    {data.title}

    -

    Run-level trace index for debugging tool calls, checkpoints, and runtime transitions across monitored threads.

    -

    Showing {from}-{to} of {total} | page {page}

    +

    + Run-level trace index for debugging tool calls, checkpoints, and runtime + transitions across monitored threads. +

    +

    + Showing {from}-{to} of {total} | page {page} +

    @@ -1052,8 +1635,12 @@ function TracesPage() { > Next -
    @@ -1088,13 +1675,22 @@ function TracesPage() {
    {data.items.map((item: any) => ( - + - + - - - + + + ))} @@ -1110,7 +1706,10 @@ function ThreadDetailPage() { const { threadId } = useParams(); const location = useLocation(); const [data, setData] = React.useState(null); - const initialRunId = React.useMemo(() => new URLSearchParams(location.search).get('run') || '', [location.search]); + const initialRunId = React.useMemo( + () => new URLSearchParams(location.search).get("run") || "", + [location.search], + ); React.useEffect(() => { fetchAPI(`/thread/${threadId}`).then(setData); @@ -1118,17 +1717,22 @@ function ThreadDetailPage() { if (!data) return
    Loading...
    ; const threadIsActive = Array.isArray(data?.sessions?.items) - ? data.sessions.items.some((s: any) => s.status === 'active') + ? data.sessions.items.some((s: any) => s.status === "active") : false; return (

    Thread: {data.thread_id.slice(0, 8)}

    -

    mode: {data.thread_mode || 'normal'} | trace: {data.keep_full_trace ? 'full' : 'latest'}

    +

    + mode: {data.thread_mode || "normal"} | trace:{" "} + {data.keep_full_trace ? "full" : "latest"} +

    -

    {data.sessions.title} ({data.sessions.count})

    +

    + {data.sessions.title} ({data.sessions.count}) +

    {shortId(item.lease_id, 12)} + + {shortId(item.lease_id, 12)} + + {item.provider} {shortId(item.instance_id, 12)} + + {item.updated_ago}
    {shortId(item.lease_id, 12)} + + {shortId(item.lease_id, 12)} + + {item.provider}{item.thread?.thread_id ? {shortId(item.thread.thread_id, 12)} : orphan} + {item.thread?.thread_id ? ( + + {shortId(item.thread.thread_id, 12)} + + ) : ( + orphan + )} + + + {item.updated_ago}
    {item.lease_id} + {item.lease_id} + {item.provider}{item.instance_id?.slice(0, 12) || '-'} + {item.instance_id?.slice(0, 12) || "-"} + {item.thread.thread_id ? ( - {item.thread.thread_id.slice(0, 8)} + + {item.thread.thread_id.slice(0, 8)} + ) : ( orphan )} + + {item.updated_ago}{item.error || '-'}{item.error || "-"}
    {item.thread_id.slice(0, 8)}{item.thread_mode || 'normal'} / trace={item.keep_full_trace ? 'full' : 'latest'} + {item.thread_id.slice(0, 8)} + + {item.thread_mode || "normal"} / trace= + {item.keep_full_trace ? "full" : "latest"} + {item.session_count} {item.last_active_ago} {item.lease.lease_id ? ( {item.lease.lease_id} - ) : '-'} + ) : ( + "-" + )} + {item.lease.provider || "-"} + {item.lease.provider || '-'}
    {item.thread_id.slice(0, 18)} + + {item.thread_id.slice(0, 18)} + + {shortId(item.run_id, 12)}{item.thread_mode || 'normal'} / trace={item.keep_full_trace ? 'full' : 'latest'} + {item.thread_mode || "normal"} / trace= + {item.keep_full_trace ? "full" : "latest"} + {item.event_count}{item.tool_call_count} / {item.tool_result_count}{item.started_ago || '-'}{item.last_event_ago || '-'} + {item.tool_call_count} / {item.tool_result_count} + {item.started_ago || "-"}{item.last_event_ago || "-"} {item.status}
    @@ -1144,17 +1748,23 @@ function ThreadDetailPage() { {data.sessions.items.map((s: any) => ( - + - + + - - + ))} {data.sessions.items.length === 0 && ( @@ -1182,23 +1792,35 @@ function ThreadDetailPage() {

    Live Trace

    -

    Conversation, event stream, and grouped steps for the selected run. Use this after locating the right session or lease above.

    - +

    + Conversation, event stream, and grouped steps for the selected run. + Use this after locating the right session or lease above. +

    +
    ); } function summarizeTraceEvent(eventType: string, payload: any): string { - if (eventType === 'tool_call') return `${payload?.name || 'tool'}(${JSON.stringify(payload?.args || {})})`; - if (eventType === 'tool_result') return `${payload?.name || 'tool'} -> ${String(payload?.content || '').slice(0, 240)}`; - if (eventType === 'text') return String(payload?.content || '').slice(0, 120); - if (eventType === 'status') { - const state = typeof payload?.state === 'string' ? payload.state : JSON.stringify(payload?.state || '-'); - return `state=${state} calls=${payload?.call_count ?? '-'}`; + if (eventType === "tool_call") + return `${payload?.name || "tool"}(${JSON.stringify(payload?.args || {})})`; + if (eventType === "tool_result") + return `${payload?.name || "tool"} -> ${String(payload?.content || "").slice(0, 240)}`; + if (eventType === "text") return String(payload?.content || "").slice(0, 120); + if (eventType === "status") { + const state = + typeof payload?.state === "string" + ? payload.state + : JSON.stringify(payload?.state || "-"); + return `state=${state} calls=${payload?.call_count ?? "-"}`; } - if (eventType === 'error') return payload?.error || 'error'; - if (eventType === 'done') return 'done'; + if (eventType === "error") return payload?.error || "error"; + if (eventType === "done") return "done"; return JSON.stringify(payload).slice(0, 120); } @@ -1208,61 +1830,81 @@ type TraceItem = { created_at?: string | null; created_ago?: string | null; event_type: string; - actor: 'assistant' | 'tool' | 'runtime'; + actor: "assistant" | "tool" | "runtime"; summary: string; payload: any; }; -function normalizeTraceEvent(eventType: string, payload: any): TraceItem | null { +function normalizeTraceEvent( + eventType: string, + payload: any, +): TraceItem | null { const seq = payload?._seq ?? null; const run_id = payload?._run_id ?? null; - if (eventType === 'text') { - const content = typeof payload?.content === 'string' ? payload.content : String(payload?.content ?? ''); + if (eventType === "text") { + const content = + typeof payload?.content === "string" + ? payload.content + : String(payload?.content ?? ""); if (!content) return null; - return { seq, run_id, event_type: 'assistant_text', actor: 'assistant', summary: content, payload }; + return { + seq, + run_id, + event_type: "assistant_text", + actor: "assistant", + summary: content, + payload, + }; } - if (eventType === 'tool_call') { + if (eventType === "tool_call") { return { seq, run_id, - event_type: 'tool_call', - actor: 'tool', - summary: `${payload?.name || 'tool'}`, + event_type: "tool_call", + actor: "tool", + summary: `${payload?.name || "tool"}`, payload, }; } - if (eventType === 'tool_result') { + if (eventType === "tool_result") { return { seq, run_id, - event_type: 'tool_result', - actor: 'tool', - summary: `${payload?.name || 'tool'}`, + event_type: "tool_result", + actor: "tool", + summary: `${payload?.name || "tool"}`, payload, }; } - if (eventType === 'status') { - const state = typeof payload?.state === 'string' ? payload.state : JSON.stringify(payload?.state || '-'); + if (eventType === "status") { + const state = + typeof payload?.state === "string" + ? payload.state + : JSON.stringify(payload?.state || "-"); return { seq, run_id, - event_type: 'status', - actor: 'runtime', - summary: `state=${state} calls=${payload?.call_count ?? '-'}`, + event_type: "status", + actor: "runtime", + summary: `state=${state} calls=${payload?.call_count ?? "-"}`, payload, }; } - if (eventType === 'error' || eventType === 'cancelled' || eventType === 'done') { + if ( + eventType === "error" || + eventType === "cancelled" || + eventType === "done" + ) { return { seq, run_id, event_type: eventType, - actor: 'runtime', + actor: "runtime", summary: summarizeTraceEvent(eventType, payload), payload, }; @@ -1270,11 +1912,18 @@ function normalizeTraceEvent(eventType: string, payload: any): TraceItem | null return null; } -function normalizeStoredTraceEvent(row: any, fallbackRunId: string | null): TraceItem | null { +function normalizeStoredTraceEvent( + row: any, + fallbackRunId: string | null, +): TraceItem | null { const payload = row?.payload || {}; if (payload?._seq == null && row?.seq != null) payload._seq = row.seq; - if (payload?._run_id == null && fallbackRunId) payload._run_id = fallbackRunId; - const normalized = normalizeTraceEvent(String(row?.event_type || ''), payload); + if (payload?._run_id == null && fallbackRunId) + payload._run_id = fallbackRunId; + const normalized = normalizeTraceEvent( + String(row?.event_type || ""), + payload, + ); if (!normalized) return null; return { ...normalized, @@ -1289,7 +1938,12 @@ function mergeTraceItems(prev: TraceItem[], next: TraceItem): TraceItem[] { const last = prev.length ? prev[prev.length - 1] : null; // @@@streaming-text-fold - collapse token-level text stream into one assistant step for readable trace timeline. - if (next.event_type === 'assistant_text' && last && last.event_type === 'assistant_text' && last.run_id === next.run_id) { + if ( + next.event_type === "assistant_text" && + last && + last.event_type === "assistant_text" && + last.run_id === next.run_id + ) { const merged = [...prev]; merged[merged.length - 1] = { ...last, @@ -1301,7 +1955,12 @@ function mergeTraceItems(prev: TraceItem[], next: TraceItem): TraceItem[] { } // @@@status-coalesce - keep only latest status snapshot for same run to reduce noise. - if (next.event_type === 'status' && last && last.event_type === 'status' && last.run_id === next.run_id) { + if ( + next.event_type === "status" && + last && + last.event_type === "status" && + last.run_id === next.run_id + ) { const merged = [...prev]; merged[merged.length - 1] = next; return merged; @@ -1328,14 +1987,14 @@ type TraceStep = { function buildTraceSteps(items: TraceItem[]): TraceStep[] { const steps: TraceStep[] = []; let assistantBuffer: string[] = []; - let pending: Omit | null = null; + let pending: Omit | null = null; - const pushStep = (step: Omit) => { + const pushStep = (step: Omit) => { steps.push({ ...step, step: steps.length + 1 }); }; for (const item of items) { - if (item.event_type === 'assistant_text') { + if (item.event_type === "assistant_text") { if (pending) { pending.runtime_notes.push(item.summary); pending.raw_items.push(item); @@ -1346,7 +2005,7 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { continue; } - if (item.event_type === 'tool_call') { + if (item.event_type === "tool_call") { if (pending) { pushStep(pending); pending = null; @@ -1356,10 +2015,12 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { seq_start: item.seq, seq_end: item.seq, created_ago: item.created_ago || null, - assistant_text: assistantBuffer.join('\n').trim(), + assistant_text: assistantBuffer.join("\n").trim(), tool_name: item.payload?.name || item.summary, tool_args: item.payload?.args || {}, - command_line: item.payload?.args?.CommandLine ? String(item.payload.args.CommandLine) : null, + command_line: item.payload?.args?.CommandLine + ? String(item.payload.args.CommandLine) + : null, tool_output: null, runtime_notes: [], raw_items: [item], @@ -1368,9 +2029,9 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { continue; } - if (item.event_type === 'tool_result') { + if (item.event_type === "tool_result") { if (pending && !pending.tool_output) { - pending.tool_output = String(item.payload?.content || '(no output)'); + pending.tool_output = String(item.payload?.content || "(no output)"); pending.raw_items.push(item); pending.seq_end = item.seq ?? pending.seq_end; } else { @@ -1379,11 +2040,11 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { seq_start: item.seq, seq_end: item.seq, created_ago: item.created_ago || null, - assistant_text: assistantBuffer.join('\n').trim(), + assistant_text: assistantBuffer.join("\n").trim(), tool_name: item.payload?.name || item.summary, tool_args: null, command_line: null, - tool_output: String(item.payload?.content || '(no output)'), + tool_output: String(item.payload?.content || "(no output)"), runtime_notes: [], raw_items: [item], }); @@ -1392,12 +2053,19 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { continue; } - const runtimeNote = item.event_type === 'status' ? formatStatusSummary(item.payload) : item.summary; + const runtimeNote = + item.event_type === "status" + ? formatStatusSummary(item.payload) + : item.summary; if (pending) { pending.runtime_notes.push(runtimeNote); pending.raw_items.push(item); pending.seq_end = item.seq ?? pending.seq_end; - if (item.event_type === 'error' || item.event_type === 'cancelled' || item.event_type === 'done') { + if ( + item.event_type === "error" || + item.event_type === "cancelled" || + item.event_type === "done" + ) { pushStep(pending); pending = null; } @@ -1407,7 +2075,7 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { seq_start: item.seq, seq_end: item.seq, created_ago: item.created_ago || null, - assistant_text: assistantBuffer.join('\n').trim(), + assistant_text: assistantBuffer.join("\n").trim(), tool_name: null, tool_args: null, command_line: null, @@ -1421,7 +2089,7 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { if (pending) pushStep(pending); - const remain = assistantBuffer.join('\n').trim(); + const remain = assistantBuffer.join("\n").trim(); if (remain) { pushStep({ run_id: items.length ? items[items.length - 1].run_id : null, @@ -1442,12 +2110,15 @@ function buildTraceSteps(items: TraceItem[]): TraceStep[] { } function shortId(value: string | null, size = 8): string { - if (!value) return '-'; + if (!value) return "-"; return String(value).slice(0, size); } -function evalThreadLabel(threadId: string | null, evaluationId: string | null): string { - if (!threadId) return '-'; +function evalThreadLabel( + threadId: string | null, + evaluationId: string | null, +): string { + if (!threadId) return "-"; if (!evaluationId) return shortId(threadId, 20); const prefix = `swebench-${evaluationId}-`; if (threadId.startsWith(prefix)) { @@ -1459,7 +2130,7 @@ function evalThreadLabel(threadId: string | null, evaluationId: string | null): function formatPct(value: any): string { const num = Number(value); - if (!Number.isFinite(num)) return '-'; + if (!Number.isFinite(num)) return "-"; return `${num.toFixed(1)}%`; } @@ -1474,20 +2145,23 @@ function evalProgress(item: any): { target: number; running: number; pct: number; - mode: 'thread_rows' | 'session_rows' | 'checkpoint_estimate'; + mode: "thread_rows" | "session_rows" | "checkpoint_estimate"; } { const doneRaw = Number(item?.threads_done ?? 0); const runningRaw = Number(item?.threads_running ?? 0); const targetRaw = Number(item?.slice_count ?? item?.threads_total ?? 0); - const modeRaw = String(item?.progress_source || ''); + const modeRaw = String(item?.progress_source || ""); const done = Number.isFinite(doneRaw) ? Math.max(0, doneRaw) : 0; const running = Number.isFinite(runningRaw) ? Math.max(0, runningRaw) : 0; - const targetCandidate = Number.isFinite(targetRaw) ? Math.max(0, targetRaw) : 0; + const targetCandidate = Number.isFinite(targetRaw) + ? Math.max(0, targetRaw) + : 0; const mode = - modeRaw === 'checkpoint_estimate' || modeRaw === 'session_rows' + modeRaw === "checkpoint_estimate" || modeRaw === "session_rows" ? modeRaw - : 'thread_rows'; - const target = targetCandidate > 0 ? targetCandidate : Math.max(done + running, 0); + : "thread_rows"; + const target = + targetCandidate > 0 ? targetCandidate : Math.max(done + running, 0); // @@@progress-active-ratio - evaluation threads can be running long before any thread reaches "done". // Use (done + running) to reflect visible in-flight progress instead of a flat 0% bar. const active = Math.min(target, done + running); @@ -1500,45 +2174,61 @@ function formatProgressSummary(progress: { target: number; running: number; pct: number; - mode: 'thread_rows' | 'session_rows' | 'checkpoint_estimate'; + mode: "thread_rows" | "session_rows" | "checkpoint_estimate"; }): string { - const pending = Math.max(0, progress.target - progress.done - progress.running); - const activeLabel = progress.mode === 'checkpoint_estimate' ? 'Started' : 'In Progress'; - const sourceSuffix = progress.mode === 'thread_rows' ? '' : ` · source=${progress.mode}`; + const pending = Math.max( + 0, + progress.target - progress.done - progress.running, + ); + const activeLabel = + progress.mode === "checkpoint_estimate" ? "Started" : "In Progress"; + const sourceSuffix = + progress.mode === "thread_rows" ? "" : ` · source=${progress.mode}`; return `Total ${progress.target} · Completed ${progress.done} · ${activeLabel} ${progress.running} · Pending ${pending} · Progress ${formatPct(progress.pct)}${sourceSuffix}`; } function formatStatusSummary(payload: any): string { const stateText = - typeof payload?.state === 'string' + typeof payload?.state === "string" ? payload.state - : payload?.state?.state || JSON.stringify(payload?.state || '-'); - const calls = payload?.call_count ?? '-'; - const inTokens = payload?.input_tokens ?? payload?.token_count ?? '-'; - const outTokens = payload?.output_tokens ?? '-'; + : payload?.state?.state || JSON.stringify(payload?.state || "-"); + const calls = payload?.call_count ?? "-"; + const inTokens = payload?.input_tokens ?? payload?.token_count ?? "-"; + const outTokens = payload?.output_tokens ?? "-"; return `state=${stateText} calls=${calls} tokens=${inTokens}/${outTokens}`; } function conversationText(content: any): string { - if (typeof content === 'string') return content; + if (typeof content === "string") return content; if (Array.isArray(content)) { return content .map((part) => { - if (typeof part === 'string') return part; - if (part && typeof part === 'object' && part.type === 'text') return String(part.text || ''); + if (typeof part === "string") return part; + if (part && typeof part === "object" && part.type === "text") + return String(part.text || ""); return JSON.stringify(part); }) - .join(''); + .join(""); } - if (content == null) return ''; - return typeof content === 'object' ? JSON.stringify(content, null, 2) : String(content); + if (content == null) return ""; + return typeof content === "object" + ? JSON.stringify(content, null, 2) + : String(content); } -function ConversationTraceCard({ message, index }: { message: any; index: number }) { - const msgType = String(message?.type || 'Unknown'); +function ConversationTraceCard({ + message, + index, +}: { + message: any; + index: number; +}) { + const msgType = String(message?.type || "Unknown"); const msgTypeKey = msgType.toLowerCase(); const text = conversationText(message?.content); - const toolCalls = Array.isArray(message?.tool_calls) ? message.tool_calls : []; + const toolCalls = Array.isArray(message?.tool_calls) + ? message.tool_calls + : []; return (
    @@ -1546,13 +2236,17 @@ function ConversationTraceCard({ message, index }: { message: any; index: number [{index}] {msgType} - id {shortId(message?.id || '-', 12)} + + id {shortId(message?.id || "-", 12)} +
    {toolCalls.length > 0 && (
    tool_calls
    -
    {JSON.stringify(toolCalls, null, 2)}
    +
    +            {JSON.stringify(toolCalls, null, 2)}
    +          
    )} @@ -1565,19 +2259,24 @@ function ConversationTraceCard({ message, index }: { message: any; index: number
    content
    -
    {text || '(empty)'}
    +
    +          {text || "(empty)"}
    +        
    Raw message -
    {JSON.stringify(message, null, 2)}
    +
    +          {JSON.stringify(message, null, 2)}
    +        
    ); } function TraceCard({ item }: { item: TraceItem }) { - const statusText = item.event_type === 'status' ? formatStatusSummary(item.payload) : null; + const statusText = + item.event_type === "status" ? formatStatusSummary(item.payload) : null; const commandLine = item.payload?.args?.CommandLine; const toolArgs = item.payload?.args; const toolOutput = item.payload?.content; @@ -1585,55 +2284,71 @@ function TraceCard({ item }: { item: TraceItem }) {
    - #{item.seq ?? '-'} - {item.actor} + #{item.seq ?? "-"} + + {item.actor} + {item.event_type}
    run {shortId(item.run_id)}
    - {item.event_type === 'assistant_text' && ( + {item.event_type === "assistant_text" && (
    {item.summary}
    )} - {item.event_type === 'tool_call' && ( + {item.event_type === "tool_call" && (
    Tool
    -
    {item.payload?.name || item.summary}
    +
    +            {item.payload?.name || item.summary}
    +          
    {commandLine && ( <>
    CommandLine
    -
    {String(commandLine)}
    +
    +                {String(commandLine)}
    +              
    )}
    Args
    -
    {JSON.stringify(toolArgs || {}, null, 2)}
    +
    +            {JSON.stringify(toolArgs || {}, null, 2)}
    +          
    )} - {item.event_type === 'tool_result' && ( + {item.event_type === "tool_result" && (
    Tool
    -
    {item.payload?.name || item.summary}
    +
    +            {item.payload?.name || item.summary}
    +          
    Output
    -
    {String(toolOutput || '(no output)')}
    +
    +            {String(toolOutput || "(no output)")}
    +          
    )} - {item.event_type === 'status' && ( + {item.event_type === "status" && (
    Runtime
    {statusText}
    )} - {(item.event_type === 'error' || item.event_type === 'cancelled' || item.event_type === 'done') && ( + {(item.event_type === "error" || + item.event_type === "cancelled" || + item.event_type === "done") && (
    {item.summary}
    )}
    Raw payload -
    {JSON.stringify(item.payload, null, 2)}
    +
    +          {JSON.stringify(item.payload, null, 2)}
    +        
    ); @@ -1645,16 +2360,20 @@ function TraceStepCard({ step }: { step: TraceStep }) {
    Step {step.step} - seq {step.seq_start ?? '-'}..{step.seq_end ?? '-'} + + seq {step.seq_start ?? "-"}..{step.seq_end ?? "-"} + run {shortId(step.run_id)}
    - {step.created_ago || '-'} + {step.created_ago || "-"}
    {step.assistant_text && (
    Intent
    -
    {step.assistant_text}
    +
    +            {step.assistant_text}
    +          
    )} @@ -1665,13 +2384,17 @@ function TraceStepCard({ step }: { step: TraceStep }) { {step.command_line && ( <>
    CommandLine
    -
    {step.command_line}
    +
    +                {step.command_line}
    +              
    )} {step.tool_args && ( <>
    Args
    -
    {JSON.stringify(step.tool_args, null, 2)}
    +
    +                {JSON.stringify(step.tool_args, null, 2)}
    +              
    )} @@ -1687,19 +2410,21 @@ function TraceStepCard({ step }: { step: TraceStep }) { {step.runtime_notes.length > 0 && (
    Runtime
    -
    {step.runtime_notes.join('\n')}
    +
    {step.runtime_notes.join("\n")}
    )}
    Raw events ({step.raw_items.length}) {step.raw_items.map((item, idx) => ( -
    +
    - #{item.seq || '-'} + #{item.seq || "-"} {item.event_type}
    -
    {JSON.stringify(item.payload, null, 2)}
    +
    +              {JSON.stringify(item.payload, null, 2)}
    +            
    ))}
    @@ -1707,48 +2432,75 @@ function TraceStepCard({ step }: { step: TraceStep }) { ); } -function ThreadTraceSection({ threadId, autoRefreshEnabled, initialRunId = '' }: { threadId: string; autoRefreshEnabled: boolean; initialRunId?: string }) { +function ThreadTraceSection({ + threadId, + autoRefreshEnabled, + initialRunId = "", +}: { + threadId: string; + autoRefreshEnabled: boolean; + initialRunId?: string; +}) { const [traceEvents, setTraceEvents] = React.useState([]); const [traceError, setTraceError] = React.useState(null); const [traceLoading, setTraceLoading] = React.useState(false); const [rawEventCount, setRawEventCount] = React.useState(0); - const [streamState, setStreamState] = React.useState<'idle' | 'polling' | 'error'>('idle'); - const [eventFilter, setEventFilter] = React.useState<'all' | 'assistant' | 'tool' | 'runtime'>('all'); - const [traceView, setTraceView] = React.useState<'conversation' | 'events' | 'steps'>('conversation'); + const [streamState, setStreamState] = React.useState< + "idle" | "polling" | "error" + >("idle"); + const [eventFilter, setEventFilter] = React.useState< + "all" | "assistant" | "tool" | "runtime" + >("all"); + const [traceView, setTraceView] = React.useState< + "conversation" | "events" | "steps" + >("conversation"); const [showRawTable, setShowRawTable] = React.useState(false); - const [selectedRunId, setSelectedRunId] = React.useState(''); + const [selectedRunId, setSelectedRunId] = React.useState(""); const [runCandidates, setRunCandidates] = React.useState([]); const [autoRefresh, setAutoRefresh] = React.useState(true); - const [conversationMessages, setConversationMessages] = React.useState([]); - const [conversationLoading, setConversationLoading] = React.useState(false); - const [conversationError, setConversationError] = React.useState(null); - - const loadTrace = React.useCallback((runId: string) => { - if (!threadId) return; - const query = runId ? `?run_id=${encodeURIComponent(runId)}` : ''; - setTraceLoading(true); - setTraceError(null); - setStreamState('polling'); - fetchAPI(`/thread/${threadId}/trace${query}`) - .then((payload) => { - setRawEventCount(payload?.event_count || 0); - setRunCandidates(payload?.run_candidates || []); - if (!runId && payload?.run_id) { - setSelectedRunId((prev) => prev || String(payload.run_id)); - } - const normalized = (payload?.events || []) - .map((row: any) => normalizeStoredTraceEvent(row, payload?.run_id || runId || null)) - .filter(Boolean) as TraceItem[]; - const merged = normalized.reduce((acc: TraceItem[], item) => mergeTraceItems(acc, item), []); - setTraceEvents(merged); - setStreamState('idle'); - }) - .catch((e) => { - setTraceError(e.message); - setStreamState('error'); - }) - .finally(() => setTraceLoading(false)); - }, [threadId]); + const [conversationMessages, setConversationMessages] = React.useState( + [], + ); + const [conversationLoading, setConversationLoading] = + React.useState(false); + const [conversationError, setConversationError] = React.useState< + string | null + >(null); + + const loadTrace = React.useCallback( + (runId: string) => { + if (!threadId) return; + const query = runId ? `?run_id=${encodeURIComponent(runId)}` : ""; + setTraceLoading(true); + setTraceError(null); + setStreamState("polling"); + fetchAPI(`/thread/${threadId}/trace${query}`) + .then((payload) => { + setRawEventCount(payload?.event_count || 0); + setRunCandidates(payload?.run_candidates || []); + if (!runId && payload?.run_id) { + setSelectedRunId((prev) => prev || String(payload.run_id)); + } + const normalized = (payload?.events || []) + .map((row: any) => + normalizeStoredTraceEvent(row, payload?.run_id || runId || null), + ) + .filter(Boolean) as TraceItem[]; + const merged = normalized.reduce( + (acc: TraceItem[], item) => mergeTraceItems(acc, item), + [], + ); + setTraceEvents(merged); + setStreamState("idle"); + }) + .catch((e) => { + setTraceError(e.message); + setStreamState("error"); + }) + .finally(() => setTraceLoading(false)); + }, + [threadId], + ); const loadConversation = React.useCallback(() => { if (!threadId) return; @@ -1756,7 +2508,9 @@ function ThreadTraceSection({ threadId, autoRefreshEnabled, initialRunId = '' }: setConversationError(null); fetchAPI(`/thread/${threadId}/conversation`) .then((payload) => { - setConversationMessages(Array.isArray(payload?.messages) ? payload.messages : []); + setConversationMessages( + Array.isArray(payload?.messages) ? payload.messages : [], + ); }) .catch((e) => setConversationError(e.message)) .finally(() => setConversationLoading(false)); @@ -1783,46 +2537,58 @@ function ThreadTraceSection({ threadId, autoRefreshEnabled, initialRunId = '' }: loadConversation(); }, 2000); return () => window.clearInterval(timer); - }, [threadId, autoRefreshEnabled, autoRefresh, selectedRunId, loadTrace, loadConversation]); + }, [ + threadId, + autoRefreshEnabled, + autoRefresh, + selectedRunId, + loadTrace, + loadConversation, + ]); const traceTail = traceEvents.slice(-300); - const visibleTrace = traceTail.filter((item) => eventFilter === 'all' || item.actor === eventFilter); + const visibleTrace = traceTail.filter( + (item) => eventFilter === "all" || item.actor === eventFilter, + ); const traceSteps = buildTraceSteps(visibleTrace); const conversationTail = conversationMessages.slice(-200); const traceStats = { - assistant: traceTail.filter((item) => item.actor === 'assistant').length, - tool: traceTail.filter((item) => item.actor === 'tool').length, - runtime: traceTail.filter((item) => item.actor === 'runtime').length, + assistant: traceTail.filter((item) => item.actor === "assistant").length, + tool: traceTail.filter((item) => item.actor === "tool").length, + runtime: traceTail.filter((item) => item.actor === "runtime").length, }; return (

    - Thread Trace { - traceView === 'conversation' - ? 'Conversation' - : traceView === 'events' - ? 'Events' - : 'Steps' - } - {' '} - ({ - traceView === 'conversation' - ? `${conversationTail.length} messages` - : traceView === 'events' + Thread Trace{" "} + {traceView === "conversation" + ? "Conversation" + : traceView === "events" + ? "Events" + : "Steps"}{" "} + ( + {traceView === "conversation" + ? `${conversationTail.length} messages` + : traceView === "events" ? `${visibleTrace.length} events` - : `${traceSteps.length} steps / ${visibleTrace.length} events` - }) + : `${traceSteps.length} steps / ${visibleTrace.length} events`} + )

    - status: {streamState} | run: {selectedRunId ? shortId(selectedRunId, 12) : '-'} | raw_events: {rawEventCount} | messages: {conversationTail.length} + status: {streamState} | run:{" "} + {selectedRunId ? shortId(selectedRunId, 12) : "-"} | raw_events:{" "} + {rawEventCount} | messages: {conversationTail.length}

    - {traceView !== 'conversation' && ( + {traceView !== "conversation" && ( <>
    Run - setSelectedRunId(e.target.value)} + > {runCandidates.map((run: any) => (
    - {(['all', 'assistant', 'tool', 'runtime'] as const).map((kind) => ( - - ))} + {(["all", "assistant", "tool", "runtime"] as const).map( + (kind) => ( + + ), + )}
    )}
    @@ -1894,47 +2662,70 @@ function ThreadTraceSection({ threadId, autoRefreshEnabled, initialRunId = '' }: refresh
    - {traceView === 'conversation' ? ( + {traceView === "conversation" ? (
    messages: {conversationTail.length} - loading: {conversationLoading ? 'yes' : 'no'} + loading: {conversationLoading ? "yes" : "no"}
    ) : (
    assistant: {traceStats.assistant} tool: {traceStats.tool} runtime: {traceStats.runtime} - loading: {traceLoading ? 'yes' : 'no'} + loading: {traceLoading ? "yes" : "no"} +
    + )} + {traceError && ( +
    Trace load failed: {traceError}
    + )} + {conversationError && ( +
    + Conversation load failed: {conversationError}
    )} - {traceError &&
    Trace load failed: {traceError}
    } - {conversationError &&
    Conversation load failed: {conversationError}
    }
    - {traceView === 'conversation' ? ( + {traceView === "conversation" ? ( <> {conversationTail.map((message, idx) => ( - + ))} - {conversationTail.length === 0 &&
    No conversation messages yet.
    } + {conversationTail.length === 0 && ( +
    No conversation messages yet.
    + )} - ) : traceView === 'events' ? ( + ) : traceView === "events" ? ( <> {visibleTrace.map((item, idx) => ( - + ))} - {visibleTrace.length === 0 &&
    No trace events for this filter.
    } + {visibleTrace.length === 0 && ( +
    + No trace events for this filter. +
    + )} ) : ( <> {traceSteps.map((step) => ( - + ))} - {traceSteps.length === 0 &&
    No trace events for this filter.
    } + {traceSteps.length === 0 && ( +
    + No trace events for this filter. +
    + )} )}
    - {showRawTable && traceView !== 'conversation' && ( + {showRawTable && traceView !== "conversation" && (
    Raw trace table
    {s.session_id.slice(0, 8)} + {s.session_id.slice(0, 8)} + {s.status} {s.started_ago}{s.ended_ago || '-'}{s.ended_ago || "-"} {s.lease.lease_id ? ( {s.lease.lease_id} - ) : '-'} + ) : ( + "-" + )} + + {s.error || '-'}{s.error || "-"}
    @@ -1950,22 +2741,31 @@ function ThreadTraceSection({ threadId, autoRefreshEnabled, initialRunId = '' }: - {traceTail.slice().reverse().map((item, idx) => ( - - - - - - - - - - ))} + {traceTail + .slice() + .reverse() + .map((item, idx) => ( + + + + + + + + + + ))}
    {item.seq || '-'}{item.actor}{item.event_type}{item.summary}{shortId(item.run_id)}{item.created_ago || '-'} -
    - view -
    {JSON.stringify(item.payload, null, 2)}
    -
    -
    {item.seq || "-"} + + {item.actor} + + {item.event_type}{item.summary}{shortId(item.run_id)}{item.created_ago || "-"} +
    + view +
    +                          {JSON.stringify(item.payload, null, 2)}
    +                        
    +
    +
    @@ -2009,12 +2809,25 @@ function SessionDetailPage() {

    Session: {data.session_id.slice(0, 8)}

    -
    Thread: {data.thread_id.slice(0, 8)}
    -
    Status: {data.info.status}
    -
    Provider: {data.info.provider || '-'}
    -
    Started: {data.info.started_ago}
    -
    Last Active: {data.info.last_active_ago}
    -
    Ended: {data.info.ended_ago || '-'}
    +
    + Thread:{" "} + {data.thread_id.slice(0, 8)} +
    +
    + Status: {data.info.status} +
    +
    + Provider: {data.info.provider || "-"} +
    +
    + Started: {data.info.started_ago} +
    +
    + Last Active: {data.info.last_active_ago} +
    +
    + Ended: {data.info.ended_ago || "-"} +
    @@ -2035,10 +2848,11 @@ function SessionDetailPage() { function LeasesPage() { const location = useLocation(); const [data, setData] = React.useState(null); - const divergedOnly = new URLSearchParams(location.search).get('diverged') === '1'; + const divergedOnly = + new URLSearchParams(location.search).get("diverged") === "1"; React.useEffect(() => { - fetchAPI('/leases').then(setData); + fetchAPI("/leases").then(setData); }, []); if (!data) return
    Loading...
    ; @@ -2046,10 +2860,18 @@ function LeasesPage() { const triageSummary = triage.summary || {}; const triageGroups = Array.isArray(triage.groups) ? triage.groups : []; const items = divergedOnly - ? data.items.filter((item: any) => ['active_drift', 'detached_residue', 'orphan_cleanup'].includes(item.triage?.category)) + ? data.items.filter((item: any) => + ["active_drift", "detached_residue", "orphan_cleanup"].includes( + item.triage?.category, + ), + ) : data.items; const visibleGroups = divergedOnly - ? triageGroups.filter((group: any) => ['active_drift', 'detached_residue', 'orphan_cleanup'].includes(group.key)) + ? triageGroups.filter((group: any) => + ["active_drift", "detached_residue", "orphan_cleanup"].includes( + group.key, + ), + ) : triageGroups; const renderLeaseTable = (rows: any[]) => ( @@ -2068,19 +2890,25 @@ function LeasesPage() { {rows.map((item: any) => ( - {item.lease_id} + + {item.lease_id} + {item.provider} - {item.instance_id?.slice(0, 12) || '-'} + {item.instance_id?.slice(0, 12) || "-"} {item.thread.thread_id ? ( - {item.thread.thread_id.slice(0, 8)} + + {item.thread.thread_id.slice(0, 8)} + ) : ( orphan )} - + + + {item.updated_ago} - {item.error || '-'} + {item.error || "-"} ))} @@ -2090,11 +2918,17 @@ function LeasesPage() { return (

    {data.title}

    -

    Legacy lease view, now backed by backend triage semantics. Use this when you want lease-only focus without losing the full raw table.

    +

    + Legacy lease view, now backed by backend triage semantics. Use this when + you want lease-only focus without losing the full raw table. +

    total - {items.length}{divergedOnly ? ` / ${data.count}` : ''} + + {items.length} + {divergedOnly ? ` / ${data.count}` : ""} + active drift @@ -2114,16 +2948,23 @@ function LeasesPage() {
    - - {divergedOnly ? 'Show all leases' : 'Only attention buckets'} + + {divergedOnly ? "Show all leases" : "Only attention buckets"} + + + Open resources - Open resources
    {visibleGroups .filter((group: any) => group.count > 0) .map((group: any) => (
    -

    {group.title} ({group.count})

    +

    + {group.title} ({group.count}) +

    {group.description}

    {renderLeaseTable(group.items)}
    @@ -2174,7 +3015,8 @@ function LeaseDetailPage() { Provider: {data.info.provider}
    - Instance ID: {data.info.instance_id || '-'} + Instance ID:{" "} + {data.info.instance_id || "-"}
    Created: {data.info.created_ago} @@ -2219,7 +3061,9 @@ function LeaseDetailPage() {
    -

    {data.lease_events.title} ({data.lease_events.count})

    +

    + {data.lease_events.title} ({data.lease_events.count}) +

    @@ -2232,7 +3076,9 @@ function LeaseDetailPage() { {data.lease_events.items.map((e: any) => ( - + @@ -2255,7 +3101,7 @@ function DivergedPage() { const [data, setData] = React.useState(null); React.useEffect(() => { - fetchAPI('/diverged').then(setData); + fetchAPI("/diverged").then(setData); }, []); if (!data) return
    Loading...
    ; @@ -2280,21 +3126,25 @@ function DivergedPage() {
    {data.items.map((item: any) => ( - + - - + ))} @@ -2308,7 +3158,7 @@ function EventsPage() { const [data, setData] = React.useState(null); React.useEffect(() => { - fetchAPI('/events?limit=100').then(setData); + fetchAPI("/events?limit=100").then(setData); }, []); if (!data) return
    Loading...
    ; @@ -2332,15 +3182,19 @@ function EventsPage() {
    {data.items.map((item: any) => ( - + - + ))} @@ -2408,13 +3262,17 @@ function EventDetailPage() { {data.related_lease.lease_id && (

    Related Lease

    - {data.related_lease.lease_id} + + {data.related_lease.lease_id} +
    )}

    Payload

    -
    {JSON.stringify(data.payload, null, 2)}
    +
    +          {JSON.stringify(data.payload, null, 2)}
    +        
    ); @@ -2423,16 +3281,18 @@ function EventDetailPage() { // Page: Evaluation function EvaluationPage() { const location = useLocation(); - const [dataset, setDataset] = React.useState('SWE-bench/SWE-bench_Lite'); - const [split, setSplit] = React.useState('test'); - const [startIdx, setStartIdx] = React.useState('0'); - const [sliceCount, setSliceCount] = React.useState('10'); - const [promptProfile, setPromptProfile] = React.useState('heuristic'); - const [timeoutSec, setTimeoutSec] = React.useState('180'); - const [recursionLimit, setRecursionLimit] = React.useState('256'); - const [sandbox, setSandbox] = React.useState('local'); - const [runStatus, setRunStatus] = React.useState<'idle' | 'starting' | 'submitted' | 'error'>('idle'); - const [evaluationId, setEvaluationId] = React.useState(''); + const [dataset, setDataset] = React.useState("SWE-bench/SWE-bench_Lite"); + const [split, setSplit] = React.useState("test"); + const [startIdx, setStartIdx] = React.useState("0"); + const [sliceCount, setSliceCount] = React.useState("10"); + const [promptProfile, setPromptProfile] = React.useState("heuristic"); + const [timeoutSec, setTimeoutSec] = React.useState("180"); + const [recursionLimit, setRecursionLimit] = React.useState("256"); + const [sandbox, setSandbox] = React.useState("local"); + const [runStatus, setRunStatus] = React.useState< + "idle" | "starting" | "submitted" | "error" + >("idle"); + const [evaluationId, setEvaluationId] = React.useState(""); const [runError, setRunError] = React.useState(null); const [evaluations, setEvaluations] = React.useState([]); const [evalOffset, setEvalOffset] = React.useState(0); @@ -2444,7 +3304,9 @@ function EvaluationPage() { const loadEvaluations = React.useCallback(async () => { setRunsLoading(true); try { - const payload = await fetchAPI(`/evaluations?limit=${evalLimit}&offset=${evalOffset}`); + const payload = await fetchAPI( + `/evaluations?limit=${evalLimit}&offset=${evalOffset}`, + ); setEvaluations(Array.isArray(payload?.items) ? payload.items : []); setEvalPagination(payload?.pagination || null); } catch (e: any) { @@ -2463,15 +3325,15 @@ function EvaluationPage() { }, [loadEvaluations]); async function handleStart() { - if (runStatus === 'starting') return; + if (runStatus === "starting") return; setRunError(null); - setEvaluationId(''); - setRunStatus('starting'); + setEvaluationId(""); + setRunStatus("starting"); try { - const payload = await fetchJSON('/api/monitor/evaluations', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, + const payload = await fetchJSON("/api/monitor/evaluations", { + method: "POST", + headers: { "Content-Type": "application/json" }, body: JSON.stringify({ dataset, split, @@ -2481,77 +3343,112 @@ function EvaluationPage() { timeout_sec: Number(timeoutSec), recursion_limit: Number(recursionLimit), sandbox, - arm: 'monitor', + arm: "monitor", }), }); - const nextEvalId = String(payload?.evaluation_id || ''); - if (!nextEvalId) throw new Error('create evaluation returned empty evaluation_id'); + const nextEvalId = String(payload?.evaluation_id || ""); + if (!nextEvalId) + throw new Error("create evaluation returned empty evaluation_id"); setEvaluationId(nextEvalId); - setRunStatus('submitted'); + setRunStatus("submitted"); setComposerOpen(false); await loadEvaluations(); } catch (e: any) { - setRunStatus('error'); + setRunStatus("error"); setRunError(e?.message || String(e)); } } - const currentEval = evaluations.find((item: any) => item.evaluation_id === evaluationId); + const currentEval = evaluations.find( + (item: any) => item.evaluation_id === evaluationId, + ); const submissionPreview = { dataset, split, - start: Number(startIdx || '0'), - count: Number(sliceCount || '0'), + start: Number(startIdx || "0"), + count: Number(sliceCount || "0"), prompt_profile: promptProfile, - timeout_sec: Number(timeoutSec || '0'), - recursion_limit: Number(recursionLimit || '0'), + timeout_sec: Number(timeoutSec || "0"), + recursion_limit: Number(recursionLimit || "0"), sandbox, - arm: 'monitor', + arm: "monitor", }; const parameterReference = [ - ['Dataset', 'Benchmark source', 'Lite for fast iteration, Verified for strict runs'], - ['Split', 'Data partition', 'Use test for formal comparison'], - ['Start / Slice', 'Case range', 'Run small slices first, then scale up'], - ['Prompt Profile', 'Prompt strategy', 'Compare baseline vs heuristic in A/B'], - ['Timeout(s)', 'Per-case wall clock limit', '180~300 for initial runs'], - ['Recursion', 'Agent iteration budget', '256 default, raise to 512 for hard tasks'], - ['Sandbox', 'Execution provider', 'Use local for quick checks, daytona for infra parity'], + [ + "Dataset", + "Benchmark source", + "Lite for fast iteration, Verified for strict runs", + ], + ["Split", "Data partition", "Use test for formal comparison"], + ["Start / Slice", "Case range", "Run small slices first, then scale up"], + [ + "Prompt Profile", + "Prompt strategy", + "Compare baseline vs heuristic in A/B", + ], + ["Timeout(s)", "Per-case wall clock limit", "180~300 for initial runs"], + [ + "Recursion", + "Agent iteration budget", + "256 default, raise to 512 for hard tasks", + ], + [ + "Sandbox", + "Execution provider", + "Use local for quick checks, daytona for infra parity", + ], ]; const statusReference = [ - ['queued', 'Job is persisted and waiting for executor slots.'], - ['running', 'At least one thread is active and writing status updates.'], - ['provisional', 'Artifacts are incomplete (missing eval summary or eval error). Score is not final.'], - ['completed', 'Runner finished and artifacts were written.'], - ['completed_with_errors', 'Runner finished, but summary reports failed items/errors.'], - ['error', 'Runner failed; open detail page to inspect stderr and trace.'], + ["queued", "Job is persisted and waiting for executor slots."], + ["running", "At least one thread is active and writing status updates."], + [ + "provisional", + "Artifacts are incomplete (missing eval summary or eval error). Score is not final.", + ], + ["completed", "Runner finished and artifacts were written."], + [ + "completed_with_errors", + "Runner finished, but summary reports failed items/errors.", + ], + ["error", "Runner failed; open detail page to inspect stderr and trace."], ]; const currentProgress = currentEval ? evalProgress(currentEval) : null; React.useEffect(() => { - window.scrollTo({ top: 0, left: 0, behavior: 'auto' }); + window.scrollTo({ top: 0, left: 0, behavior: "auto" }); }, []); React.useEffect(() => { // @@@evaluation-query-open - allow deterministic screenshot/review entry to open config panel via ?new=1. const query = new URLSearchParams(location.search); - setComposerOpen(query.get('new') === '1'); + setComposerOpen(query.get("new") === "1"); }, [location.search]); return (

    Evaluations

    -

    One evaluation contains many threads. Start jobs from config panel, track durable progress in list, then drill into thread trace.

    +

    + One evaluation contains many threads. Start jobs from config panel, + track durable progress in list, then drill into thread trace. +

    Current Submission

    -

    Latest evaluation submitted from this page.

    -
    evaluation: {evaluationId || '-'}
    +

    + Latest evaluation submitted from this page. +

    +
    evaluation: {evaluationId || "-"}

    status: {currentEval?.status || runStatus}

    {currentEval && currentProgress && (
    -
    phase: {String(currentEval.status || '-').toUpperCase()}
    +
    + phase: {String(currentEval.status || "-").toUpperCase()} +
    -
    +
    {formatProgressSummary(currentProgress)} @@ -2561,16 +3458,25 @@ function EvaluationPage() { {runError &&
    run error: {runError}
    } {evaluationId && (

    - open evaluation detail + + open evaluation detail +

    )}

    Start New Evaluation

    -

    Open a focused config panel. After submit, track progress in the evaluation list below.

    -
    @@ -2580,15 +3486,24 @@ function EvaluationPage() {

    1. Submit

    -

    Open config, choose scope/profile/sandbox, then submit one batch run.

    +

    + Open config, choose scope/profile/sandbox, then submit one batch + run. +

    2. Track

    -

    List auto-refreshes every 5s and survives reload. Status is backend-persisted.

    +

    + List auto-refreshes every 5s and survives reload. Status is + backend-persisted. +

    3. Inspect

    -

    Open evaluation detail to jump to per-thread trace and tool-call timeline.

    +

    + Open evaluation detail to jump to per-thread trace and tool-call + timeline. +

    @@ -2597,7 +3512,9 @@ function EvaluationPage() {

    Status Guide

      {statusReference.map((row) => ( -
    • {row[0]}: {row[1]}
    • +
    • + {row[0]}: {row[1]} +
    • ))}
    @@ -2605,7 +3522,9 @@ function EvaluationPage() {

    Field Guide

      {parameterReference.slice(0, 4).map((row) => ( -
    • {row[0]}: {row[1]}
    • +
    • + {row[0]}: {row[1]} +
    • ))}
    @@ -2615,15 +3534,23 @@ function EvaluationPage() {

    Evaluations ({evalPagination?.total ?? evaluations.length})

    -

    - Auto refresh: 5s {runsLoading ? '| loading...' : ''} - {' '}| page {evalPagination?.page ?? 1} + Auto refresh: 5s {runsLoading ? "| loading..." : ""} | page{" "} + {evalPagination?.page ?? 1} +

    +

    + Evaluation = one batch run. Progress shows + total/completed/started-or-running/pending. Click Evaluation ID for + detail trace and thread links.

    -

    Evaluation = one batch run. Progress shows total/completed/started-or-running/pending. Click Evaluation ID for detail trace and thread links.

    {e.event_id} + {e.event_id} + {e.event_type} {e.source} {e.created_ago}
    {item.lease_id} + {item.lease_id} + {item.provider} {item.thread.thread_id ? ( - {item.thread.thread_id.slice(0, 8)} + + {item.thread.thread_id.slice(0, 8)} + ) : ( orphan )} {item.state_badge.desired} {item.state_badge.observed} + {item.state_badge.hours_diverged}h {item.error || '-'}{item.error || "-"}
    {item.event_type} + {item.event_type} + {item.source} {item.provider} {item.lease.lease_id ? ( {item.lease.lease_id} - ) : '-'} + ) : ( + "-" + )} {item.error || '-'}{item.error || "-"} {item.created_ago}
    @@ -2631,8 +3558,12 @@ function EvaluationPage() { - - + + @@ -2640,18 +3571,32 @@ function EvaluationPage() { {evaluations.map((item: any) => ( - + - - + + - + ))} {evaluations.length === 0 && ( @@ -2695,17 +3649,25 @@ function EvaluationPage() {

    - offset={evalPagination?.offset ?? 0} | limit={evalPagination?.limit ?? evalLimit} | total={evalPagination?.total ?? evaluations.length} + offset={evalPagination?.offset ?? 0} | limit= + {evalPagination?.limit ?? evalLimit} | total= + {evalPagination?.total ?? evaluations.length}

    -

    Configure run scope, profile and runtime, then submit.

    +

    + Configure run scope, profile and runtime, then submit. +

    @@ -2733,39 +3707,65 @@ function EvaluationPage() { - setDataset(e.target.value)} + > + + -

    Benchmark source. Lite is faster; Verified is stricter and slower.

    +

    + Benchmark source. Lite is faster; Verified is stricter and + slower. +

    - setSplit(e.target.value)} + > -

    Dataset partition. Use test for formal comparison.

    +

    + Dataset partition. Use test{" "} + for formal comparison. +

    - setStartIdx(e.target.value)} /> -

    Starting index inside the selected split.

    + setStartIdx(e.target.value)} + /> +

    + Starting index inside the selected split. +

    - setSliceCount(e.target.value)} + > -

    How many items to run in this evaluation batch.

    +

    + How many items to run in this evaluation batch. +

    @@ -2777,18 +3777,29 @@ function EvaluationPage() { - setPromptProfile(e.target.value)} + > -

    Prompt strategy passed to runner. Used for A/B profile comparison.

    +

    + Prompt strategy passed to runner. Used for A/B profile + comparison. +

    - setRecursionLimit(e.target.value)} /> -

    Agent recursion/iteration budget per item.

    + setRecursionLimit(e.target.value)} + /> +

    + Agent recursion/iteration budget per item. +

    @@ -2800,38 +3811,60 @@ function EvaluationPage() { - setTimeoutSec(e.target.value)} /> -

    Per-item wall-clock timeout in seconds.

    + setTimeoutSec(e.target.value)} + /> +

    + Per-item wall-clock timeout in seconds. +

    - setSandbox(e.target.value)} + > -

    Execution environment provider for this run.

    +

    + Execution environment provider for this run. +

    - -
    -

    Submits config to backend and starts an evaluation job.

    +

    + Submits config to backend and starts an evaluation job. +

    Submission Preview -
    {JSON.stringify(submissionPreview, null, 2)}
    +
    +                {JSON.stringify(submissionPreview, null, 2)}
    +              
    @@ -2892,60 +3925,89 @@ function EvaluationDetailPage() { threads_done: data.info?.threads_done ?? 0, threads_running: data.info?.threads_running ?? 0, slice_count: data.info?.slice_count ?? data.info?.threads_total ?? 0, - progress_source: data.info?.progress_source ?? 'thread_rows', + progress_source: data.info?.progress_source ?? "thread_rows", }); - const threadStateLabel = detailProgress.mode === 'checkpoint_estimate' ? 'started' : 'running'; - const scoreGate = String(data.info?.score?.score_gate || 'provisional'); - const publishable = Boolean(data.info?.score?.publishable ?? (scoreGate === 'final')); + const threadStateLabel = + detailProgress.mode === "checkpoint_estimate" ? "started" : "running"; + const scoreGate = String(data.info?.score?.score_gate || "provisional"); + const publishable = Boolean( + data.info?.score?.publishable ?? scoreGate === "final", + ); const scoreFinal = publishable; const summaryReady = !!data.info?.score?.eval_summary_path; const operator = data.info?.operator_surface || {}; const statusToneClass = - data.info.status === 'completed' - ? 'chip-success' - : data.info.status === 'error' - ? 'chip-danger' - : data.info.status === 'provisional' || data.info.status === 'completed_with_errors' - ? 'chip-warning' - : ''; + data.info.status === "completed" + ? "chip-success" + : data.info.status === "error" + ? "chip-danger" + : data.info.status === "provisional" || + data.info.status === "completed_with_errors" + ? "chip-warning" + : ""; return (

    Evaluation: {shortId(data.evaluation_id, 14)}

    - {data.info.status} + + {data.info.status} + {data.info.dataset} - {threadStateLabel}={data.info.threads_running}/{data.info.threads_total} + + {threadStateLabel}={data.info.threads_running}/ + {data.info.threads_total} + gate={scoreGate} - + publishable={String(publishable)} - score={scoreFinal ? `${data.info.score?.resolved_instances ?? 0}/${data.info.score?.total_instances ?? 0} (${formatPct(data.info.score?.primary_score_pct)})` : 'PROVISIONAL'} + score= + {scoreFinal + ? `${data.info.score?.resolved_instances ?? 0}/${data.info.score?.total_instances ?? 0} (${formatPct(data.info.score?.primary_score_pct)})` + : "PROVISIONAL"}
    -
    phase: {String(data.info.status || '-').toUpperCase()}
    +
    + phase: {String(data.info.status || "-").toUpperCase()} +
    -
    +
    {formatProgressSummary(detailProgress)}
    -
    +

    Operator Status

    -

    {operator.summary || 'Inspect the current evaluation state before judging score or trace coverage.'}

    +

    + {operator.summary || + "Inspect the current evaluation state before judging score or trace coverage."} +

    - - {operator.tone || 'default'} + + {operator.tone || "default"}
    - {operator.headline || 'Evaluation operator surface'} + + {operator.headline || "Evaluation operator surface"} +

    Facts

    @@ -2962,7 +4024,8 @@ function EvaluationDetailPage() {
      {(operator.artifacts || []).map((item: any) => (
    • - {item.label}: {item.path} + {item.label}:{" "} + {item.path}
    • ))}
    @@ -2987,12 +4050,24 @@ function EvaluationDetailPage() {

    Config

    -
    Split: {data.info.split}
    -
    Start: {data.info.start_idx}
    -
    Count: {data.info.slice_count}
    -
    Profile: {data.info.prompt_profile}
    -
    Timeout: {data.info.timeout_sec}s
    -
    Recursion: {data.info.recursion_limit}
    +
    + Split: {data.info.split} +
    +
    + Start: {data.info.start_idx} +
    +
    + Count: {data.info.slice_count} +
    +
    + Profile: {data.info.prompt_profile} +
    +
    + Timeout: {data.info.timeout_sec}s +
    +
    + Recursion: {data.info.recursion_limit} +
    @@ -3000,42 +4075,119 @@ function EvaluationDetailPage() {

    Score

    -
    Score Gate: {scoreGate}
    -
    Publishable: {String(publishable)}
    -
    Summary: {summaryReady ? 'ready' : 'missing'}
    -
    Resolved: {data.info.score?.resolved_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Resolved Rate: {formatPct(data.info.score?.resolved_rate_pct)}
    -
    Completed: {data.info.score?.completed_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Completed Rate: {formatPct(data.info.score?.completed_rate_pct)}
    -
    Non-empty Patch: {data.info.score?.non_empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Non-empty Rate: {formatPct(data.info.score?.non_empty_patch_rate_pct)}
    -
    Empty Patch: {data.info.score?.empty_patch_instances ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Errors: {data.info.score?.error_instances ?? 0}
    -
    Trace Active: {data.info.score?.active_trace_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Tool-call Threads: {data.info.score?.tool_call_threads ?? 0}/{data.info.score?.total_instances ?? 0}
    -
    Tool-call Coverage: {formatPct(data.info.score?.tool_call_thread_rate_pct)}
    -
    Tool Calls Total: {data.info.score?.tool_calls_total ?? 0}
    -
    Avg Tool Calls(active): {data.info.score?.avg_tool_calls_per_active_thread ?? '-'}
    -
    Recursion Cap Hits: {data.info.score?.recursion_cap_hits ?? 0}{data.info.score?.recursion_limit ? ` / cap ${data.info.score.recursion_limit}` : ''}
    -
    Run Dir: {data.info.score?.run_dir || '-'}
    +
    + Score Gate: {scoreGate} +
    +
    + Publishable: {String(publishable)} +
    +
    + Summary: {summaryReady ? "ready" : "missing"} +
    +
    + Resolved:{" "} + {data.info.score?.resolved_instances ?? 0}/ + {data.info.score?.total_instances ?? 0} +
    +
    + Resolved Rate:{" "} + {formatPct(data.info.score?.resolved_rate_pct)} +
    +
    + Completed:{" "} + {data.info.score?.completed_instances ?? 0}/ + {data.info.score?.total_instances ?? 0} +
    +
    + Completed Rate:{" "} + {formatPct(data.info.score?.completed_rate_pct)} +
    +
    + Non-empty Patch:{" "} + {data.info.score?.non_empty_patch_instances ?? 0}/ + {data.info.score?.total_instances ?? 0} +
    +
    + Non-empty Rate:{" "} + {formatPct(data.info.score?.non_empty_patch_rate_pct)} +
    +
    + Empty Patch:{" "} + {data.info.score?.empty_patch_instances ?? 0}/ + {data.info.score?.total_instances ?? 0} +
    +
    + Errors: {data.info.score?.error_instances ?? 0} +
    +
    + Trace Active:{" "} + {data.info.score?.active_trace_threads ?? 0}/ + {data.info.score?.total_instances ?? 0} +
    +
    + Tool-call Threads:{" "} + {data.info.score?.tool_call_threads ?? 0}/ + {data.info.score?.total_instances ?? 0} +
    +
    + Tool-call Coverage:{" "} + {formatPct(data.info.score?.tool_call_thread_rate_pct)} +
    +
    + Tool Calls Total:{" "} + {data.info.score?.tool_calls_total ?? 0} +
    +
    + Avg Tool Calls(active):{" "} + {data.info.score?.avg_tool_calls_per_active_thread ?? "-"} +
    +
    + Recursion Cap Hits:{" "} + {data.info.score?.recursion_cap_hits ?? 0} + {data.info.score?.recursion_limit + ? ` / cap ${data.info.score.recursion_limit}` + : ""} +
    +
    + Run Dir:{" "} + {data.info.score?.run_dir || "-"} +
    ) : (
    Score artifacts (provisional)
    -
    Score Gate: {scoreGate}
    -
    Publishable: {String(publishable)}
    -
    Summary: {summaryReady ? 'ready' : 'missing'}
    -
    Final Score: blocked (provisional)
    -
    Block Reason: {data.info.score?.manifest_eval_error ? 'manifest_eval_error' : 'missing_eval_summary'}
    -
    Run Dir: {data.info.score?.run_dir || '-'}
    +
    + Score Gate: {scoreGate} +
    +
    + Publishable: {String(publishable)} +
    +
    + Summary: {summaryReady ? "ready" : "missing"} +
    +
    + Final Score: blocked (provisional) +
    +
    + Block Reason:{" "} + {data.info.score?.manifest_eval_error + ? "manifest_eval_error" + : "missing_eval_summary"} +
    +
    + Run Dir:{" "} + {data.info.score?.run_dir || "-"} +
    )}
    -

    {data.threads.title} ({data.threads.count})

    +

    + {data.threads.title} ({data.threads.count}) +

    Dataset Range Profile / SandboxStatusProgress + Status + + Progress + Score Updated
    {shortId(item.evaluation_id, 14)} + + {shortId(item.evaluation_id, 14)} + + {item.dataset}{item.start_idx}..{item.start_idx + item.slice_count - 1}{item.prompt_profile || '-'} / {item.sandbox || '-'} + {item.start_idx}..{item.start_idx + item.slice_count - 1} + + {item.prompt_profile || "-"} / {item.sandbox || "-"} + {(() => { // @@@publishable-preferred - publishable is the canonical release gate; score_gate stays as compatibility fallback. - const publishable = item.score?.publishable ?? (item.score?.score_gate === 'final'); + const publishable = + item.score?.publishable ?? + item.score?.score_gate === "final"; return ( <> -
    {String(item.status || '-').toUpperCase()}
    -
    publishable: {publishable ? 'TRUE' : 'FALSE'}
    +
    + {String(item.status || "-").toUpperCase()} +
    +
    + publishable: {publishable ? "TRUE" : "FALSE"} +
    ); })()} @@ -2662,18 +3607,27 @@ function EvaluationPage() { return (
    -
    +
    +
    +
    + {formatProgressSummary(p)}
    -
    {formatProgressSummary(p)}
    ); })()}
    - {(item.score?.publishable ?? (item.score?.score_gate === 'final')) ? ( + {(item.score?.publishable ?? + item.score?.score_gate === "final") ? ( <>
    R {formatResolvedScore(item)}
    -
    C {formatPct(item.score?.completed_rate_pct)} | T {formatPct(item.score?.tool_call_thread_rate_pct)}
    +
    + C {formatPct(item.score?.completed_rate_pct)} | T{" "} + {formatPct(item.score?.tool_call_thread_rate_pct)} +
    ) : ( <> @@ -2682,7 +3636,7 @@ function EvaluationPage() { )}
    {item.updated_ago || '-'}{item.updated_ago || "-"}
    @@ -3054,15 +4206,23 @@ function EvaluationDetailPage() { + - @@ -3086,14 +4246,14 @@ function ScrollToTopOnRouteChange() { React.useEffect(() => { // @@@history-scroll-restore-disable - browser may restore stale scroll offsets and make user land at page tail. const prev = window.history.scrollRestoration; - window.history.scrollRestoration = 'manual'; + window.history.scrollRestoration = "manual"; return () => { window.history.scrollRestoration = prev; }; }, []); React.useEffect(() => { // @@@route-scroll-reset - switch tabs/details should always start from top to avoid "tail landing" confusion. - window.scrollTo({ top: 0, left: 0, behavior: 'auto' }); + window.scrollTo({ top: 0, left: 0, behavior: "auto" }); }, [pathname]); return null; } @@ -3106,15 +4266,21 @@ function Layout({ children }: { children: React.ReactNode }) {

    Mycel Sandbox Monitor

    - Dashboard - Threads - Resources - Eval + + Dashboard + + + Threads + + + Resources + + + Eval +
    -
    - {children} -
    +
    {children}
    ); } @@ -3135,11 +4301,17 @@ export default function App() { } /> } /> } /> - } /> + } + /> } /> } /> } /> - } /> + } + /> diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 237c5647c..f40d26d7a 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -26,7 +26,9 @@ } body { - font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Noto Sans SC', sans-serif; + font-family: + -apple-system, BlinkMacSystemFont, "Segoe UI", "PingFang SC", + "Noto Sans SC", sans-serif; background: var(--bg-soft); color: var(--text); line-height: 1.6; @@ -74,7 +76,10 @@ body { color: var(--text-secondary); text-decoration: none; font-weight: 500; - transition: color 0.18s ease, background 0.18s ease, border-color 0.18s ease; + transition: + color 0.18s ease, + background 0.18s ease, + border-color 0.18s ease; border: 1px solid transparent; border-radius: 999px; padding: 0.45rem 0.8rem; @@ -105,8 +110,12 @@ body { } @keyframes fadeIn { - from { opacity: 0; } - to { opacity: 1; } + from { + opacity: 0; + } + to { + opacity: 1; + } } /* Breadcrumb */ @@ -267,7 +276,11 @@ h2 { .monitor-provider-card { border: 1px solid var(--border); - background: linear-gradient(180deg, var(--panel) 0%, var(--panel-strong) 100%); + background: linear-gradient( + 180deg, + var(--panel) 0%, + var(--panel-strong) 100% + ); border-radius: 16px; padding: 1rem; text-align: left; @@ -456,7 +469,11 @@ h2 { .provider-detail-shell { border: 1px solid var(--border); border-radius: 18px; - background: linear-gradient(180deg, var(--panel) 0%, var(--panel-strong) 100%); + background: linear-gradient( + 180deg, + var(--panel) 0%, + var(--panel-strong) 100% + ); padding: 1rem 1rem 1.15rem; margin-top: 1rem; } @@ -482,6 +499,27 @@ h2 { margin-top: 1rem; } +.monitor-lease-detail-shell { + margin-bottom: 1rem; + border: 1px solid var(--border); + border-radius: 16px; + background: linear-gradient( + 180deg, + var(--panel) 0%, + var(--panel-strong) 100% + ); + padding: 1rem; +} + +.monitor-lease-detail-id { + color: var(--text-secondary); + font-size: 0.88rem; +} + +.monitor-lease-session-table { + margin-top: 0.25rem; +} + .provider-lease-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); @@ -497,6 +535,22 @@ h2 { display: flex; flex-direction: column; gap: 0.75rem; + text-align: left; + cursor: pointer; + transition: + border-color 0.18s ease, + background 0.18s ease, + box-shadow 0.18s ease; +} + +.provider-lease-card:hover { + border-color: var(--border-strong); + background: var(--bg-soft); +} + +.provider-lease-card.is-selected { + border-color: rgba(37, 99, 235, 0.24); + box-shadow: inset 0 0 0 1px rgba(37, 99, 235, 0.1); } .provider-lease-header { @@ -510,7 +564,7 @@ h2 { margin: 0.22rem 0 0; color: var(--text-muted); font-size: 0.8rem; - font-family: 'SF Mono', Monaco, monospace; + font-family: "SF Mono", Monaco, monospace; } .provider-lease-link { @@ -715,7 +769,7 @@ td a:hover { } .mono { - font-family: 'SF Mono', Monaco, monospace; + font-family: "SF Mono", Monaco, monospace; font-size: 0.85rem; } @@ -797,7 +851,11 @@ section li { } .hint-box { - background: linear-gradient(180deg, var(--panel) 0%, var(--panel-strong) 100%); + background: linear-gradient( + 180deg, + var(--panel) 0%, + var(--panel-strong) 100% + ); border: 1px solid var(--border); border-radius: 16px; padding: 1.15rem 1.2rem; @@ -837,7 +895,11 @@ section li { padding: 0.55rem 0.95rem; border: 1px solid var(--border); cursor: pointer; - transition: background 0.18s ease, border-color 0.18s ease, color 0.18s ease, transform 0.18s ease; + transition: + background 0.18s ease, + border-color 0.18s ease, + color 0.18s ease, + transform 0.18s ease; } .ghost-btn { @@ -918,7 +980,7 @@ section li { padding: 1.5rem; border-radius: 12px; overflow-x: auto; - font-family: 'SF Mono', Monaco, monospace; + font-family: "SF Mono", Monaco, monospace; font-size: 0.85rem; line-height: 1.5; color: var(--text); @@ -1118,14 +1180,14 @@ section li { .trace-step { color: #89a4c0; - font-family: 'SF Mono', Monaco, monospace; + font-family: "SF Mono", Monaco, monospace; font-size: 0.85rem; } .trace-event { color: var(--text); font-size: 0.85rem; - font-family: 'SF Mono', Monaco, monospace; + font-family: "SF Mono", Monaco, monospace; } .trace-run-id { @@ -1150,7 +1212,7 @@ section li { border: 1px solid var(--border); border-radius: 7px; padding: 0.55rem 0.65rem; - font-family: 'SF Mono', Monaco, monospace; + font-family: "SF Mono", Monaco, monospace; font-size: 0.82rem; color: var(--text); white-space: pre-wrap; @@ -1413,7 +1475,11 @@ section.eval-runtime-panel { } .eval-operator-warning { - background: linear-gradient(180deg, var(--panel) 0%, var(--warning-soft) 100%); + background: linear-gradient( + 180deg, + var(--panel) 0%, + var(--warning-soft) 100% + ); } .eval-operator-danger { @@ -1421,7 +1487,11 @@ section.eval-runtime-panel { } .eval-operator-success { - background: linear-gradient(180deg, var(--panel) 0%, var(--success-soft) 100%); + background: linear-gradient( + 180deg, + var(--panel) 0%, + var(--success-soft) 100% + ); } .eval-operator-hero { From 50b9c17737546370b357fff85245f89d9243716b Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 19:36:07 +0800 Subject: [PATCH 25/87] feat: scope monitor sessions to selected lease --- ...2026-04-06-resource-observability-split.md | 4 ++ ...-06-resource-observability-split-design.md | 12 +++++ frontend/monitor/src/App.tsx | 54 ++++++++++++++++--- frontend/monitor/src/styles.css | 13 +++++ 4 files changed, 77 insertions(+), 6 deletions(-) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 5595dd8f9..a4a6acbf5 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -40,6 +40,10 @@ - selected lease cards now open a dedicated `Lease Detail` panel before the full provider session table - the panel reuses existing payload data only: lease/thread links, member, started time, and grouped session rows - this gives monitor resources a local deep-drill layer without changing backend contracts +- `D4` now has a landed phase-6: + - the provider session table now defaults to `Selected lease` scope instead of always showing every provider session row + - operators can switch back to `All provider sessions` when they want the full truth table + - this makes the lease drill-down and the table below it read as one path instead of two competing surfaces - `D2` now has a landed phase-2: - evaluation detail payload includes backend-owned `info.operator_surface` - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 340f5f69d..e6c397d39 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -320,6 +320,18 @@ The dashboard is a switchboard, not a full destination page. It should answer - no import of product sandbox components - only existing provider/session/lease payload data is reused +### Current D4 Phase-6 Landing + +- the provider session table now obeys the active drill-down instead of always dumping every provider row: + - default scope is `Selected lease` + - operator can explicitly switch to `All provider sessions` +- this turns the lease detail + session table into one coherent path: + - choose lease + - inspect lease detail + - see only the session rows for that lease by default + - opt back into the noisier provider-wide truth table when needed +- still no backend changes; this is a frontend-owned information-ordering fix over existing payload data + ### D4 Remaining Gaps - monitor provider/detail surface is now close to the product resources page in interaction quality, but still lacks the richer sandbox-sheet capabilities such as file browsing or per-session live metrics diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 313b6b6ca..4388d7091 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -714,6 +714,9 @@ function MonitorResourcesPage() { const [leaseData, setLeaseData] = React.useState(null); const [selectedId, setSelectedId] = React.useState(""); const [selectedLeaseId, setSelectedLeaseId] = React.useState(""); + const [sessionScope, setSessionScope] = React.useState<"lease" | "provider">( + "lease", + ); const [loading, setLoading] = React.useState(false); const [refreshing, setRefreshing] = React.useState(false); const [error, setError] = React.useState(null); @@ -788,6 +791,10 @@ function MonitorResourcesPage() { }); }, [leaseData, resourceData, selectedId]); + React.useEffect(() => { + setSessionScope("lease"); + }, [selectedId, selectedLeaseId]); + if (error) { return (
    @@ -854,6 +861,10 @@ function MonitorResourcesPage() { ) || selectedLeaseGroups[0] || null; + const scopedSessions = + sessionScope === "provider" || !selectedLeaseGroup + ? selectedSessions + : selectedLeaseGroup.sessions; return (
    @@ -1129,12 +1140,39 @@ function MonitorResourcesPage() { ) : null}
    -

    Sessions ({selectedSessions.length})

    +

    + Sessions ( + {sessionScope === "provider" + ? selectedSessions.length + : scopedSessions.length} + ) +

    - Global session rows currently attached to this provider. This - is the monitor-side truth surface, not the user projection. + {sessionScope === "provider" + ? "Global session rows currently attached to this provider. This is the full monitor-side truth surface." + : "Session rows for the selected lease group. Switch back to all provider sessions when you need the noisier truth table."}

    +
    + + +
    {item.item_index} - {evalThreadLabel(item.thread_id, data.evaluation_id)} + + {evalThreadLabel(item.thread_id, data.evaluation_id)} + {item.session?.session_url ? ( - {shortId(item.session.session_id)} - ) : '-'} + + {shortId(item.session.session_id)} + + ) : ( + "-" + )} + + {item.run?.run_id ? shortId(item.run.run_id, 12) : "-"} {item.run?.run_id ? shortId(item.run.run_id, 12) : '-'} {item.run?.event_count ?? 0} {item.status} {item.start_idx}
    @@ -1148,7 +1186,7 @@ function MonitorResourcesPage() { - {selectedSessions.map((session: any) => ( + {scopedSessions.map((session: any) => ( ))} - {selectedSessions.length === 0 ? ( + {scopedSessions.length === 0 ? ( - + ) : null} diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index f40d26d7a..d135d2db3 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -913,6 +913,19 @@ section li { color: var(--text); } +.ghost-btn.is-active { + background: var(--accent-soft); + border-color: rgba(37, 99, 235, 0.16); + color: var(--accent); +} + +.segmented-toggle { + display: inline-flex; + align-items: center; + gap: 0.5rem; + flex-wrap: wrap; +} + .primary-btn { background: var(--text); color: #fff; From 7d4f94a69094ac339616f0b794a9587b1175fa75 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:04:28 +0800 Subject: [PATCH 26/87] feat: modernize monitor console shell --- frontend/monitor/src/App.tsx | 321 +++++++++++++++++++++----------- frontend/monitor/src/styles.css | 240 +++++++++++++++++++++--- 2 files changed, 426 insertions(+), 135 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 4388d7091..02d84c4d8 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -149,13 +149,9 @@ function DashboardPage() { return (
    -
    -
    -

    Dashboard

    -

    - Operator landing for resource health, workload pressure, and the - latest evaluation run. -

    +
    +
    + Global health snapshot
    + + One evaluation contains many threads; stay here for durable + progress, then jump to detail when needed. +

    Auto refresh: 5s {runsLoading ? "| loading..." : ""} | page{" "} @@ -4282,7 +4201,153 @@ function EvaluationDetailPage() { ); } -// Layout: Top navigation +const SHELL_NAV = [ + { + to: "/dashboard", + label: "Dashboard", + shortLabel: "DB", + testId: "nav-dashboard", + }, + { + to: "/resources", + label: "Resources", + shortLabel: "RS", + testId: "nav-resources", + }, + { + to: "/evaluation", + label: "Evaluations", + shortLabel: "EV", + testId: "nav-eval", + }, + { to: "/threads", label: "Threads", shortLabel: "TH", testId: "nav-threads" }, + { to: "/traces", label: "Traces", shortLabel: "TR", testId: "nav-traces" }, + { to: "/leases", label: "Leases", shortLabel: "LS", testId: "nav-leases" }, +] as const; + +const GUIDE_SECTIONS = [ + { + title: "Dashboard", + body: "Start here. Read provider health, live workload pressure, and the latest evaluation before drilling into detail.", + }, + { + title: "Resources", + body: "Use the global resources page to inspect provider health, select a lease, and then narrow the session truth surface without losing the global contract.", + }, + { + title: "Evaluations", + body: "Open config only when you are ready to submit. After that, stay in the list or jump into evaluation detail for artifacts, trace, and next-step diagnosis.", + }, + { + title: "Threads / Traces / Leases", + body: "Treat these as truth surfaces. Use them when the dashboard or resources page tells you where to look, not as the first page you land on.", + }, +] as const; + +function shellMeta(pathname: string) { + // @@@shell-route-bucketing - detail routes should inherit the nearest console section rather than render as separate primary destinations. + if (pathname.startsWith("/resources") || pathname.startsWith("/lease")) { + return { + eyebrow: "Global compute surface", + title: "Resources", + description: + "Provider health, lease triage, and scoped session truth for all sandboxes.", + }; + } + if (pathname.startsWith("/evaluation")) { + return { + eyebrow: "Evaluation operations", + title: "Evaluations", + description: + "Start runs, monitor durable progress, and inspect artifacts without losing operator context.", + }; + } + if (pathname.startsWith("/threads") || pathname.startsWith("/thread")) { + return { + eyebrow: "Runtime index", + title: "Threads", + description: + "Global thread index and detail drill-down into sessions, leases, and trace surfaces.", + }; + } + if (pathname.startsWith("/traces") || pathname.startsWith("/session")) { + return { + eyebrow: "Execution traces", + title: "Traces", + description: + "Sequence-level inspection for sessions, tool calls, and conversation surfaces.", + }; + } + if (pathname.startsWith("/leases")) { + return { + eyebrow: "Lease truth", + title: "Leases", + description: + "Use grouped lease triage first, then drop into raw truth when you need exact runtime state.", + }; + } + return { + eyebrow: "Global ops console", + title: "Dashboard", + description: + "Landing page for health, workload, and the fastest path into global resources or active evaluations.", + }; +} + +function OperatorGuideModal({ + open, + onClose, +}: { + open: boolean; + onClose: () => void; +}) { + React.useEffect(() => { + if (!open) return; + const onKeyDown = (event: KeyboardEvent) => { + if (event.key === "Escape") onClose(); + }; + window.addEventListener("keydown", onKeyDown); + return () => window.removeEventListener("keydown", onKeyDown); + }, [open, onClose]); + + if (!open) return null; + + return ( +

    +
    event.stopPropagation()} + > +
    +
    +

    Operator Guide

    +

    How to read this console

    +
    + +
    +

    + This guide stays out of the main content column by default. Open it + when you need orientation, then go back to the live console surface. +

    +
    + {GUIDE_SECTIONS.map((section) => ( +
    +

    {section.title}

    +

    {section.body}

    +
    + ))} +
    +
    +
    + ); +} + function ScrollToTopOnRouteChange() { const { pathname } = useLocation(); React.useEffect(() => { @@ -4301,28 +4366,62 @@ function ScrollToTopOnRouteChange() { } function Layout({ children }: { children: React.ReactNode }) { + const { pathname } = useLocation(); + const [guideOpen, setGuideOpen] = React.useState(false); + const meta = shellMeta(pathname); + return ( -
    - -
    {children}
    + +
    +
    +
    +

    {meta.eyebrow}

    +

    {meta.title}

    +

    + {meta.description} +

    +
    +
    + +
    +
    +
    {children}
    +
    + setGuideOpen(false)} + />
    ); } diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index d135d2db3..d092827d6 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -40,39 +40,74 @@ body { flex-direction: column; } -/* Top Navigation */ -.top-nav { - background: rgba(255, 255, 255, 0.94); - border-bottom: 1px solid var(--border); - padding: 1rem 2rem 0.9rem; +/* Console Shell */ +.console-app { + min-height: 100vh; display: flex; - align-items: center; - justify-content: space-between; - gap: 2rem; + background: + radial-gradient( + circle at top left, + rgba(37, 99, 235, 0.04), + transparent 28% + ), + linear-gradient(180deg, #fbfbfb 0%, #f7f7f6 100%); +} + +.console-sidebar { + width: 272px; + padding: 1.25rem 1rem 1rem; + border-right: 1px solid var(--border); + background: rgba(255, 255, 255, 0.82); + backdrop-filter: blur(18px); + display: flex; + flex-direction: column; + gap: 1.25rem; position: sticky; top: 0; - z-index: 20; - backdrop-filter: blur(14px); + height: 100vh; } -.top-nav-brand { +.console-brand { display: flex; - align-items: center; + align-items: flex-start; + gap: 0.85rem; +} + +.console-brand-mark { + width: 2.35rem; + height: 2.35rem; + border-radius: 0.9rem; + display: grid; + place-items: center; + font-size: 0.92rem; + font-weight: 700; + color: var(--accent); + background: linear-gradient(180deg, #eff6ff 0%, #dbeafe 100%); + border: 1px solid rgba(37, 99, 235, 0.12); } .logo { - font-size: 1.05rem; - font-weight: 600; + font-size: 1rem; + font-weight: 650; color: var(--text); } -.nav-links { +.console-brand-copy { + margin-top: 0.15rem; + color: var(--text-muted); + font-size: 0.84rem; +} + +.console-nav { display: flex; - gap: 0.55rem; - flex-wrap: wrap; + flex-direction: column; + gap: 0.3rem; } -.nav-links a { +.console-nav a { + display: flex; + align-items: center; + gap: 0.7rem; color: var(--text-secondary); text-decoration: none; font-weight: 500; @@ -81,22 +116,97 @@ body { background 0.18s ease, border-color 0.18s ease; border: 1px solid transparent; - border-radius: 999px; - padding: 0.45rem 0.8rem; + border-radius: 14px; + padding: 0.72rem 0.82rem; } -.nav-links a:hover { +.console-nav a:hover { color: var(--text); background: var(--bg-muted); } -.nav-links a[aria-current="page"] { +.console-nav a[aria-current="page"] { color: var(--accent); background: var(--accent-soft); border-color: rgba(37, 99, 235, 0.12); } -/* Content */ +.console-nav-mark { + width: 2rem; + height: 2rem; + flex: 0 0 auto; + border-radius: 0.7rem; + display: grid; + place-items: center; + background: rgba(23, 23, 23, 0.04); + color: var(--text-muted); + font-size: 0.7rem; + letter-spacing: 0.04em; + text-transform: uppercase; +} + +.console-nav a[aria-current="page"] .console-nav-mark { + background: rgba(37, 99, 235, 0.12); + color: var(--accent); +} + +.console-sidebar-foot { + margin-top: auto; + border: 1px solid var(--border); + border-radius: 16px; + background: rgba(255, 255, 255, 0.72); + padding: 0.9rem 0.95rem; + color: var(--text-secondary); + font-size: 0.85rem; +} + +.shell-eyebrow { + margin-bottom: 0.35rem; + font-size: 0.75rem; + line-height: 1.2; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--text-muted); +} + +.console-main { + flex: 1; + min-width: 0; + display: flex; + flex-direction: column; +} + +.console-header { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: 1rem; + padding: 1.35rem 2rem 1rem; + border-bottom: 1px solid rgba(231, 231, 231, 0.8); + background: rgba(255, 255, 255, 0.72); + backdrop-filter: blur(14px); + position: sticky; + top: 0; + z-index: 15; +} + +.console-title { + margin: 0 0 0.45rem; + font-size: 1.7rem; + line-height: 1.08; +} + +.console-description { + margin-bottom: 0; + max-width: 62ch; +} + +.console-header-actions { + display: flex; + align-items: center; + gap: 0.75rem; +} + .content { flex: 1; padding: 2rem; @@ -109,6 +219,45 @@ body { animation: fadeIn 0.2s; } +.shell-modal-backdrop { + position: fixed; + inset: 0; + z-index: 50; + background: rgba(248, 248, 248, 0.78); + backdrop-filter: blur(8px); + padding: 2rem; + overflow-y: auto; +} + +.shell-modal-panel { + width: min(920px, 100%); + margin: 0 auto; + background: var(--panel); + border: 1px solid var(--border); + border-radius: 22px; + box-shadow: 0 24px 80px rgba(23, 23, 23, 0.08); + padding: 1.3rem; +} + +.shell-modal-head { + margin-bottom: 0.35rem; +} + +.shell-modal-panel h2 { + margin: 0; +} + +.shell-guide-grid { + display: grid; + grid-template-columns: repeat(12, minmax(0, 1fr)); + gap: 1rem; + margin-top: 1rem; +} + +.shell-guide-grid > * { + grid-column: span 6; +} + @keyframes fadeIn { from { opacity: 0; @@ -885,6 +1034,16 @@ section li { flex-wrap: wrap; } +.page-toolbar { + margin-bottom: 1rem; +} + +.page-kicker { + display: flex; + align-items: center; + gap: 0.6rem; +} + .ghost-btn, .primary-btn { display: inline-flex; @@ -1612,6 +1771,29 @@ section.eval-runtime-panel { } @media (max-width: 1080px) { + .console-app { + flex-direction: column; + } + + .console-sidebar { + width: 100%; + height: auto; + position: static; + border-right: 0; + border-bottom: 1px solid var(--border); + } + + .console-nav { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); + } + + .console-header { + padding-left: 1.25rem; + padding-right: 1.25rem; + } + + .shell-guide-grid > *, .dashboard-card, .evaluation-flow > *, .evaluation-notes > *, @@ -1625,20 +1807,30 @@ section.eval-runtime-panel { } @media (max-width: 720px) { - .top-nav, + .console-header, .content { padding-left: 1rem; padding-right: 1rem; } + .console-nav { + grid-template-columns: 1fr 1fr; + } + + .console-header { + position: static; + } + h1 { font-size: 1.7rem; } + .shell-modal-backdrop, .eval-composer-backdrop { padding: 1rem; } + .shell-modal-panel, .eval-composer-panel { padding: 1rem; } From 423c00ffe2444e4dd2da30e58a67ea20c121b1ac Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:18:27 +0800 Subject: [PATCH 27/87] feat: deepen monitor console hierarchy --- frontend/monitor/src/App.tsx | 324 +++++++++++--------------------- frontend/monitor/src/styles.css | 134 ++++++++++--- 2 files changed, 225 insertions(+), 233 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 02d84c4d8..cb1235caa 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -128,7 +128,6 @@ function DashboardPage() { if (error) { return (
    -

    Dashboard

    Dashboard load failed: {error}
    ); @@ -149,31 +148,16 @@ function DashboardPage() { return (
    -
    -
    - Global health snapshot -
    - -
    -
    -
    +
    -
    -

    Infra Health

    -

    - Global provider and lease state from the monitor backend. -

    +

    Infra Health

    +
    + + Resources
    - - Open resources -
    -
    -
    -
    +
    +
    +

    Active Workload

    -

    - How much monitored runtime is currently alive across DB - sessions, providers, and evaluations. -

    + + Threads +
    - - Open threads - -
    -
    - - - 0 ? "default" : "warning" - } - /> -
    -
    +
    + + + 0 ? "default" : "warning" + } + /> +
    +
    -
    -
    -
    +
    +

    Latest Eval

    -

    - Most recent evaluation known to the monitor. Use this as the - fastest jump into detail. -

    + + {latestEval ? "Detail" : "Eval list"} +
    - - {latestEval ? "Open latest eval" : "Open eval list"} - -
    - {latestEval ? ( -
    -
    - - {latestEval.status} - - - publishable={String(Boolean(latestEval.publishable))} - -
    -
    - {latestEval.evaluation_id} -
    -
    -
    -
    -
    - {latestEval.threads_done || 0}/{latestEval.threads_total || 0}{" "} - threads · {formatPct(latestEval.progress_pct || 0)} · updated{" "} - {latestEval.updated_ago || "-"} -
    -
    + {latestEval ? ( +
    +
    + + {latestEval.status} + + + publishable={String(Boolean(latestEval.publishable))} + +
    +
    +
    +
    +
    + {latestEval.threads_done || 0}/{latestEval.threads_total || 0}{" "} + threads · {formatPct(latestEval.progress_pct || 0)} · updated{" "} + {latestEval.updated_ago || "-"} +
    -
    - ) : ( -
    -

    - No evaluation rows yet. Open Eval to submit a minimal run. -

    -
    - )} -
    + ) : ( +

    No evaluations yet

    + )} + +
    ); @@ -794,7 +754,6 @@ function MonitorResourcesPage() { if (error) { return (
    -

    Resources

    Resource load failed: {error}
    ); @@ -864,20 +823,8 @@ function MonitorResourcesPage() { return (
    -
    -
    - Global provider health and lease triage -
    - -
    - -
    +
    +
    0 ? "success" : "danger" } /> -
    +
    +
    -
    +
    -
    -

    Providers

    -

    - Same provider surface as the product page, but backed by the - global monitor contract. -

    -
    +

    Providers

    +
    {providers.map((provider: any) => { @@ -1098,15 +1043,10 @@ function MonitorResourcesPage() {
    -
    +

    Leases ({selectedLeaseGroups.length})

    -

    - Monitor-side lease grouping for this provider. This is the - closest equivalent to the product sandbox cards, but still - grounded in global monitor truth. -

    @@ -1139,11 +1079,6 @@ function MonitorResourcesPage() { : scopedSessions.length} ) -

    - {sessionScope === "provider" - ? "Global session rows currently attached to this provider. This is the full monitor-side truth surface." - : "Session rows for the selected lease group. Switch back to all provider sessions when you need the noisier truth table."} -

    ) : null} -
    +

    Lease Health

    -

    - Backend-owned lease lifecycle triage. Separate live drift from - stale detached residue before assuming the whole system is on - fire. -

    Legacy flat table @@ -1258,12 +1188,8 @@ function MonitorResourcesPage() { {hasPrimaryLeaseAttention ? (
    {activeDriftLeases.length > 0 ? ( -
    +

    Active Drift ({activeDriftLeases.length})

    -

    - Recent desired/observed mismatch. These rows deserve live - operator attention before they age into residue. -

    {shortId(session.id, 12)} @@ -1178,9 +1216,13 @@ function MonitorResourcesPage() {
    No sessions reported for this provider. + {sessionScope === "provider" + ? "No sessions reported for this provider." + : "No sessions reported for the selected lease group."} +
    @@ -1304,12 +1230,8 @@ function MonitorResourcesPage() { ) : null} {detachedResidueLeases.length > 0 ? ( -
    +

    Detached Residue ({detachedResidueLeases.length})

    -

    - Detached rows that still want `running` long after the runtime - stopped moving. Usually cleanup debt, not fresh pressure. -

    @@ -1353,13 +1275,8 @@ function MonitorResourcesPage() { {hasSecondaryLeaseAttention ? (
    -
    +

    Cleanup Backlog ({orphanCleanupLeases.length})

    -

    - Rows that already lost thread binding. Keep them visible for - cleanup honesty, but do not confuse them with live compute - pressure. -

    @@ -1393,7 +1310,7 @@ function MonitorResourcesPage() { ) : null} {healthyCapacityLeases.length > 0 ? ( -
    +
    Healthy Capacity ({healthyCapacityLeases.length})
    @@ -1440,7 +1357,7 @@ function MonitorResourcesPage() {
    No lease groups reported yet.
    ) : null} -
    +
    All leases ({leases.length})
    @@ -1521,7 +1438,6 @@ function ThreadsPage() { return (
    -

    {data.title}

    Global thread index. Start here to find the active run, then drill into session, lease, and trace detail. @@ -1644,7 +1560,6 @@ function TracesPage() { return (

    -

    {data.title}

    Run-level trace index for debugging tool calls, checkpoints, and runtime transitions across monitored threads. @@ -2951,7 +2866,6 @@ function LeasesPage() { return (

    -

    {data.title}

    Legacy lease view, now backed by backend triage semantics. Use this when you want lease-only focus without losing the full raw table. @@ -3142,7 +3056,6 @@ function DivergedPage() { return (

    -

    {data.title}

    {data.description}

    Total: {data.count}

    @@ -3199,7 +3112,6 @@ function EventsPage() { return (
    -

    {data.title}

    {data.description}

    Total: {data.count}

    @@ -3445,12 +3357,9 @@ function EvaluationPage() { return (
    -
    -
    +
    +

    Current Submission

    -

    - Latest evaluation submitted from this page. -

    evaluation: {evaluationId || "-"}

    status: {currentEval?.status || runStatus}

    {currentEval && currentProgress && ( @@ -3478,40 +3387,16 @@ function EvaluationPage() {

    )}
    - -
    -

    Start New Evaluation

    -

    - Open a focused config panel. After submit, track progress in the - evaluation list below. -

    - -
    -
    - -
    +

    Evaluations ({evalPagination?.total ?? evaluations.length})

    - - One evaluation contains many threads; stay here for durable - progress, then jump to detail when needed. - + Auto refresh 5s +
    +
    + {evalPagination?.total ?? evaluations.length} evaluations + {runsLoading ? "loading..." : "idle"} + page {evalPagination?.page ?? 1}
    -

    - Auto refresh: 5s {runsLoading ? "| loading..." : ""} | page{" "} - {evalPagination?.page ?? 1} -

    -

    - Evaluation = one batch run. Progress shows - total/completed/started-or-running/pending. Click Evaluation ID for - detail trace and thread links. -

    @@ -3634,6 +3519,7 @@ function EvaluationPage() { Next + {composerOpen && ( @@ -4278,6 +4164,14 @@ function shellMeta(pathname: string) { "Sequence-level inspection for sessions, tool calls, and conversation surfaces.", }; } + if (pathname.startsWith("/events") || pathname.startsWith("/event")) { + return { + eyebrow: "Execution traces", + title: "Events", + description: + "Lease and runtime event history for debugging sequence, source, and error surfaces.", + }; + } if (pathname.startsWith("/leases")) { return { eyebrow: "Lease truth", @@ -4369,6 +4263,7 @@ function Layout({ children }: { children: React.ReactNode }) { const { pathname } = useLocation(); const [guideOpen, setGuideOpen] = React.useState(false); const meta = shellMeta(pathname); + const showEvalComposeAction = pathname === "/evaluation"; return (
    @@ -4407,6 +4302,11 @@ function Layout({ children }: { children: React.ReactNode }) {

    + {showEvalComposeAction ? ( + + Open Config + + ) : null}
    -
    -
    -

    Providers

    - -
    -
    - {providers.map((provider: any) => { - const sessions = Array.isArray(provider.sessions) - ? provider.sessions - : []; - const runningCount = sessions.filter( - (session: any) => session.status === "running", - ).length; - const unavailable = provider.status === "unavailable"; - const cpuUsed = provider.cardCpu?.used; - const memoryUsed = provider.telemetry?.memory?.used; - return ( - +
    +
    + {providers.map((provider: any) => { + const sessions = Array.isArray(provider.sessions) ? provider.sessions : []; + const runningCount = sessions.filter((s: any) => s.status === "running").length; + const unavailable = provider.status === "unavailable"; + return ( +
    -
    -
    - - - -
    - - - - ); - })} +
    + {provider.type} + {sessions.length} sess · {runningCount} run +
    + + ); + })} +
    -
    - - {selectedProvider ? ( -
    +
    + {selectedProvider ? ( + <>
    @@ -1155,8 +1116,12 @@ function MonitorResourcesPage() {
    - - ) : null} + + ) : ( +
    Select a provider from the list.
    + )} +
    +
    diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index c23cc2a7b..8669fd90a 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -475,6 +475,99 @@ h2 { margin-bottom: 1.25rem; } +.resource-split-console { + display: grid; + grid-template-columns: 320px minmax(0, 1fr); + gap: 1rem; + align-items: start; + margin-bottom: 1.25rem; +} + +.resource-rail { + border: 1px solid var(--border); + border-radius: 18px; + background: rgba(255, 255, 255, 0.72); + padding: 0.95rem; + position: sticky; + top: 132px; +} + +.resource-rail h2 { + margin: 0; +} + +.resource-rail .section-row { + margin-bottom: 0.85rem; +} + +.resource-rail-list { + display: flex; + flex-direction: column; + gap: 0.65rem; + max-height: calc(100vh - 220px); + overflow: auto; + padding-right: 0.15rem; +} + +.resource-rail-item { + border: 1px solid transparent; + border-radius: 14px; + background: var(--bg-soft); + padding: 0.8rem 0.9rem; + text-align: left; + display: flex; + flex-direction: column; + gap: 0.42rem; + transition: + border-color 0.18s ease, + background 0.18s ease, + box-shadow 0.18s ease, + transform 0.18s ease; +} + +.resource-rail-item:hover:not(:disabled) { + border-color: var(--border); + background: var(--panel); + transform: translateY(-1px); +} + +.resource-rail-item.is-selected { + border-color: rgba(37, 99, 235, 0.22); + background: var(--panel); + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05); +} + +.resource-rail-item.is-unavailable { + opacity: 0.82; +} + +.resource-rail-row { + display: flex; + align-items: center; + gap: 0.5rem; + color: var(--text); +} + +.resource-rail-row strong { + font-size: 0.93rem; +} + +.resource-rail-meta { + display: flex; + align-items: center; + justify-content: space-between; + gap: 0.75rem; + font-size: 0.78rem; + color: var(--text-secondary); +} + +.resource-detail { + display: flex; + flex-direction: column; + gap: 1rem; + min-width: 0; +} + .monitor-provider-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); @@ -1876,6 +1969,7 @@ section.eval-runtime-panel { .dashboard-card, .dashboard-card-hero, .dashboard-sidebar-stack, + .resource-split-console, .evaluation-flow > *, .evaluation-notes > *, .evaluation-overview > *, @@ -1896,6 +1990,20 @@ section.eval-runtime-panel { background: transparent; backdrop-filter: none; } + + .resource-split-console { + grid-template-columns: 1fr; + } + + .resource-rail { + position: static; + } + + .resource-rail-list { + max-height: none; + overflow: visible; + padding-right: 0; + } } @media (max-width: 720px) { @@ -1909,6 +2017,12 @@ section.eval-runtime-panel { grid-template-columns: 1fr 1fr; } + .resource-rail-meta { + flex-direction: column; + align-items: flex-start; + gap: 0.2rem; + } + .console-header { position: static; } From 8709c7b0387dac39a1787a73c25f42ef7c0639ce Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:27:34 +0800 Subject: [PATCH 29/87] feat: tighten monitor lease detail density --- frontend/monitor/src/App.tsx | 23 ++++++------- frontend/monitor/src/styles.css | 58 +++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 329a245a8..bea6e099e 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -563,10 +563,6 @@ function MonitorLeaseDetailPanel({ group }: { group: any }) { {group.leaseId || "ephemeral"}
    -

    - Monitor-side lease drill-down. Use this layer before dropping into - the full provider session truth table. -

    {group.leaseId ? ( @@ -599,18 +595,18 @@ function MonitorLeaseDetailPanel({ group }: { group: any }) { {stopped}
    -
    -
    +
    +
    Member {group.memberName}
    -
    +
    Thread {group.threadId ? shortId(group.threadId, 14) : "no thread binding"}
    -
    +
    Started {group.startedAt @@ -618,12 +614,12 @@ function MonitorLeaseDetailPanel({ group }: { group: any }) { : "--"}
    -
    +
    Status {group.status}
    - +
    @@ -1040,6 +1036,11 @@ function MonitorResourcesPage() { : scopedSessions.length} ) +

    + {sessionScope === "provider" + ? "full provider truth surface" + : "scoped to selected lease"} +

    -
    Session
    +
    diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 8669fd90a..3b094ad11 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -799,14 +799,11 @@ h2 { .monitor-lease-detail-shell { margin-bottom: 1rem; - border: 1px solid var(--border); + border: 1px solid var(--border-strong); border-radius: 16px; - background: linear-gradient( - 180deg, - var(--panel) 0%, - var(--panel-strong) 100% - ); - padding: 1rem; + background: var(--panel); + padding: 1rem 1.05rem 1.05rem; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04); } .monitor-lease-detail-id { @@ -814,10 +811,49 @@ h2 { font-size: 0.88rem; } +.monitor-lease-context-bar { + display: grid; + grid-template-columns: repeat(4, minmax(0, 1fr)); + gap: 0.6rem; + margin-bottom: 0.8rem; +} + +.monitor-lease-context-item { + display: flex; + flex-direction: column; + gap: 0.18rem; + padding: 0.7rem 0.78rem; + border-radius: 12px; + background: var(--bg-muted); + border: 1px solid var(--border); +} + +.monitor-lease-context-item strong { + font-size: 0.72rem; + letter-spacing: 0.04em; + text-transform: uppercase; + color: var(--text-muted); +} + +.monitor-lease-context-item span { + font-size: 0.84rem; + color: var(--text); +} + .monitor-lease-session-table { margin-top: 0.25rem; } +.resource-table-dense th { + padding: 0.55rem 0.7rem; + font-size: 0.8rem; +} + +.resource-table-dense td { + padding: 0.5rem 0.7rem; + font-size: 0.85rem; +} + .provider-lease-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); @@ -2004,6 +2040,10 @@ section.eval-runtime-panel { overflow: visible; padding-right: 0; } + + .monitor-lease-context-bar { + grid-template-columns: repeat(2, minmax(0, 1fr)); + } } @media (max-width: 720px) { @@ -2023,6 +2063,10 @@ section.eval-runtime-panel { gap: 0.2rem; } + .monitor-lease-context-bar { + grid-template-columns: 1fr; + } + .console-header { position: static; } From 58fbc7a3d5df9bb72400dc31bfa4d11c3dc3edde Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:30:58 +0800 Subject: [PATCH 30/87] feat: tighten evaluation status density --- frontend/monitor/src/App.tsx | 126 +++++++++++++++++++------------- frontend/monitor/src/styles.css | 65 ++++++++++++++-- 2 files changed, 134 insertions(+), 57 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index bea6e099e..113e50233 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -103,6 +103,28 @@ function DashboardMetric({ ); } +function evaluationStatusTone(item: any): string { + const status = String(item?.status || "").toLowerCase(); + const publishable = Boolean( + item?.score?.publishable ?? item?.score?.score_gate === "final", + ); + if (status === "error" || status === "completed_with_errors") { + return "chip-danger"; + } + if (publishable) return "chip-success"; + if (status === "provisional" || status === "running" || status === "queued") { + return "chip-warning"; + } + return "chip-muted"; +} + +function evaluationScoreTone(item: any): string { + const publishable = Boolean( + item?.score?.publishable ?? item?.score?.score_gate === "final", + ); + return publishable ? "chip-success" : "chip-warning"; +} + function DashboardPage() { const [data, setData] = React.useState(null); const [loading, setLoading] = React.useState(false); @@ -3326,8 +3348,12 @@ function EvaluationPage() {

    Current Submission

    +
    + + {String(currentEval?.status || runStatus || "idle").toUpperCase()} + +
    evaluation: {evaluationId || "-"}
    -

    status: {currentEval?.status || runStatus}

    {currentEval && currentProgress && (
    @@ -3356,7 +3382,7 @@ function EvaluationPage() {

    Evaluations ({evalPagination?.total ?? evaluations.length})

    - Auto refresh 5s + auto refresh 5s
    {evalPagination?.total ?? evaluations.length} evaluations @@ -3397,19 +3423,19 @@ function EvaluationPage() {
    @@ -3432,21 +3458,17 @@ function EvaluationPage() { })()} @@ -3458,32 +3480,34 @@ function EvaluationPage() { )}
    Session {(() => { - // @@@publishable-preferred - publishable is the canonical release gate; score_gate stays as compatibility fallback. - const publishable = - item.score?.publishable ?? - item.score?.score_gate === "final"; return ( - <> -
    +
    + {String(item.status || "-").toUpperCase()} -
    -
    - publishable: {publishable ? "TRUE" : "FALSE"} -
    - + + + {Boolean(item.score?.publishable ?? item.score?.score_gate === "final") + ? "publishable" + : "provisional"} + +
    ); })()}
    - {(item.score?.publishable ?? - item.score?.score_gate === "final") ? ( - <> -
    R {formatResolvedScore(item)}
    -
    - C {formatPct(item.score?.completed_rate_pct)} | T{" "} - {formatPct(item.score?.tool_call_thread_rate_pct)} -
    - - ) : ( - <> -
    R PROVISIONAL
    -
    C - | T -
    - - )} +
    + + {(item.score?.publishable ?? item.score?.score_gate === "final") + ? `R ${formatResolvedScore(item)}` + : "R PROVISIONAL"} + +
    + C {formatPct(item.score?.completed_rate_pct)} | T{" "} + {formatPct(item.score?.tool_call_thread_rate_pct)} +
    +
    {item.updated_ago || "-"}
    -
    - -

    +

    +
    offset={evalPagination?.offset ?? 0} | limit= {evalPagination?.limit ?? evalLimit} | total= {evalPagination?.total ?? evaluations.length} -

    - +
    +
    + + +
    diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 3b094ad11..b01c5dd6b 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -400,23 +400,23 @@ h2 { .dashboard-metric { display: flex; flex-direction: column; - gap: 0.18rem; - min-height: 96px; - padding: 0.95rem 1rem; + gap: 0.15rem; + min-height: 0; + padding: 0.72rem 0.85rem; border: 1px solid var(--border); - border-radius: 14px; + border-radius: 12px; background: var(--panel); } .dashboard-metric-label { - font-size: 0.8rem; + font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.04em; color: var(--text-muted); } .dashboard-metric-value { - font-size: 1.45rem; + font-size: 1.25rem; line-height: 1.15; color: var(--text); } @@ -1687,6 +1687,27 @@ section li { border: 1px solid var(--border); } +.status-chip { + display: inline-flex; + align-items: center; + gap: 0.3rem; + padding: 0.24rem 0.56rem; + border-radius: 999px; + border: 1px solid var(--border); + background: var(--bg-muted); + color: var(--text-secondary); + font-size: 0.74rem; + line-height: 1; + letter-spacing: 0.03em; + text-transform: uppercase; +} + +.chip-muted { + background: var(--bg-muted); + color: var(--text-secondary); + border-color: transparent; +} + .chip-success { background: var(--success-soft); color: var(--success); @@ -1745,6 +1766,38 @@ section li { margin: 0.35rem 0 0.85rem; } +.eval-status-stack, +.eval-score-stack { + display: flex; + flex-direction: column; + gap: 0.35rem; + align-items: flex-start; +} + +.eval-score-stack { + font-size: 0.8rem; + color: var(--text-secondary); +} + +.evaluation-pagination-row { + display: flex; + align-items: center; + justify-content: space-between; + gap: 0.85rem; + margin-top: 0.75rem; + flex-wrap: wrap; +} + +.evaluation-pagination-copy { + margin: 0; +} + +.evaluation-pagination-actions { + display: flex; + align-items: center; + gap: 0.5rem; +} + .evaluation-overview .hint-box, .evaluation-flow .hint-box, .evaluation-notes .hint-box { From ea096f325f68ab4781c61700813c97fa92b227c3 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:35:50 +0800 Subject: [PATCH 31/87] feat: tighten monitor evaluation split density --- frontend/monitor/src/styles.css | 50 +++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index b01c5dd6b..056929e91 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -1726,6 +1726,37 @@ section li { border-color: transparent; } +.chip-row { + display: flex; + flex-wrap: wrap; + gap: 0.35rem; +} + +/* @@@status-tone-chips - map session/lease status to chip colors so they read consistently across resources and eval pages */ +.status-running { + background: var(--success-soft); + color: var(--success); + border-color: transparent; +} + +.status-paused { + background: var(--warning-soft); + color: var(--warning); + border-color: transparent; +} + +.status-stopped { + background: var(--bg-muted); + color: var(--text-muted); + border-color: transparent; +} + +.status-destroying { + background: var(--danger-soft); + color: var(--danger); + border-color: transparent; +} + .evaluation-flow, .evaluation-overview, .evaluation-notes { @@ -1743,20 +1774,20 @@ section li { grid-column: span 6; } -/* @@@eval-split-layout - sidebar (recessed status) + primary (table) side-by-side */ +/* @@@eval-split-layout - fixed sidebar width matching resource-rail for visual consistency */ .eval-split-layout { display: grid; - grid-template-columns: repeat(12, minmax(0, 1fr)); + grid-template-columns: 320px minmax(0, 1fr); gap: 1rem; align-items: start; } .eval-split-aside { - grid-column: span 4; + position: sticky; + top: 132px; } .eval-split-main { - grid-column: span 8; } .evaluation-meta-row { @@ -2065,9 +2096,7 @@ section.eval-runtime-panel { .evaluation-column, .provider-lease-meta, .lease-cluster-grid > *, - .eval-operator-grid > *, - .eval-split-aside, - .eval-split-main { + .eval-operator-grid > * { grid-column: span 12; } @@ -2080,10 +2109,15 @@ section.eval-runtime-panel { backdrop-filter: none; } - .resource-split-console { + .resource-split-console, + .eval-split-layout { grid-template-columns: 1fr; } + .eval-split-aside { + position: static; + } + .resource-rail { position: static; } From dd32d4de87e496c62f2cd895b089179fe82a12a8 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:38:23 +0800 Subject: [PATCH 32/87] feat: deepen monitor console hierarchy --- frontend/monitor/src/styles.css | 73 ++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 056929e91..b80a2dc7d 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -356,7 +356,7 @@ h2 { .dashboard-grid { display: grid; grid-template-columns: repeat(12, minmax(0, 1fr)); - gap: 1rem; + gap: 1.25rem; align-items: start; } @@ -364,11 +364,14 @@ h2 { grid-column: span 4; display: flex; flex-direction: column; - gap: 1rem; + gap: 0.85rem; } +/* @@@dashboard-hero-weight - hero spans wider and gets extra shadow to pull visual focus */ .dashboard-card-hero { grid-column: span 8; + padding: 1.35rem 1.4rem; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); } .dashboard-card-eval { @@ -379,18 +382,28 @@ h2 { grid-column: span 4; display: flex; flex-direction: column; - gap: 1rem; + gap: 0.85rem; +} + +/* @@@sidebar-compact - secondary cards in the sidebar stack use tighter padding to feel subordinate to hero */ +.dashboard-sidebar-stack .dashboard-card { + padding: 0.85rem 0.95rem; + gap: 0.65rem; +} + +.dashboard-sidebar-stack .dashboard-card-head h2 { + font-size: 1.05rem; } .dashboard-card-head h2 { - margin: 0 0 0.4rem; + margin: 0 0 0.25rem; } .dashboard-metric-grid, .resource-summary-grid { display: grid; - grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); - gap: 0.75rem; + grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); + gap: 0.55rem; } .resource-summary-grid { @@ -475,6 +488,17 @@ h2 { margin-bottom: 1.25rem; } +/* @@@lease-health-subordinate - lease health is global truth, not the primary working surface. Softer container to avoid competing with provider detail above. */ +#lease-health { + border-color: transparent; + background: transparent; + padding-left: 0; + padding-right: 0; + padding-top: 1.5rem; + border-top: 1px solid var(--border); + border-radius: 0; +} + .resource-split-console { display: grid; grid-template-columns: 320px minmax(0, 1fr); @@ -767,13 +791,13 @@ h2 { line-height: 1.35; } +/* @@@provider-detail-primary - strongest containment in resource-detail column to anchor visual focus */ .provider-detail-shell { border: 1px solid var(--border-strong); border-radius: 16px; background: var(--panel); - padding: 1rem 1.2rem 1.15rem; - margin-top: 1rem; - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04); + padding: 1.15rem 1.3rem 1.2rem; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); } .provider-detail-heading { @@ -793,8 +817,11 @@ h2 { flex-wrap: wrap; } +/* @@@session-shell-subordinate - lease/session area below provider detail is secondary; border-top instead of full containment to avoid box-in-box */ .resource-session-shell { - margin-top: 1rem; + margin-top: 0.75rem; + padding-top: 0.75rem; + border-top: 1px solid var(--border); } .monitor-lease-detail-shell { @@ -1782,12 +1809,38 @@ section li { align-items: start; } +/* @@@eval-aside-instrument - compact sticky aside with tighter text to feel like an instrument readout, not a content block */ .eval-split-aside { position: sticky; top: 132px; + font-size: 0.88rem; +} + +.eval-split-aside h2 { + font-size: 0.95rem; + margin: 0 0 0.5rem; +} + +.eval-split-aside .count { + margin-bottom: 0.5rem; } +/* @@@eval-main-table-primary - table column gets subtle containment so it reads as the working surface */ .eval-split-main { + border: 1px solid var(--border-strong); + border-radius: 16px; + background: var(--panel); + padding: 1.15rem 1.2rem; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04); +} + +.eval-split-main h2 { + margin: 0 0 0.25rem; +} + +.eval-split-main table { + border: none; + border-radius: 0; } .evaluation-meta-row { From 199e511b8a2078cd551680a9590dac5fedbc61bc Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:41:18 +0800 Subject: [PATCH 33/87] feat: modernize monitor shell chrome --- frontend/monitor/src/App.tsx | 87 +++++++++------------------------ frontend/monitor/src/styles.css | 66 +++++++++++++++++-------- 2 files changed, 68 insertions(+), 85 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 113e50233..a948d790c 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -4120,62 +4120,21 @@ const GUIDE_SECTIONS = [ }, ] as const; -function shellMeta(pathname: string) { - // @@@shell-route-bucketing - detail routes should inherit the nearest console section rather than render as separate primary destinations. - if (pathname.startsWith("/resources") || pathname.startsWith("/lease")) { - return { - eyebrow: "Global compute surface", - title: "Resources", - description: - "Provider health, lease triage, and scoped session truth for all sandboxes.", - }; - } - if (pathname.startsWith("/evaluation")) { - return { - eyebrow: "Evaluation operations", - title: "Evaluations", - description: - "Start runs, monitor durable progress, and inspect artifacts without losing operator context.", - }; - } - if (pathname.startsWith("/threads") || pathname.startsWith("/thread")) { - return { - eyebrow: "Runtime index", - title: "Threads", - description: - "Global thread index and detail drill-down into sessions, leases, and trace surfaces.", - }; - } - if (pathname.startsWith("/traces") || pathname.startsWith("/session")) { - return { - eyebrow: "Execution traces", - title: "Traces", - description: - "Sequence-level inspection for sessions, tool calls, and conversation surfaces.", - }; - } - if (pathname.startsWith("/events") || pathname.startsWith("/event")) { - return { - eyebrow: "Execution traces", - title: "Events", - description: - "Lease and runtime event history for debugging sequence, source, and error surfaces.", - }; - } - if (pathname.startsWith("/leases")) { - return { - eyebrow: "Lease truth", - title: "Leases", - description: - "Use grouped lease triage first, then drop into raw truth when you need exact runtime state.", - }; - } - return { - eyebrow: "Global ops console", - title: "Dashboard", - description: - "Landing page for health, workload, and the fastest path into global resources or active evaluations.", - }; +function shellMeta(pathname: string): { title: string; subtitle: string } { + // @@@shell-route-bucketing - detail routes inherit the nearest console section. + if (pathname.startsWith("/resources") || pathname.startsWith("/lease")) + return { title: "Resources", subtitle: "Provider health · lease triage · session truth" }; + if (pathname.startsWith("/evaluation")) + return { title: "Evaluations", subtitle: "Submit · track · inspect artifacts" }; + if (pathname.startsWith("/threads") || pathname.startsWith("/thread")) + return { title: "Threads", subtitle: "Global thread index · session and trace drill-down" }; + if (pathname.startsWith("/traces") || pathname.startsWith("/session")) + return { title: "Traces", subtitle: "Sequence-level session and tool-call inspection" }; + if (pathname.startsWith("/events") || pathname.startsWith("/event")) + return { title: "Events", subtitle: "Lease and runtime event history" }; + if (pathname.startsWith("/leases")) + return { title: "Leases", subtitle: "Grouped triage · raw truth fallback" }; + return { title: "Dashboard", subtitle: "Health · workload · latest evaluation" }; } function OperatorGuideModal({ @@ -4262,7 +4221,7 @@ function Layout({ children }: { children: React.ReactNode }) {
    M
    Mycel Monitor -

    Global sandbox ops console

    +

    Sandbox Console

    - Mode -

    - Light-mode operator shell. Global truth first, drill-down second. -

    +
    + + Monitor +
    + global · light · v0
    -

    {meta.eyebrow}

    {meta.title}

    -

    - {meta.description} -

    +

    {meta.subtitle}

    {showEvalComposeAction ? ( diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index b80a2dc7d..1a7f38b4c 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -107,17 +107,18 @@ body { .console-nav a { display: flex; align-items: center; - gap: 0.7rem; + gap: 0.65rem; color: var(--text-secondary); text-decoration: none; font-weight: 500; + font-size: 0.9rem; transition: color 0.18s ease, background 0.18s ease, border-color 0.18s ease; border: 1px solid transparent; - border-radius: 14px; - padding: 0.72rem 0.82rem; + border-radius: 10px; + padding: 0.58rem 0.75rem; } .console-nav a:hover { @@ -132,15 +133,15 @@ body { } .console-nav-mark { - width: 2rem; - height: 2rem; + width: 1.7rem; + height: 1.7rem; flex: 0 0 auto; - border-radius: 0.7rem; + border-radius: 0.5rem; display: grid; place-items: center; background: rgba(23, 23, 23, 0.04); color: var(--text-muted); - font-size: 0.7rem; + font-size: 0.62rem; letter-spacing: 0.04em; text-transform: uppercase; } @@ -152,12 +153,34 @@ body { .console-sidebar-foot { margin-top: auto; - border: 1px solid var(--border); - border-radius: 16px; - background: rgba(255, 255, 255, 0.72); - padding: 0.9rem 0.95rem; + border-top: 1px solid var(--border); + padding: 0.85rem 0.2rem 0.25rem; + color: var(--text-muted); + font-size: 0.82rem; +} + +.console-foot-row { + display: flex; + align-items: center; + gap: 0.45rem; + margin-bottom: 0.25rem; + font-weight: 500; color: var(--text-secondary); - font-size: 0.85rem; +} + +.console-foot-dot { + width: 0.45rem; + height: 0.45rem; + border-radius: 999px; + background: var(--success); + box-shadow: 0 0 0 3px rgba(5, 150, 105, 0.12); +} + +.console-foot-meta { + font-size: 0.72rem; + letter-spacing: 0.04em; + text-transform: uppercase; + color: var(--text-muted); } .shell-eyebrow { @@ -178,27 +201,30 @@ body { .console-header { display: flex; - align-items: flex-start; + align-items: center; justify-content: space-between; gap: 1rem; - padding: 1.35rem 2rem 1rem; + padding: 0.9rem 2rem; border-bottom: 1px solid rgba(231, 231, 231, 0.8); background: rgba(255, 255, 255, 0.72); backdrop-filter: blur(14px); position: sticky; top: 0; z-index: 15; + min-height: 3.5rem; } .console-title { - margin: 0 0 0.45rem; - font-size: 1.7rem; - line-height: 1.08; + margin: 0; + font-size: 1.5rem; + line-height: 1.15; } -.console-description { - margin-bottom: 0; - max-width: 62ch; +.console-subtitle { + margin: 0.25rem 0 0; + font-size: 0.82rem; + color: var(--text-muted); + letter-spacing: 0.01em; } .console-header-actions { From 9f0109976717a6290dd32874b404c781364b423c Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:44:54 +0800 Subject: [PATCH 34/87] feat: sharpen monitor sidebar rail grouping --- frontend/monitor/src/styles.css | 36 +++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index 1a7f38b4c..4ce9fc354 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -101,7 +101,29 @@ body { .console-nav { display: flex; flex-direction: column; - gap: 0.3rem; + gap: 1.1rem; +} + +/* @@@nav-group-rhythm - section spacing + muted label creates hierarchy without decoration */ +.console-nav-group { + display: flex; + flex-direction: column; + gap: 0.2rem; +} + +.console-nav-group + .console-nav-group { + padding-top: 0.65rem; + border-top: 1px solid rgba(231, 231, 231, 0.72); +} + +.console-nav-group-label { + display: block; + padding: 0 0.75rem 0.3rem; + font-size: 0.68rem; + font-weight: 600; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--text-muted); } .console-nav a { @@ -130,6 +152,7 @@ body { color: var(--accent); background: var(--accent-soft); border-color: rgba(37, 99, 235, 0.12); + box-shadow: inset 2px 0 0 var(--accent); } .console-nav-mark { @@ -2155,8 +2178,17 @@ section.eval-runtime-panel { } .console-nav { + gap: 0.75rem; + } + + .console-nav-group { display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); + gap: 0.3rem; + } + + .console-nav-group-label { + grid-column: 1 / -1; } .console-header { @@ -2219,7 +2251,7 @@ section.eval-runtime-panel { padding-right: 1rem; } - .console-nav { + .console-nav-group { grid-template-columns: 1fr 1fr; } From 6335c3c15ae35be6ed81346de2cb0d83eaff2908 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:45:36 +0800 Subject: [PATCH 35/87] feat: group monitor sidebar navigation --- frontend/monitor/src/App.tsx | 52 ++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index a948d790c..dea8c338a 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -4077,28 +4077,29 @@ function EvaluationDetailPage() { ); } -const SHELL_NAV = [ +// @@@nav-information-architecture - grouped rail: overview → infra → workload. Section headers add hierarchy without adding pages. +const SHELL_NAV_GROUPS = [ { - to: "/dashboard", - label: "Dashboard", - shortLabel: "DB", - testId: "nav-dashboard", + label: "Overview", + items: [ + { to: "/dashboard", label: "Dashboard", shortLabel: "DB", testId: "nav-dashboard" }, + ], }, { - to: "/resources", - label: "Resources", - shortLabel: "RS", - testId: "nav-resources", + label: "Infrastructure", + items: [ + { to: "/resources", label: "Resources", shortLabel: "RS", testId: "nav-resources" }, + { to: "/leases", label: "Leases", shortLabel: "LS", testId: "nav-leases" }, + ], }, { - to: "/evaluation", - label: "Evaluations", - shortLabel: "EV", - testId: "nav-eval", + label: "Workload", + items: [ + { to: "/evaluation", label: "Evaluations", shortLabel: "EV", testId: "nav-eval" }, + { to: "/threads", label: "Threads", shortLabel: "TH", testId: "nav-threads" }, + { to: "/traces", label: "Traces", shortLabel: "TR", testId: "nav-traces" }, + ], }, - { to: "/threads", label: "Threads", shortLabel: "TH", testId: "nav-threads" }, - { to: "/traces", label: "Traces", shortLabel: "TR", testId: "nav-traces" }, - { to: "/leases", label: "Leases", shortLabel: "LS", testId: "nav-leases" }, ] as const; const GUIDE_SECTIONS = [ @@ -4225,13 +4226,18 @@ function Layout({ children }: { children: React.ReactNode }) {
    From 94f5b89fbab2279cea822bdcd15eed1369b3f2ee Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:46:31 +0800 Subject: [PATCH 36/87] feat: add monitor favicon --- frontend/monitor/index.html | 2 +- frontend/monitor/public/favicon.svg | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 frontend/monitor/public/favicon.svg diff --git a/frontend/monitor/index.html b/frontend/monitor/index.html index c49c45cd1..f54328915 100644 --- a/frontend/monitor/index.html +++ b/frontend/monitor/index.html @@ -3,6 +3,7 @@ + Leon Sandbox Monitor @@ -10,4 +11,3 @@ - diff --git a/frontend/monitor/public/favicon.svg b/frontend/monitor/public/favicon.svg new file mode 100644 index 000000000..f409dd6e1 --- /dev/null +++ b/frontend/monitor/public/favicon.svg @@ -0,0 +1,6 @@ + + + + + + From dc0b2c5e0f73b627b6ec2ba1975cb80ef263aa74 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:47:48 +0800 Subject: [PATCH 37/87] feat: normalize monitor loading surfaces --- frontend/monitor/src/App.tsx | 40 +++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index dea8c338a..a42d11b33 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1416,7 +1416,13 @@ function ThreadsPage() { void loadThreads(); }, [loadThreads]); - if (!data) return
    Loading...
    ; + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } const pagination = data.pagination || {}; const total = Number(pagination.total || data.count || 0); const currentCount = Number(data.count || 0); @@ -1538,7 +1544,13 @@ function TracesPage() { void loadTraces(); }, [loadTraces]); - if (!data) return
    Loading...
    ; + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } const pagination = data.pagination || {}; const total = Number(pagination.total || data.count || 0); const currentCount = Number(data.count || 0); @@ -1652,7 +1664,13 @@ function ThreadDetailPage() { fetchAPI(`/thread/${threadId}`).then(setData); }, [threadId]); - if (!data) return
    Loading...
    ; + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } const threadIsActive = Array.isArray(data?.sessions?.items) ? data.sessions.items.some((s: any) => s.status === "active") : false; @@ -2792,7 +2810,13 @@ function LeasesPage() { fetchAPI("/leases").then(setData); }, []); - if (!data) return
    Loading...
    ; + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } const triage = data.triage || {}; const triageSummary = triage.summary || {}; const triageGroups = Array.isArray(triage.groups) ? triage.groups : []; @@ -3040,7 +3064,13 @@ function DivergedPage() { fetchAPI("/diverged").then(setData); }, []); - if (!data) return
    Loading...
    ; + if (!data) { + return ( +
    +
    Loading...
    +
    + ); + } return (
    From 27b5e464e641b2ad0dd0350981bdb0300d8b4368 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 22:49:28 +0800 Subject: [PATCH 38/87] feat: rename eval entry action --- frontend/monitor/src/App.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index a42d11b33..746e50e2c 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -4287,7 +4287,7 @@ function Layout({ children }: { children: React.ReactNode }) {
    {showEvalComposeAction ? ( - Open Config + Build Eval ) : null}
    -
    - - - - - - - - - - - - - {scopedSessions.map((session: any) => ( - - - - - - - - - ))} - {scopedSessions.length === 0 ? ( - - - - ) : null} - -
    SessionThreadLeaseMemberStatusStarted
    {shortId(session.id, 12)} - {session.threadId ? ( - - {shortId(session.threadId, 12)} - - ) : ( - "-" - )} - - {session.leaseId ? ( - - {shortId(session.leaseId, 12)} - - ) : ( - "-" - )} - {session.memberName || session.memberId || "-"}{session.status} - {session.startedAt - ? new Date(session.startedAt).toLocaleString() - : "-"} -
    - {sessionScope === "provider" - ? "No sessions reported for this provider." - : "No sessions reported for the selected lease group."} -
    -
    - + ) : ( -
    Select a provider from the list.
    +
    + Select a provider from the list. +
    )}
    -
    +

    Lease Health

    @@ -3253,6 +3279,7 @@ function EventDetailPage() { // Page: Evaluation function EvaluationPage() { const location = useLocation(); + const navigate = useNavigate(); const [dataset, setDataset] = React.useState("SWE-bench/SWE-bench_Lite"); const [split, setSplit] = React.useState("test"); const [startIdx, setStartIdx] = React.useState("0"); @@ -3323,7 +3350,7 @@ function EvaluationPage() { throw new Error("create evaluation returned empty evaluation_id"); setEvaluationId(nextEvalId); setRunStatus("submitted"); - setComposerOpen(false); + closeComposer(); await loadEvaluations(); } catch (e: any) { setRunStatus("error"); @@ -3381,13 +3408,29 @@ function EvaluationPage() { setComposerOpen(query.get("new") === "1"); }, [location.search]); + // @@@evaluation-query-close - clear the query flag on close so the shell CTA can reopen the composer on the next click. + function closeComposer() { + const query = new URLSearchParams(location.search); + query.delete("new"); + setComposerOpen(false); + navigate( + { + pathname: location.pathname, + search: query.toString() ? `?${query.toString()}` : "", + }, + { replace: true }, + ); + } + return (

    Current Submission

    - + {String(currentEval?.status || runStatus || "idle").toUpperCase()}
    @@ -3418,144 +3461,151 @@ function EvaluationPage() { )}
    -
    -

    Evaluations ({evalPagination?.total ?? evaluations.length})

    - auto refresh 5s -
    -
    - {evalPagination?.total ?? evaluations.length} evaluations - {runsLoading ? "loading..." : "idle"} - page {evalPagination?.page ?? 1} -
    - - - - - - - - - - - - - - - {evaluations.map((item: any) => ( - - - - - - - + + + ))} + {evaluations.length === 0 && ( + + + + )} + +
    EvaluationDatasetRangeProfile / Sandbox - Status - - Progress - ScoreUpdated
    - - {shortId(item.evaluation_id, 14)} - - {item.dataset} - {item.start_idx}..{item.start_idx + item.slice_count - 1} - - {item.prompt_profile || "-"} / {item.sandbox || "-"} - - {(() => { - return ( -
    - - {String(item.status || "-").toUpperCase()} - - - {Boolean(item.score?.publishable ?? item.score?.score_gate === "final") - ? "publishable" - : "provisional"} - -
    - ); - })()} -
    - {(() => { - const p = evalProgress(item); - return ( -
    -
    -
    +
    +

    Evaluations ({evalPagination?.total ?? evaluations.length})

    + auto refresh 5s +
    +
    + + {evalPagination?.total ?? evaluations.length} evaluations + + {runsLoading ? "loading..." : "idle"} + page {evalPagination?.page ?? 1} +
    + + + + + + + + + + + + + + + {evaluations.map((item: any) => ( + + + + + + + + - - - - ))} - {evaluations.length === 0 && ( - - - - )} - -
    EvaluationDatasetRangeProfile / Sandbox + Status + + Progress + ScoreUpdated
    + + {shortId(item.evaluation_id, 14)} + + {item.dataset} + {item.start_idx}..{item.start_idx + item.slice_count - 1} + + {item.prompt_profile || "-"} / {item.sandbox || "-"} + + {(() => { + return ( +
    + + {String(item.status || "-").toUpperCase()} + + + {Boolean( + item.score?.publishable ?? + item.score?.score_gate === "final", + ) + ? "publishable" + : "provisional"} +
    -
    - {formatProgressSummary(p)} + ); + })()} +
    + {(() => { + const p = evalProgress(item); + return ( +
    +
    +
    +
    +
    + {formatProgressSummary(p)} +
    + ); + })()} +
    +
    + + {(item.score?.publishable ?? + item.score?.score_gate === "final") + ? `R ${formatResolvedScore(item)}` + : "R PROVISIONAL"} + +
    + C {formatPct(item.score?.completed_rate_pct)} | T{" "} + {formatPct(item.score?.tool_call_thread_rate_pct)}
    - ); - })()} -
    -
    - - {(item.score?.publishable ?? item.score?.score_gate === "final") - ? `R ${formatResolvedScore(item)}` - : "R PROVISIONAL"} - -
    - C {formatPct(item.score?.completed_rate_pct)} | T{" "} - {formatPct(item.score?.tool_call_thread_rate_pct)}
    -
    -
    {item.updated_ago || "-"}
    No evaluations yet.
    -
    -
    - offset={evalPagination?.offset ?? 0} | limit= - {evalPagination?.limit ?? evalLimit} | total= - {evalPagination?.total ?? evaluations.length} -
    -
    - - +
    {item.updated_ago || "-"}
    No evaluations yet.
    +
    +
    + offset={evalPagination?.offset ?? 0} | limit= + {evalPagination?.limit ?? evalLimit} | total= + {evalPagination?.total ?? evaluations.length} +
    +
    + + +
    -
    {composerOpen && ( // @@@evaluation-composer-modal - keep config editing in a fixed layer to avoid "tail jump" in long list pages. -
    setComposerOpen(false)} - > +
    e.stopPropagation()} @@ -3564,7 +3614,7 @@ function EvaluationPage() {

    New Evaluation Config

    + + {/* @@@session-trace-reuse - the compat session contract does not expose a dedicated run id, so reuse the thread trace surface here instead of inventing a deeper API seam. */} +
    ); } From b10b3e6acae08eac224dc92c8794908071129300 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:24:13 +0800 Subject: [PATCH 47/87] fix: serve historical lease detail --- backend/web/monitor.py | 69 ++----------------- backend/web/services/monitor_service.py | 51 +++++++++++++- storage/contracts.py | 19 +++++ .../providers/sqlite/sandbox_monitor_repo.py | 25 +++++++ .../supabase/sandbox_monitor_repo.py | 33 +++++++++ tests/Unit/monitor/test_monitor_compat.py | 44 ++++++++++++ 6 files changed, 177 insertions(+), 64 deletions(-) diff --git a/backend/web/monitor.py b/backend/web/monitor.py index 70bbf062e..357cb7c57 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -1987,69 +1987,14 @@ def list_leases(): @router.get("/lease/{lease_id}") -def get_lease(lease_id: str, db: sqlite3.Connection = Depends(get_db)): - lease = db.execute( - """ - SELECT * FROM sandbox_leases WHERE lease_id = ? - """, - (lease_id,), - ).fetchone() - - if not lease: - raise HTTPException(status_code=404, detail="Lease not found") - - threads = db.execute( - """ - SELECT DISTINCT thread_id FROM chat_sessions WHERE lease_id = ? - """, - (lease_id,), - ).fetchall() - - # Get lease events - events = db.execute( - """ - SELECT * FROM lease_events - WHERE lease_id = ? - ORDER BY created_at DESC - """, - (lease_id,), - ).fetchall() - - badge = make_badge(lease["desired_state"], lease["observed_state"]) - badge["error"] = lease["last_error"] +def get_lease(lease_id: str): + from backend.web.services import monitor_service - return { - "lease_id": lease_id, - "breadcrumb": [{"label": "Leases", "url": "/leases"}, {"label": lease_id, "url": f"/lease/{lease_id}"}], - "info": { - "provider": lease["provider_name"], - "instance_id": lease["current_instance_id"], - "created_at": lease["created_at"], - "created_ago": format_time_ago(lease["created_at"]), - "updated_at": lease["updated_at"], - "updated_ago": format_time_ago(lease["updated_at"]), - }, - "state": badge, - "related_threads": { - "title": "Related Threads", - "items": [{"thread_id": t["thread_id"], "thread_url": f"/thread/{t['thread_id']}"} for t in threads], - }, - "lease_events": { - "title": "Lease Events", - "count": len(events), - "items": [ - { - "event_id": e["event_id"], - "event_url": f"/event/{e['event_id']}", - "event_type": e["event_type"], - "source": e["source"], - "created_at": e["created_at"], - "created_ago": format_time_ago(e["created_at"]), - } - for e in events - ], - }, - } + try: + return monitor_service.get_lease(lease_id) + except KeyError as exc: + detail = exc.args[0] if exc.args else "Lease not found" + raise HTTPException(status_code=404, detail=detail) from exc @router.get("/diverged") diff --git a/backend/web/services/monitor_service.py b/backend/web/services/monitor_service.py index 697ac300c..e9ad5bbcc 100644 --- a/backend/web/services/monitor_service.py +++ b/backend/web/services/monitor_service.py @@ -545,6 +545,49 @@ def _map_lease_detail( } +def _historical_lease_detail( + lease_id: str, + sessions: list[dict[str, Any]], + events: list[dict[str, Any]], +) -> dict[str, Any] | None: + if not sessions and not events: + return None + + created_candidates = [ + str(value) + for value in [*(row.get("started_at") for row in sessions), *(row.get("created_at") for row in events)] + if value + ] + updated_candidates = [ + str(value) + for value in [ + *(row.get("ended_at") or row.get("started_at") for row in sessions), + *(row.get("created_at") for row in events), + ] + if value + ] + first_session = sessions[0] if sessions else {} + thread_ids: list[str] = [] + seen_threads: set[str] = set() + for row in sessions: + thread_id = str(row.get("thread_id") or "").strip() + if thread_id and thread_id not in seen_threads: + seen_threads.add(thread_id) + thread_ids.append(thread_id) + + lease = { + "provider_name": first_session.get("provider_name") or "unknown", + "current_instance_id": first_session.get("current_instance_id"), + "created_at": min(created_candidates) if created_candidates else None, + "updated_at": max(updated_candidates) if updated_candidates else None, + "desired_state": first_session.get("desired_state"), + "observed_state": first_session.get("observed_state"), + "last_error": first_session.get("last_error"), + } + threads = [{"thread_id": thread_id} for thread_id in thread_ids] + return _map_lease_detail(lease_id, lease, threads, events) + + def _map_diverged(rows: list[dict[str, Any]]) -> dict[str, Any]: items = [ { @@ -654,12 +697,16 @@ def get_lease(lease_id: str) -> dict[str, Any]: repo = make_sandbox_monitor_repo() try: lease = repo.query_lease(lease_id) - if not lease: - raise KeyError("Lease not found") threads = repo.query_lease_threads(lease_id) events = repo.query_lease_events(lease_id) + sessions = repo.query_lease_sessions(lease_id) finally: repo.close() + if not lease: + fallback = _historical_lease_detail(lease_id, sessions, events) + if fallback: + return fallback + raise KeyError("Lease not found") return _map_lease_detail(lease_id, lease, threads, events) diff --git a/storage/contracts.py b/storage/contracts.py index 40f6e6406..8df8e6b8a 100644 --- a/storage/contracts.py +++ b/storage/contracts.py @@ -98,7 +98,26 @@ def list_active(self) -> list[dict[str, Any]]: ... def list_all(self) -> list[dict[str, Any]]: ... def cleanup_expired(self) -> list[str]: ... +class SandboxMonitorRepo(Protocol): + """Read-only monitor queries over sandbox/session/lease state.""" + def close(self) -> None: ... + def query_threads(self, *, thread_id: str | None = None) -> list[dict[str, Any]]: ... + def query_thread_summary(self, thread_id: str) -> dict[str, Any] | None: ... + def query_thread_sessions(self, thread_id: str) -> list[dict[str, Any]]: ... + def query_leases(self) -> list[dict[str, Any]]: ... + def list_leases_with_threads(self) -> list[dict[str, Any]]: ... + def query_lease(self, lease_id: str) -> dict[str, Any] | None: ... + def query_lease_sessions(self, lease_id: str) -> list[dict[str, Any]]: ... + def query_lease_threads(self, lease_id: str) -> list[dict[str, Any]]: ... + def query_lease_events(self, lease_id: str) -> list[dict[str, Any]]: ... + def query_diverged(self) -> list[dict[str, Any]]: ... + def query_events(self, limit: int = 100) -> list[dict[str, Any]]: ... + def query_event(self, event_id: str) -> dict[str, Any] | None: ... + def count_rows(self, table_names: list[str]) -> dict[str, int]: ... + def list_sessions_with_leases(self) -> list[dict[str, Any]]: ... + def list_probe_targets(self) -> list[dict[str, Any]]: ... + def query_lease_instance_id(self, lease_id: str) -> str | None: ... # --------------------------------------------------------------------------- # Member-Chat — enums + row types # --------------------------------------------------------------------------- diff --git a/storage/providers/sqlite/sandbox_monitor_repo.py b/storage/providers/sqlite/sandbox_monitor_repo.py index 25c3836bb..1fd75d856 100644 --- a/storage/providers/sqlite/sandbox_monitor_repo.py +++ b/storage/providers/sqlite/sandbox_monitor_repo.py @@ -206,6 +206,31 @@ def query_lease(self, lease_id: str) -> dict | None: ).fetchone() return _row_to_dict(row) if row else None + def query_lease_sessions(self, lease_id: str) -> list[dict]: + rows = self._conn.execute( + """ + SELECT + cs.chat_session_id, + cs.thread_id, + cs.status, + cs.started_at, + cs.ended_at, + cs.close_reason, + cs.lease_id, + sl.provider_name, + sl.desired_state, + sl.observed_state, + sl.current_instance_id, + sl.last_error + FROM chat_sessions cs + LEFT JOIN sandbox_leases sl ON cs.lease_id = sl.lease_id + WHERE cs.lease_id = ? + ORDER BY cs.started_at DESC + """, + (lease_id,), + ).fetchall() + return [_row_to_dict(r) for r in rows] + def query_lease_threads(self, lease_id: str) -> list[dict]: rows = self._conn.execute( """ diff --git a/storage/providers/supabase/sandbox_monitor_repo.py b/storage/providers/supabase/sandbox_monitor_repo.py index 1f8697b6b..cfc647008 100644 --- a/storage/providers/supabase/sandbox_monitor_repo.py +++ b/storage/providers/supabase/sandbox_monitor_repo.py @@ -181,6 +181,39 @@ def query_lease(self, lease_id: str) -> dict | None: ) return dict(rows[0]) if rows else None + def query_lease_sessions(self, lease_id: str) -> list[dict]: + sessions = q.rows( + q.order( + self._client.table("chat_sessions") + .select("chat_session_id,thread_id,status,started_at,ended_at,close_reason,lease_id") + .eq("lease_id", lease_id), + "started_at", + desc=True, + repo=_REPO, + operation="query_lease_sessions", + ).execute(), + _REPO, + "query_lease_sessions", + ) + lease = self.query_lease(lease_id) + return [ + { + "chat_session_id": session.get("chat_session_id"), + "thread_id": session.get("thread_id"), + "status": session.get("status"), + "started_at": session.get("started_at"), + "ended_at": session.get("ended_at"), + "close_reason": session.get("close_reason"), + "lease_id": session.get("lease_id"), + "provider_name": lease.get("provider_name") if lease else None, + "desired_state": lease.get("desired_state") if lease else None, + "observed_state": lease.get("observed_state") if lease else None, + "current_instance_id": lease.get("current_instance_id") if lease else None, + "last_error": lease.get("last_error") if lease else None, + } + for session in sessions + ] + def query_lease_threads(self, lease_id: str) -> list[dict]: rows = q.rows( q.order( diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index f36196fe7..9dbd843a1 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -210,6 +210,50 @@ def close(self): assert payload["triage"]["summary"]["detached_residue"] == 1 +def test_get_lease_falls_back_to_historical_session_rows(monkeypatch): + class FakeRepo: + def query_lease(self, lease_id): + return None + + def query_lease_threads(self, lease_id): + return [] + + def query_lease_events(self, lease_id): + return [] + + def query_lease_sessions(self, lease_id): + return [ + { + "chat_session_id": "sess-old", + "thread_id": "thread-historical", + "status": "closed", + "started_at": "2026-04-06T10:00:00", + "ended_at": "2026-04-06T10:05:00", + "close_reason": "expired", + "lease_id": lease_id, + "provider_name": None, + "desired_state": None, + "observed_state": None, + "current_instance_id": None, + "last_error": None, + } + ] + + def close(self): + return None + + monkeypatch.setattr(monitor_service, "make_sandbox_monitor_repo", lambda: FakeRepo()) + + payload = monitor_service.get_lease("lease-historical") + + assert payload["lease_id"] == "lease-historical" + assert payload["info"]["provider"] == "unknown" + assert payload["state"]["text"] == "destroyed" + assert payload["related_threads"]["items"] == [ + {"thread_id": "thread-historical", "thread_url": "/thread/thread-historical"} + ] + + def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_materialize(): payload = monitor_service.build_evaluation_operator_surface( status="provisional", From fc565a02a3960b8e561e199c90d45821c7bd3ae8 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:27:14 +0800 Subject: [PATCH 48/87] fix: guide empty run traces to events --- frontend/monitor/src/App.tsx | 31 ++++++++++++++++++++++++++++--- frontend/monitor/src/styles.css | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 4d24f7689..86869fc37 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -2676,9 +2676,34 @@ function ThreadTraceSection({ index={idx} /> ))} - {conversationTail.length === 0 && ( -
    No conversation messages yet.
    - )} + {conversationTail.length === 0 && + (traceTail.length > 0 ? ( +
    +

    No conversation messages were captured for this run.

    +

    + Trace events still exist. Switch to the lower-level views to + inspect the run directly. +

    +
    + + +
    +
    + ) : ( +
    No conversation messages yet.
    + ))} ) : traceView === "events" ? ( <> diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index e9e3a3e0c..da960618d 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -1691,6 +1691,21 @@ section li { padding: 1rem; } +.trace-guided-empty { + display: grid; + gap: 0.65rem; +} + +.trace-guided-empty p { + margin: 0; +} + +.trace-guided-actions { + display: flex; + gap: 0.65rem; + flex-wrap: wrap; +} + .trace-surface-error { border-style: solid; border-color: rgba(220, 38, 38, 0.18); From 7acf448c5093e72e5bb4d49559f8a1c3c3c6a76d Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:29:29 +0800 Subject: [PATCH 49/87] fix: classify lease detail under leases shell --- frontend/monitor/src/App.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 86869fc37..f7bfba3c6 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -4272,9 +4272,9 @@ const GUIDE_SECTIONS = [ function shellMeta(pathname: string): { title: string; subtitle: string } { // @@@shell-route-bucketing - detail routes inherit the nearest console section. - if (pathname.startsWith("/leases")) + if (pathname.startsWith("/leases") || pathname.startsWith("/lease/")) return { title: "Leases", subtitle: "Grouped triage · raw truth fallback" }; - if (pathname.startsWith("/resources") || pathname.startsWith("/lease")) + if (pathname.startsWith("/resources")) return { title: "Resources", subtitle: "Provider health · lease triage · session truth", From b7d42e359eac117a5d5006a042898b75832d968b Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:32:43 +0800 Subject: [PATCH 50/87] refactor: hide redundant thread lease links --- frontend/monitor/src/App.tsx | 40 ++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index f7bfba3c6..0e803220d 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1700,6 +1700,18 @@ function ThreadDetailPage() { const threadIsActive = Array.isArray(data?.sessions?.items) ? data.sessions.items.some((s: any) => s.status === "active") : false; + const sessionLeaseIds = new Set( + Array.isArray(data?.sessions?.items) + ? data.sessions.items + .map((session: any) => String(session?.lease?.lease_id || "").trim()) + .filter(Boolean) + : [], + ); + const visibleRelatedLeases = Array.isArray(data?.related_leases?.items) + ? data.related_leases.items.filter( + (lease: any) => !sessionLeaseIds.has(String(lease?.lease_id || "").trim()), + ) + : []; return (
    @@ -1757,19 +1769,21 @@ function ThreadDetailPage() { -
    -

    {data.related_leases.title}

    -
      - {data.related_leases.items.map((l: any) => ( -
    • - {l.lease_id} -
    • - ))} - {data.related_leases.items.length === 0 && ( -
    • No related leases for this thread.
    • - )} -
    -
    + {(visibleRelatedLeases.length > 0 || data.related_leases.items.length === 0) && ( +
    +

    {data.related_leases.title}

    +
      + {visibleRelatedLeases.map((l: any) => ( +
    • + {l.lease_id} +
    • + ))} + {data.related_leases.items.length === 0 && ( +
    • No related leases for this thread.
    • + )} +
    +
    + )}

    Live Trace

    From fe0e38efc99d4565d1e609a43b3f165c031bacef Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:40:51 +0800 Subject: [PATCH 51/87] fix: keep monitor resources honest without lease groups --- ...2026-04-06-resource-observability-split.md | 4 ++ ...-06-resource-observability-split-design.md | 12 +++++ frontend/monitor/src/App.tsx | 52 +++++++++++-------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index a4a6acbf5..035da36a2 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -55,6 +55,10 @@ - triage distinguishes `active_drift`, `detached_residue`, `orphan_cleanup`, and `healthy_capacity` - monitor `Resources` consumes that triage surface directly instead of flattening everything back into `diverged/orphan` - legacy `/leases` also now leads with triage buckets before the collapsed raw table +- new queued follow-up: + - bounded resource cleanup inside monitor `Resources` + - first slice must target backlog-like classes only (`detached_residue`, `orphan_cleanup`) + - cleanup action must be backend-owned and Playwright-proven; dead buttons do not count - next honest follow-up remains: - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index e6c397d39..dc0ab9d3b 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -357,6 +357,18 @@ The dashboard is a switchboard, not a full destination page. It should answer - semantics are still inferred from current lease row + thread binding only; they do not yet account for stronger lifecycle facts such as historical cleanup windows or explicit terminal/session shutdown markers - the legacy `/leases` flat table still exists as a drill-down/debug surface, though the monitor resources page now gives a better default entry by rendering only non-empty attention groups and collapsing healthy capacity +### Next D3 Follow-on: Bounded Resource Cleanup + +- monitor `Resources` should eventually expose a small cleanup surface for global backlog classes +- first target is not live lease mutation; it is bounded cleanup of rows that already read like backlog: + - `detached_residue` + - `orphan_cleanup` +- the cleanup contract must stay backend-owned and explicit: + - no frontend-only disappearance tricks + - no silent fallback when cleanup is unsupported + - no product-page reuse of these controls +- if this lands, it should appear as an operator action inside the global monitor resources surface, close to lease health / residue drill-down, not as a generic product resource affordance + ### Why this IA - the backend already exposes `/api/monitor/resources`; the missing piece is a monitor entry surface, not another resource backend invention diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 0e803220d..2e0ddf9a5 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -843,8 +843,11 @@ function MonitorResourcesPage() { ) || selectedLeaseGroups[0] || null; + // @@@resource-session-scope - no lease group means provider scope is the only honest truth surface, even if the UI last asked for lease scope. + const effectiveSessionScope = + selectedLeaseGroup == null ? "provider" : sessionScope; const scopedSessions = - sessionScope === "provider" || !selectedLeaseGroup + effectiveSessionScope === "provider" || !selectedLeaseGroup ? selectedSessions : selectedLeaseGroup.sessions; @@ -1074,37 +1077,40 @@ function MonitorResourcesPage() {

    Sessions ( - {sessionScope === "provider" + {effectiveSessionScope === "provider" ? selectedSessions.length : scopedSessions.length} )

    - {sessionScope === "provider" + {selectedLeaseGroup == null + ? "full provider truth surface" + : effectiveSessionScope === "provider" ? "full provider truth surface" : "scoped to selected lease"}

    -
    - - -
    + + +
    + ) : null}
    @@ -1151,7 +1157,7 @@ function MonitorResourcesPage() { {scopedSessions.length === 0 ? ( From b286e6f55e7300746a44452515f75130c07ee178 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:41:29 +0800 Subject: [PATCH 52/87] style: format monitor app shell --- frontend/monitor/src/App.tsx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 2e0ddf9a5..62e2c65b7 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1086,8 +1086,8 @@ function MonitorResourcesPage() { {selectedLeaseGroup == null ? "full provider truth surface" : effectiveSessionScope === "provider" - ? "full provider truth surface" - : "scoped to selected lease"} + ? "full provider truth surface" + : "scoped to selected lease"}

    {selectedLeaseGroup ? ( @@ -1715,7 +1715,8 @@ function ThreadDetailPage() { ); const visibleRelatedLeases = Array.isArray(data?.related_leases?.items) ? data.related_leases.items.filter( - (lease: any) => !sessionLeaseIds.has(String(lease?.lease_id || "").trim()), + (lease: any) => + !sessionLeaseIds.has(String(lease?.lease_id || "").trim()), ) : []; @@ -1775,7 +1776,8 @@ function ThreadDetailPage() {
    - {sessionScope === "provider" + {effectiveSessionScope === "provider" ? "No sessions reported for this provider." : "No sessions reported for the selected lease group."}
    - {(visibleRelatedLeases.length > 0 || data.related_leases.items.length === 0) && ( + {(visibleRelatedLeases.length > 0 || + data.related_leases.items.length === 0) && (

    {data.related_leases.title}

      From a95e33229f53d6a914ffc5ced2176024fdc8fffe Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:51:22 +0800 Subject: [PATCH 53/87] test: mark eval composer modal for sweep proofs --- frontend/monitor/src/App.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 62e2c65b7..816eaf42e 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3658,9 +3658,14 @@ function EvaluationPage() { {composerOpen && ( // @@@evaluation-composer-modal - keep config editing in a fixed layer to avoid "tail jump" in long list pages. -
      +
      e.stopPropagation()} >
      From f1d857d422d134d379897917c50971993d6e2562 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Mon, 6 Apr 2026 23:59:57 +0800 Subject: [PATCH 54/87] fix: honor monitor deep links after async load --- frontend/monitor/src/App.tsx | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 816eaf42e..e08f3537d 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -4384,7 +4384,7 @@ function OperatorGuideModal({ } function ScrollToTopOnRouteChange() { - const { pathname } = useLocation(); + const { pathname, hash } = useLocation(); React.useEffect(() => { // @@@history-scroll-restore-disable - browser may restore stale scroll offsets and make user land at page tail. const prev = window.history.scrollRestoration; @@ -4396,7 +4396,26 @@ function ScrollToTopOnRouteChange() { React.useEffect(() => { // @@@route-scroll-reset - switch tabs/details should always start from top to avoid "tail landing" confusion. window.scrollTo({ top: 0, left: 0, behavior: "auto" }); - }, [pathname]); + if (!hash) return; + + // @@@hash-deeplink-retry - lease health and similar sections appear after async data load, so retry briefly instead of pretending the hash already landed. + const targetId = decodeURIComponent(hash.slice(1)); + let attempts = 0; + const maxAttempts = 40; + const timer = window.setInterval(() => { + const target = document.getElementById(targetId); + if (target) { + target.scrollIntoView({ block: "start", inline: "nearest" }); + window.clearInterval(timer); + return; + } + attempts += 1; + if (attempts >= maxAttempts) { + window.clearInterval(timer); + } + }, 50); + return () => window.clearInterval(timer); + }, [pathname, hash]); return null; } From e85d3e2b8702a2583a3c02b2ba53434436b90122 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 00:27:20 +0800 Subject: [PATCH 55/87] feat: add bounded monitor cleanup contract --- backend/web/routers/monitor.py | 22 +++ backend/web/services/monitor_service.py | 166 +++++++++++++++++- ...2026-04-06-resource-observability-split.md | 17 ++ ...-06-resource-observability-split-design.md | 63 +++++++ .../test_monitor_resources_route.py | 47 ++++- tests/Unit/monitor/test_monitor_compat.py | 139 ++++++++++++++- 6 files changed, 444 insertions(+), 10 deletions(-) diff --git a/backend/web/routers/monitor.py b/backend/web/routers/monitor.py index 12ed2cdca..5fdd95d2f 100644 --- a/backend/web/routers/monitor.py +++ b/backend/web/routers/monitor.py @@ -7,6 +7,7 @@ import asyncio from fastapi import HTTPException, Query, Request +from pydantic import BaseModel, Field from backend.web.monitor import list_evaluations, list_leases, router from backend.web.services import monitor_service @@ -16,6 +17,12 @@ ) +class ResourceCleanupRequest(BaseModel): + action: str = Field(default="cleanup_residue") + lease_ids: list[str] + expected_category: str + + @router.get("/health") def health_snapshot(): return monitor_service.runtime_health_snapshot() @@ -82,6 +89,21 @@ async def resources_refresh(): return await asyncio.to_thread(refresh_monitor_resource_overview_sync) +@router.post("/resources/cleanup") +async def resources_cleanup(payload: ResourceCleanupRequest): + from backend.web.services import monitor_service + + try: + return await asyncio.to_thread( + monitor_service.cleanup_resource_leases, + action=payload.action, + lease_ids=payload.lease_ids, + expected_category=payload.expected_category, + ) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + @router.get("/sandbox/{lease_id}/browse") async def sandbox_browse(lease_id: str, path: str = Query(default="/")): from backend.web.services.resource_service import sandbox_browse as _browse diff --git a/backend/web/services/monitor_service.py b/backend/web/services/monitor_service.py index e9ad5bbcc..f422d7865 100644 --- a/backend/web/services/monitor_service.py +++ b/backend/web/services/monitor_service.py @@ -7,7 +7,7 @@ from datetime import UTC, datetime from typing import Any -from backend.web.core.storage_factory import make_sandbox_monitor_repo +from backend.web.core.storage_factory import make_chat_session_repo, make_lease_repo, make_sandbox_monitor_repo from backend.web.services.sandbox_service import init_providers_and_managers, load_all_sessions from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path @@ -146,6 +146,8 @@ def _lease_link(lease_id: str | None) -> dict[str, Any]: } DETACHED_RESIDUE_THRESHOLD_HOURS = 4.0 +RESOURCE_CLEANUP_ALLOWED_CATEGORIES = {"detached_residue", "orphan_cleanup"} +ACTIVE_CHAT_SESSION_STATUSES = {"active", "idle", "paused"} def _classify_lease_semantics(*, thread_id: str | None, badge: dict[str, Any]) -> dict[str, str]: @@ -222,6 +224,32 @@ def _classify_lease_triage( } +def _cleanable_lease_ids(lease_ids: list[str]) -> list[str]: + cleaned: list[str] = [] + seen: set[str] = set() + for raw in lease_ids: + lease_id = str(raw or "").strip() + if not lease_id or lease_id in seen: + continue + seen.add(lease_id) + cleaned.append(lease_id) + if not cleaned: + raise ValueError("lease_ids must contain at least one non-empty lease id") + return cleaned + + +def _triage_category_for_row(row: dict[str, Any]) -> str: + badge = _make_badge(row.get("desired_state"), row.get("observed_state")) + triage = _classify_lease_triage( + thread_id=row.get("thread_id"), + badge=badge, + observed_state=row.get("observed_state"), + desired_state=row.get("desired_state"), + updated_at=row.get("updated_at"), + ) + return str(triage["category"]) + + def _extract_eval_note_value(notes: str, key: str) -> str | None: match = re.search(rf"(?:^|[ |]){re.escape(key)}=([^ ]+)", notes) if not match: @@ -554,9 +582,7 @@ def _historical_lease_detail( return None created_candidates = [ - str(value) - for value in [*(row.get("started_at") for row in sessions), *(row.get("created_at") for row in events)] - if value + str(value) for value in [*(row.get("started_at") for row in sessions), *(row.get("created_at") for row in events)] if value ] updated_candidates = [ str(value) @@ -693,6 +719,138 @@ def list_leases() -> dict[str, Any]: repo.close() +def cleanup_resource_leases( + *, + action: str, + lease_ids: list[str], + expected_category: str, +) -> dict[str, Any]: + if action != "cleanup_residue": + raise ValueError(f"Unsupported cleanup action: {action}") + if expected_category not in RESOURCE_CLEANUP_ALLOWED_CATEGORIES: + raise ValueError("expected_category must be one of: detached_residue, orphan_cleanup") + + target_lease_ids = _cleanable_lease_ids(lease_ids) + monitor_repo = make_sandbox_monitor_repo() + lease_repo = make_lease_repo() + chat_session_repo = make_chat_session_repo() + try: + rows_by_id = {str(row.get("lease_id") or ""): row for row in monitor_repo.query_leases() if row.get("lease_id")} + providers, _ = init_providers_and_managers() + cleaned: list[dict[str, Any]] = [] + skipped: list[str] = [] + errors: list[dict[str, Any]] = [] + + for lease_id in target_lease_ids: + row = rows_by_id.get(lease_id) + if row is None: + skipped.append(lease_id) + errors.append({"lease_id": lease_id, "reason": "lease_not_found"}) + continue + + actual_category = _triage_category_for_row(row) + if actual_category != expected_category: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "category_mismatch", + "expected_category": expected_category, + "actual_category": actual_category, + } + ) + continue + + sessions = monitor_repo.query_lease_sessions(lease_id) + live_session_ids = [ + str(session.get("chat_session_id")) + for session in sessions + if str(session.get("status") or "").strip().lower() in ACTIVE_CHAT_SESSION_STATUSES + ] + if live_session_ids: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "live_sessions_present", + "session_ids": live_session_ids, + } + ) + continue + + if chat_session_repo.lease_has_running_command(lease_id): + skipped.append(lease_id) + errors.append({"lease_id": lease_id, "reason": "running_command_present"}) + continue + + provider_name = str(row.get("provider_name") or "").strip() + instance_id = str(row.get("current_instance_id") or "").strip() or None + if instance_id: + provider = providers.get(provider_name) + if provider is None: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_unavailable", + "provider": provider_name, + } + ) + continue + if not provider.get_capability().can_destroy: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_destroy_unsupported", + "provider": provider_name, + } + ) + continue + try: + destroyed = provider.destroy_session(instance_id, sync=True) + except Exception as exc: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_destroy_failed", + "provider": provider_name, + "detail": str(exc), + } + ) + continue + if not destroyed: + skipped.append(lease_id) + errors.append( + { + "lease_id": lease_id, + "reason": "provider_destroy_failed", + "provider": provider_name, + "detail": "destroy_session returned false", + } + ) + continue + + lease_repo.delete(lease_id) + cleaned.append({"lease_id": lease_id, "category": actual_category}) + + refreshed_summary = list_leases()["triage"]["summary"] + return { + "action": action, + "expected_category": expected_category, + "attempted": target_lease_ids, + "cleaned": cleaned, + "skipped": skipped, + "errors": errors, + "refreshed_summary": refreshed_summary, + } + finally: + chat_session_repo.close() + lease_repo.close() + monitor_repo.close() + + def get_lease(lease_id: str) -> dict[str, Any]: repo = make_sandbox_monitor_repo() try: diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 035da36a2..6f35f292e 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -59,6 +59,23 @@ - bounded resource cleanup inside monitor `Resources` - first slice must target backlog-like classes only (`detached_residue`, `orphan_cleanup`) - cleanup action must be backend-owned and Playwright-proven; dead buttons do not count + - chosen contract shape: + - `POST /api/monitor/resources/cleanup` + - request = `action + explicit lease_ids + expected_category` + - response = `attempted/cleaned/skipped/errors/refreshed_summary` + - chosen execution shape: + - re-query backend triage before every mutation + - reuse existing provider destroy + lease repo delete semantics + - fail loudly if a lease has drifted back into live/healthy classes + - landed backend slice: + - `backend/web/routers/monitor.py` now exposes `POST /api/monitor/resources/cleanup` + - `backend/web/services/monitor_service.py` now owns `cleanup_resource_leases(...)` + - route/service return `attempted/cleaned/skipped/errors/refreshed_summary` + - focused proof: + - `env -u ALL_PROXY -u all_proxy uv run pytest -q tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> `17 passed` + - `uv run ruff check backend/web/services/monitor_service.py backend/web/routers/monitor.py tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> green + - `uv run ruff format --check backend/web/services/monitor_service.py backend/web/routers/monitor.py tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> green + - `uv run pyright backend/web/services/monitor_service.py backend/web/routers/monitor.py` -> `0 errors` - next honest follow-up remains: - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index dc0ab9d3b..f6bc068fc 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -369,6 +369,69 @@ The dashboard is a switchboard, not a full destination page. It should answer - no product-page reuse of these controls - if this lands, it should appear as an operator action inside the global monitor resources surface, close to lease health / residue drill-down, not as a generic product resource affordance +#### Chosen Minimal Approach + +- add a monitor-only write endpoint instead of overloading the existing read routes: + - `POST /api/monitor/resources/cleanup` +- request contract stays narrow and backend-owned: + - `action`: first slice only `cleanup_residue` + - `lease_ids`: explicit list of lease ids chosen by the operator + - `expected_category`: one of `detached_residue` or `orphan_cleanup` +- response contract must be honest and per-lease: + - `attempted` + - `cleaned` + - `skipped` + - `errors` + - `refreshed_summary` + +#### Landed Backend Slice + +- backend route now exists at `POST /api/monitor/resources/cleanup` +- service ownership lives in `backend/web/services/monitor_service.py::cleanup_resource_leases(...)` +- first landed action is still only `cleanup_residue` +- currently landed backend guards: + - rejects unsupported `action` + - rejects unsupported `expected_category` + - re-checks current triage from live monitor rows before mutation + - refuses to mutate leases that currently classify outside `detached_residue` / `orphan_cleanup` + - refuses cleanup when live sessions or running commands still exist + - refuses cleanup when a provider-backed destroy step is still required but unavailable/failing +- current honest boundary: + - backend contract is live and tested + - UI buttons in monitor `Resources -> Lease Health` are still pending + +#### Why This Shape + +- it keeps read contracts (`/api/monitor/resources`, `/api/monitor/leases`) clean and cacheable +- it avoids inventing a frontend-owned cleanup heuristic; backend re-checks current triage before mutating anything +- it lets the first slice reuse existing sandbox destruction + lease deletion semantics instead of creating a second cleanup language + +#### Backend Rules + +- `cleanup_residue` is allowed only when the current backend triage still classifies the lease as: + - `detached_residue` + - `orphan_cleanup` +- any lease that currently resolves to `active_drift` or `healthy_capacity` must fail loudly +- first slice does not support bulk heuristics like “all detached residue” without explicit lease ids +- first slice must not silently downgrade to product/session destroy routes if the manager/provider path is missing + +#### Cleanup Execution Model + +- re-query the current lease truth through the monitor repo and monitor triage helpers before every mutation +- for each accepted lease: + - if a live provider instance is still attached, destroy it through the provider/manager path first + - once the lease is no longer in use by terminals/sessions, delete the lease row through the existing lease repo abstraction +- if the lease still has active terminal/session bindings, return an explicit skip/error instead of force-deleting through the repo + +#### Frontend First Slice + +- add a small cleanup action only inside `Resources -> Lease Health` +- scope it to grouped backlog sections, not the provider detail working surface +- first slice can be as small as: + - per-row `Cleanup` + - optional group-level `Cleanup visible residue` +- success state must come from a re-fetch of monitor triage, not optimistic UI removal + ### Why this IA - the backend already exposes `/api/monitor/resources`; the missing piece is a monitor entry surface, not another resource backend invention diff --git a/tests/Integration/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py index cdf28e6f7..5a5e54263 100644 --- a/tests/Integration/test_monitor_resources_route.py +++ b/tests/Integration/test_monitor_resources_route.py @@ -72,7 +72,48 @@ def test_monitor_leases_route_exposes_summary_and_groups(): assert "triage" in payload assert set(payload["summary"]).issuperset({"total", "healthy", "diverged", "orphan", "orphan_diverged"}) assert isinstance(payload["groups"], list) - assert set(payload["triage"]["summary"]).issuperset( - {"total", "active_drift", "detached_residue", "orphan_cleanup", "healthy_capacity"} - ) + assert set(payload["triage"]["summary"]).issuperset({"total", "active_drift", "detached_residue", "orphan_cleanup", "healthy_capacity"}) assert isinstance(payload["triage"]["groups"], list) + + +def test_monitor_resources_cleanup_route_forwards_structured_payload(monkeypatch): + from backend.web.services import monitor_service + + monkeypatch.setattr( + monitor_service, + "cleanup_resource_leases", + lambda *, action, lease_ids, expected_category: { + "action": action, + "expected_category": expected_category, + "attempted": list(lease_ids), + "cleaned": [{"lease_id": "lease-1", "category": expected_category}], + "skipped": [], + "errors": [], + "refreshed_summary": { + "total": 1, + "active_drift": 0, + "detached_residue": 0, + "orphan_cleanup": 1, + "healthy_capacity": 0, + }, + }, + ) + + with TestClient(app) as client: + response = client.post( + "/api/monitor/resources/cleanup", + json={ + "action": "cleanup_residue", + "lease_ids": ["lease-1"], + "expected_category": "detached_residue", + }, + ) + + assert response.status_code == 200 + payload = response.json() + assert payload["action"] == "cleanup_residue" + assert payload["attempted"] == ["lease-1"] + assert payload["cleaned"] == [{"lease_id": "lease-1", "category": "detached_residue"}] + assert payload["skipped"] == [] + assert payload["errors"] == [] + assert set(payload["refreshed_summary"]).issuperset({"total", "active_drift", "detached_residue", "orphan_cleanup", "healthy_capacity"}) diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index 9dbd843a1..ed90f5dde 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -249,9 +249,7 @@ def close(self): assert payload["lease_id"] == "lease-historical" assert payload["info"]["provider"] == "unknown" assert payload["state"]["text"] == "destroyed" - assert payload["related_threads"]["items"] == [ - {"thread_id": "thread-historical", "thread_url": "/thread/thread-historical"} - ] + assert payload["related_threads"]["items"] == [{"thread_id": "thread-historical", "thread_url": "/thread/thread-historical"}] def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_materialize(): @@ -349,3 +347,138 @@ def test_build_evaluation_operator_surface_marks_completed_with_errors(): "missing": 2, "total": 6, } + + +def test_cleanup_resource_leases_deletes_allowed_detached_residue(monkeypatch): + rows = [ + { + "lease_id": "lease-stale", + "provider_name": "local", + "desired_state": "running", + "observed_state": "detached", + "current_instance_id": None, + "last_error": None, + "updated_at": "2026-04-05T00:00:00", + "thread_id": "subagent-1234", + } + ] + + class FakeMonitorRepo: + def query_leases(self): + return list(rows) + + def query_lease_sessions(self, lease_id): + assert lease_id == "lease-stale" + return [{"chat_session_id": "sess-old", "status": "closed"}] + + def close(self): + return None + + class FakeLeaseRepo: + def __init__(self): + self.deleted = [] + + def delete(self, lease_id): + self.deleted.append(lease_id) + rows[:] = [row for row in rows if row["lease_id"] != lease_id] + + def close(self): + return None + + class FakeChatSessionRepo: + def lease_has_running_command(self, lease_id): + assert lease_id == "lease-stale" + return False + + def close(self): + return None + + lease_repo = FakeLeaseRepo() + monkeypatch.setattr(monitor_service, "make_sandbox_monitor_repo", lambda: FakeMonitorRepo()) + monkeypatch.setattr(monitor_service, "make_lease_repo", lambda: lease_repo) + monkeypatch.setattr(monitor_service, "make_chat_session_repo", lambda: FakeChatSessionRepo()) + monkeypatch.setattr(monitor_service, "init_providers_and_managers", lambda: ({}, {})) + monkeypatch.setattr(monitor_service, "_hours_since", lambda _: 24.0) + + payload = monitor_service.cleanup_resource_leases( + action="cleanup_residue", + lease_ids=["lease-stale"], + expected_category="detached_residue", + ) + + assert lease_repo.deleted == ["lease-stale"] + assert payload["attempted"] == ["lease-stale"] + assert payload["cleaned"] == [{"lease_id": "lease-stale", "category": "detached_residue"}] + assert payload["skipped"] == [] + assert payload["errors"] == [] + assert payload["refreshed_summary"]["detached_residue"] == 0 + + +def test_cleanup_resource_leases_reports_category_mismatch_without_deleting(monkeypatch): + rows = [ + { + "lease_id": "lease-live", + "provider_name": "local", + "desired_state": "running", + "observed_state": "detached", + "current_instance_id": "inst-live", + "last_error": None, + "updated_at": "2026-04-06T00:00:00", + "thread_id": "thread-1", + } + ] + + class FakeMonitorRepo: + def query_leases(self): + return list(rows) + + def query_lease_sessions(self, lease_id): + assert lease_id == "lease-live" + return [{"chat_session_id": "sess-live", "status": "active"}] + + def close(self): + return None + + class FakeLeaseRepo: + def __init__(self): + self.deleted = [] + + def delete(self, lease_id): + self.deleted.append(lease_id) + + def close(self): + return None + + class FakeChatSessionRepo: + def lease_has_running_command(self, lease_id): + assert lease_id == "lease-live" + return True + + def close(self): + return None + + lease_repo = FakeLeaseRepo() + monkeypatch.setattr(monitor_service, "make_sandbox_monitor_repo", lambda: FakeMonitorRepo()) + monkeypatch.setattr(monitor_service, "make_lease_repo", lambda: lease_repo) + monkeypatch.setattr(monitor_service, "make_chat_session_repo", lambda: FakeChatSessionRepo()) + monkeypatch.setattr(monitor_service, "init_providers_and_managers", lambda: ({}, {})) + monkeypatch.setattr(monitor_service, "_hours_since", lambda _: 0.5) + + payload = monitor_service.cleanup_resource_leases( + action="cleanup_residue", + lease_ids=["lease-live"], + expected_category="detached_residue", + ) + + assert lease_repo.deleted == [] + assert payload["attempted"] == ["lease-live"] + assert payload["cleaned"] == [] + assert payload["skipped"] == ["lease-live"] + assert payload["errors"] == [ + { + "lease_id": "lease-live", + "reason": "category_mismatch", + "expected_category": "detached_residue", + "actual_category": "active_drift", + } + ] From 761320487defd791a929f1ce9a2c03b8752d54b4 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 00:34:55 +0800 Subject: [PATCH 56/87] feat: add monitor cleanup controls --- ...2026-04-06-resource-observability-split.md | 7 + ...-06-resource-observability-split-design.md | 12 ++ frontend/monitor/src/App.tsx | 132 ++++++++++++++++++ frontend/monitor/src/styles.css | 32 +++++ 4 files changed, 183 insertions(+) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 6f35f292e..587a6f1bb 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -76,6 +76,13 @@ - `uv run ruff check backend/web/services/monitor_service.py backend/web/routers/monitor.py tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> green - `uv run ruff format --check backend/web/services/monitor_service.py backend/web/routers/monitor.py tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> green - `uv run pyright backend/web/services/monitor_service.py backend/web/routers/monitor.py` -> `0 errors` + - landed monitor UI slice: + - monitor `Resources -> Lease Health` now exposes per-row `Cleanup` only for `detached_residue` and `orphan_cleanup` + - success/failure state is shown via explicit feedback banner, not optimistic disappearance + - focused proof: + - `cd frontend/monitor && npm run build` -> green + - Playwright caller-proof clicked a real cleanup button and the page returned explicit failure text instead of silently swallowing it: + - `Cleanup incomplete: 0 cleaned · 1 skipped · 1 errors (lease-39ab24: lease no longer exists).` - next honest follow-up remains: - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index f6bc068fc..1ce491c12 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -432,6 +432,18 @@ The dashboard is a switchboard, not a full destination page. It should answer - optional group-level `Cleanup visible residue` - success state must come from a re-fetch of monitor triage, not optimistic UI removal +#### Landed Frontend Slice + +- monitor `Resources -> Lease Health` now exposes row-level `Cleanup` only inside: + - `Detached Residue` + - `Cleanup Backlog` +- no cleanup action exists in provider detail or product `/resources` +- current UI behavior: + - button triggers the backend cleanup contract with explicit `lease_ids` + - button disables while its request is in flight + - result is shown as a small success/error feedback banner + - post-action state still comes from re-fetch, not optimistic row removal + ### Why this IA - the backend already exposes `/api/monitor/resources`; the missing piece is a monitor entry surface, not another resource backend invention diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index e08f3537d..a7d07c1c8 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -49,6 +49,34 @@ async function fetchJSON(path: string, init?: RequestInit) { return payload; } +function formatCleanupError(error: any) { + const reason = String(error?.reason || "cleanup_failed"); + const leaseId = String(error?.lease_id || ""); + const prefix = leaseId ? `${shortId(leaseId, 12)}: ` : ""; + if (reason === "category_mismatch") { + return `${prefix}lease no longer matches ${error?.expected_category || "expected category"}`; + } + if (reason === "live_sessions_present") { + return `${prefix}active sessions still attached`; + } + if (reason === "running_command_present") { + return `${prefix}running terminal command still attached`; + } + if (reason === "provider_unavailable") { + return `${prefix}provider unavailable for destroy`; + } + if (reason === "provider_destroy_unsupported") { + return `${prefix}provider does not support destroy`; + } + if (reason === "provider_destroy_failed") { + return `${prefix}${error?.detail || "provider destroy failed"}`; + } + if (reason === "lease_not_found") { + return `${prefix}lease no longer exists`; + } + return `${prefix}${reason}`; +} + // Component: Breadcrumb navigation function Breadcrumb({ items, @@ -703,6 +731,11 @@ function MonitorResourcesPage() { const [loading, setLoading] = React.useState(false); const [refreshing, setRefreshing] = React.useState(false); const [error, setError] = React.useState(null); + const [cleanupBusyId, setCleanupBusyId] = React.useState(""); + const [cleanupFeedback, setCleanupFeedback] = React.useState<{ + tone: "success" | "error"; + text: string; + } | null>(null); const loadResources = React.useCallback(async () => { setLoading(true); @@ -746,6 +779,57 @@ function MonitorResourcesPage() { } }, []); + const cleanupLease = React.useCallback( + async ( + leaseId: string, + expectedCategory: "detached_residue" | "orphan_cleanup", + ) => { + setCleanupBusyId(leaseId); + setCleanupFeedback(null); + try { + const payload = await fetchJSON(`${API_BASE}/resources/cleanup`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + action: "cleanup_residue", + lease_ids: [leaseId], + expected_category: expectedCategory, + }), + }); + await refreshNow(); + const cleanedCount = Array.isArray(payload.cleaned) + ? payload.cleaned.length + : 0; + const skippedCount = Array.isArray(payload.skipped) + ? payload.skipped.length + : 0; + const errorCount = Array.isArray(payload.errors) + ? payload.errors.length + : 0; + if (errorCount > 0) { + const firstError = payload.errors[0]; + setCleanupFeedback({ + tone: "error", + text: `Cleanup incomplete: ${cleanedCount} cleaned · ${skippedCount} skipped · ${errorCount} errors (${formatCleanupError(firstError)}).`, + }); + return; + } + setCleanupFeedback({ + tone: "success", + text: `Cleanup applied: ${cleanedCount} lease cleaned from ${expectedCategory}.`, + }); + } catch (e: any) { + setCleanupFeedback({ + tone: "error", + text: `Cleanup failed: ${e?.message || String(e)}`, + }); + } finally { + setCleanupBusyId(""); + } + }, + [refreshNow], + ); + React.useEffect(() => { void loadResources(); }, [loadResources]); @@ -1205,6 +1289,11 @@ function MonitorResourcesPage() { {healthyCapacityLeases.length}
      + {cleanupFeedback ? ( +
      + {cleanupFeedback.text} +
      + ) : null} {hasPrimaryLeaseAttention ? (
      {activeDriftLeases.length > 0 ? ( @@ -1260,6 +1349,7 @@ function MonitorResourcesPage() { Thread State Updated + Action @@ -1284,6 +1374,28 @@ function MonitorResourcesPage() { {item.updated_ago} + + + ))} @@ -1305,6 +1417,7 @@ function MonitorResourcesPage() { Instance State Updated + Action @@ -1321,6 +1434,25 @@ function MonitorResourcesPage() { {item.updated_ago} + + + ))} diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index da960618d..c4100c274 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -533,6 +533,27 @@ h2 { background: var(--bg-muted); } +.cleanup-feedback { + margin: 0.85rem 0 1rem; + padding: 0.75rem 0.95rem; + border-radius: 12px; + border: 1px solid var(--border); + background: var(--bg-muted); + color: var(--text-secondary); +} + +.cleanup-feedback.is-success { + border-color: rgba(5, 150, 105, 0.16); + background: var(--success-soft); + color: var(--success); +} + +.cleanup-feedback.is-error { + border-color: rgba(220, 38, 38, 0.16); + background: var(--danger-soft); + color: var(--danger); +} + .resource-section-shell { margin-bottom: 1.25rem; } @@ -1153,6 +1174,17 @@ td[colspan] { padding: 2rem 1rem; } +.cleanup-action-cell { + width: 1%; + white-space: nowrap; +} + +.cleanup-action-cell .ghost-btn { + min-height: 2rem; + padding: 0.35rem 0.7rem; + font-size: 0.82rem; +} + .page[data-testid="page-traces"] td, .page[data-testid="page-threads"] td { padding: 0.5rem 0.75rem; From b759e1b8539877d4c459ddd578814492e2f6a200 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 00:35:55 +0800 Subject: [PATCH 57/87] docs: dedupe cleanup slice spec --- ...-06-resource-observability-split-design.md | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index 1ce491c12..e0341737a 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -434,15 +434,19 @@ The dashboard is a switchboard, not a full destination page. It should answer #### Landed Frontend Slice -- monitor `Resources -> Lease Health` now exposes row-level `Cleanup` only inside: - - `Detached Residue` - - `Cleanup Backlog` -- no cleanup action exists in provider detail or product `/resources` +- monitor `Resources -> Lease Health` now exposes per-row `Cleanup` buttons only for: + - `detached_residue` + - `orphan_cleanup` +- no cleanup controls were added to: + - product `/resources` + - provider detail working surface + - `active_drift` + - `healthy_capacity` - current UI behavior: - - button triggers the backend cleanup contract with explicit `lease_ids` - - button disables while its request is in flight - - result is shown as a small success/error feedback banner - - post-action state still comes from re-fetch, not optimistic row removal + - clicking `Cleanup` calls `POST /api/monitor/resources/cleanup` + - button goes busy for the targeted lease only + - result is rendered via an inline feedback strip + - visible state change comes from a re-fetch of monitor resources/leases, not optimistic removal ### Why this IA From b9267227e758d7354511b00d07c0ea55fd6208e2 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 00:45:13 +0800 Subject: [PATCH 58/87] feat: add visible cleanup controls --- ...2026-04-06-resource-observability-split.md | 6 +- ...-06-resource-observability-split-design.md | 9 +- frontend/monitor/src/App.tsx | 86 ++++++++++++++++--- 3 files changed, 85 insertions(+), 16 deletions(-) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index 587a6f1bb..df1ae7ddd 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -78,11 +78,13 @@ - `uv run pyright backend/web/services/monitor_service.py backend/web/routers/monitor.py` -> `0 errors` - landed monitor UI slice: - monitor `Resources -> Lease Health` now exposes per-row `Cleanup` only for `detached_residue` and `orphan_cleanup` + - monitor `Resources -> Lease Health` now also exposes `Cleanup visible` for the currently rendered backlog rows in those same two buckets - success/failure state is shown via explicit feedback banner, not optimistic disappearance - focused proof: - `cd frontend/monitor && npm run build` -> green - - Playwright caller-proof clicked a real cleanup button and the page returned explicit failure text instead of silently swallowing it: - - `Cleanup incomplete: 0 cleaned · 1 skipped · 1 errors (lease-39ab24: lease no longer exists).` + - Playwright caller-proof clicked a real group cleanup button and the page re-fetched into an honest smaller backlog state: + - `cleanup-bulk-verify-after.yaml` contains `Cleanup applied: 8 leases cleaned from detached_residue.` + - `cleanup-bulk-verify-after.yaml` contains `Detached Residue (8)` - next honest follow-up remains: - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index e0341737a..b829ad4f5 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -398,7 +398,8 @@ The dashboard is a switchboard, not a full destination page. It should answer - refuses cleanup when a provider-backed destroy step is still required but unavailable/failing - current honest boundary: - backend contract is live and tested - - UI buttons in monitor `Resources -> Lease Health` are still pending + - first monitor UI buttons are live + - broader cleanup ergonomics and bulk controls may still evolve #### Why This Shape @@ -437,6 +438,9 @@ The dashboard is a switchboard, not a full destination page. It should answer - monitor `Resources -> Lease Health` now exposes per-row `Cleanup` buttons only for: - `detached_residue` - `orphan_cleanup` +- monitor `Resources -> Lease Health` also now exposes bounded group actions: + - `Cleanup visible` for the currently rendered `detached_residue` rows + - `Cleanup visible` for the currently rendered `orphan_cleanup` rows - no cleanup controls were added to: - product `/resources` - provider detail working surface @@ -444,7 +448,8 @@ The dashboard is a switchboard, not a full destination page. It should answer - `healthy_capacity` - current UI behavior: - clicking `Cleanup` calls `POST /api/monitor/resources/cleanup` - - button goes busy for the targeted lease only + - clicking `Cleanup visible` still sends explicit visible `lease_ids`; it does not invoke a hidden bulk backend mode + - button goes busy for the targeted lease or targeted visible bucket only - result is rendered via an inline feedback strip - visible state change comes from a re-fetch of monitor resources/leases, not optimistic removal diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index a7d07c1c8..41ee9cb65 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -779,12 +779,17 @@ function MonitorResourcesPage() { } }, []); - const cleanupLease = React.useCallback( + const cleanupLeases = React.useCallback( async ( - leaseId: string, + leaseIds: string[], expectedCategory: "detached_residue" | "orphan_cleanup", + scopeLabel: string, ) => { - setCleanupBusyId(leaseId); + const targetIds = leaseIds.filter(Boolean); + if (targetIds.length === 0) { + return; + } + setCleanupBusyId(scopeLabel); setCleanupFeedback(null); try { const payload = await fetchJSON(`${API_BASE}/resources/cleanup`, { @@ -792,7 +797,7 @@ function MonitorResourcesPage() { headers: { "Content-Type": "application/json" }, body: JSON.stringify({ action: "cleanup_residue", - lease_ids: [leaseId], + lease_ids: targetIds, expected_category: expectedCategory, }), }); @@ -816,7 +821,7 @@ function MonitorResourcesPage() { } setCleanupFeedback({ tone: "success", - text: `Cleanup applied: ${cleanedCount} lease cleaned from ${expectedCategory}.`, + text: `Cleanup applied: ${cleanedCount} lease${cleanedCount === 1 ? "" : "s"} cleaned from ${expectedCategory}.`, }); } catch (e: any) { setCleanupFeedback({ @@ -907,6 +912,8 @@ function MonitorResourcesPage() { const hasPrimaryLeaseAttention = activeDriftLeases.length > 0 || detachedResidueLeases.length > 0; const hasSecondaryLeaseAttention = orphanCleanupLeases.length > 0; + const visibleDetachedResidueLeases = detachedResidueLeases.slice(0, 8); + const visibleOrphanCleanupLeases = orphanCleanupLeases.slice(0, 8); const refreshedAt = summary.last_refreshed_at || summary.snapshot_at; const selectedSessions = Array.isArray(selectedProvider?.sessions) ? selectedProvider.sessions @@ -1340,7 +1347,32 @@ function MonitorResourcesPage() { {detachedResidueLeases.length > 0 ? (
      -

      Detached Residue ({detachedResidueLeases.length})

      +
      +

      Detached Residue ({detachedResidueLeases.length})

      + +
      @@ -1353,7 +1385,7 @@ function MonitorResourcesPage() { - {detachedResidueLeases.slice(0, 8).map((item: any) => ( + {visibleDetachedResidueLeases.map((item: any) => (
      @@ -1384,9 +1416,10 @@ function MonitorResourcesPage() { loading } onClick={() => - void cleanupLease( - item.lease_id, + void cleanupLeases( + [item.lease_id], "detached_residue", + item.lease_id, ) } data-testid={`cleanup-${item.lease_id}`} @@ -1408,7 +1441,32 @@ function MonitorResourcesPage() { {hasSecondaryLeaseAttention ? (
      -

      Cleanup Backlog ({orphanCleanupLeases.length})

      +
      +

      Cleanup Backlog ({orphanCleanupLeases.length})

      + +
      @@ -1421,7 +1479,7 @@ function MonitorResourcesPage() { - {orphanCleanupLeases.slice(0, 8).map((item: any) => ( + {visibleOrphanCleanupLeases.map((item: any) => ( - + )} From 500a8f1bc7dcee1ad14501e1b4b2ebcbfc5a6199 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:10:59 +0800 Subject: [PATCH 74/87] fix: separate evaluation list load errors --- frontend/monitor/src/App.tsx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 712fc6e82..cdd0e1415 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3656,6 +3656,7 @@ function EvaluationPage() { >("idle"); const [evaluationId, setEvaluationId] = React.useState(""); const [runError, setRunError] = React.useState(null); + const [listError, setListError] = React.useState(null); const [evaluations, setEvaluations] = React.useState([]); const [evalOffset, setEvalOffset] = React.useState(0); const [evalLimit] = React.useState(30); @@ -3672,9 +3673,9 @@ function EvaluationPage() { ); setEvaluations(Array.isArray(payload?.items) ? payload.items : []); setEvalPagination(payload?.pagination || null); - setRunError(null); + setListError(null); } catch (e: any) { - setRunError(e?.message || String(e)); + setListError(e?.message || String(e)); } finally { setRunsLoading(false); } @@ -3865,6 +3866,7 @@ function EvaluationPage() { {runsLoading ? "loading..." : "idle"} page {evalPagination?.page ?? 1} + {listError &&
      list error: {listError}
      }
      @@ -1444,7 +1502,11 @@ function MonitorResourcesPage() { loading } onClick={() => - void cleanupLease(item.lease_id, "orphan_cleanup") + void cleanupLeases( + [item.lease_id], + "orphan_cleanup", + item.lease_id, + ) } data-testid={`cleanup-${item.lease_id}`} > From 4ea31fb7b71dc94fa40438e9566d4e83c8aa6ad4 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 00:49:36 +0800 Subject: [PATCH 59/87] feat: confirm visible cleanup actions --- ...2026-04-06-resource-observability-split.md | 9 ++- ...-06-resource-observability-split-design.md | 3 +- frontend/monitor/src/App.tsx | 78 ++++++++++++++++--- frontend/monitor/src/styles.css | 25 ++++++ 4 files changed, 99 insertions(+), 16 deletions(-) diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md index df1ae7ddd..ead536846 100644 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ b/docs/superpowers/plans/2026-04-06-resource-observability-split.md @@ -79,12 +79,15 @@ - landed monitor UI slice: - monitor `Resources -> Lease Health` now exposes per-row `Cleanup` only for `detached_residue` and `orphan_cleanup` - monitor `Resources -> Lease Health` now also exposes `Cleanup visible` for the currently rendered backlog rows in those same two buckets + - group cleanup now stages an inline `Confirm cleanup / Cancel` guardrail before mutating multiple leases - success/failure state is shown via explicit feedback banner, not optimistic disappearance - focused proof: - `cd frontend/monitor && npm run build` -> green - - Playwright caller-proof clicked a real group cleanup button and the page re-fetched into an honest smaller backlog state: - - `cleanup-bulk-verify-after.yaml` contains `Cleanup applied: 8 leases cleaned from detached_residue.` - - `cleanup-bulk-verify-after.yaml` contains `Detached Residue (8)` + - Playwright caller-proof clicked `Cleanup visible` and first got an inline confirmation state: + - `cleanup-confirm-pending.yaml` contains `Confirm cleanup` + - `cleanup-confirm-pending.yaml` contains `Remove 8 visible leases from Detached Residue.` + - then clicking `Confirm cleanup` re-fetched into an honest smaller backlog state: + - `cleanup-confirm-after.yaml` contains `Cleanup applied: 8 leases cleaned from detached_residue.` - next honest follow-up remains: - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone diff --git a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md index b829ad4f5..d3ca22a2e 100644 --- a/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md +++ b/docs/superpowers/specs/2026-04-06-resource-observability-split-design.md @@ -448,7 +448,8 @@ The dashboard is a switchboard, not a full destination page. It should answer - `healthy_capacity` - current UI behavior: - clicking `Cleanup` calls `POST /api/monitor/resources/cleanup` - - clicking `Cleanup visible` still sends explicit visible `lease_ids`; it does not invoke a hidden bulk backend mode + - clicking `Cleanup visible` first stages an inline confirm row for the current bucket + - clicking `Confirm cleanup` then sends explicit visible `lease_ids`; it does not invoke a hidden bulk backend mode - button goes busy for the targeted lease or targeted visible bucket only - result is rendered via an inline feedback strip - visible state change comes from a re-fetch of monitor resources/leases, not optimistic removal diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 41ee9cb65..86b68c1f8 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -736,6 +736,13 @@ function MonitorResourcesPage() { tone: "success" | "error"; text: string; } | null>(null); + const [cleanupConfirm, setCleanupConfirm] = React.useState<{ + leaseIds: string[]; + expectedCategory: "detached_residue" | "orphan_cleanup"; + scopeLabel: string; + label: string; + count: number; + } | null>(null); const loadResources = React.useCallback(async () => { setLoading(true); @@ -765,6 +772,7 @@ function MonitorResourcesPage() { const refreshNow = React.useCallback(async () => { setRefreshing(true); setError(null); + setCleanupConfirm(null); try { const [resources, leases] = await Promise.all([ fetchJSON(`${API_BASE}/resources/refresh`, { method: "POST" }), @@ -791,6 +799,7 @@ function MonitorResourcesPage() { } setCleanupBusyId(scopeLabel); setCleanupFeedback(null); + setCleanupConfirm(null); try { const payload = await fetchJSON(`${API_BASE}/resources/cleanup`, { method: "POST", @@ -914,6 +923,7 @@ function MonitorResourcesPage() { const hasSecondaryLeaseAttention = orphanCleanupLeases.length > 0; const visibleDetachedResidueLeases = detachedResidueLeases.slice(0, 8); const visibleOrphanCleanupLeases = orphanCleanupLeases.slice(0, 8); + // @@@cleanup-visible-confirm - visible-bucket cleanup mutates multiple leases, so group actions stage an explicit confirm while single-row cleanup stays one-click. const refreshedAt = summary.last_refreshed_at || summary.snapshot_at; const selectedSessions = Array.isArray(selectedProvider?.sessions) ? selectedProvider.sessions @@ -1301,6 +1311,42 @@ function MonitorResourcesPage() { {cleanupFeedback.text} ) : null} + {cleanupConfirm ? ( +
      +
      + Confirm cleanup +

      + Remove {cleanupConfirm.count} visible lease + {cleanupConfirm.count === 1 ? "" : "s"} from{" "} + {cleanupConfirm.label}. +

      +
      +
      + + +
      +
      + ) : null} {hasPrimaryLeaseAttention ? (
      {activeDriftLeases.length > 0 ? ( @@ -1358,19 +1404,23 @@ function MonitorResourcesPage() { loading } onClick={() => - void cleanupLeases( - visibleDetachedResidueLeases.map( + setCleanupConfirm({ + leaseIds: visibleDetachedResidueLeases.map( (item: any) => item.lease_id, ), - "detached_residue", - "group:detached_residue", - ) + expectedCategory: "detached_residue", + scopeLabel: "group:detached_residue", + label: "Detached Residue", + count: visibleDetachedResidueLeases.length, + }) } data-testid="cleanup-visible-detached-residue" > {cleanupBusyId === "group:detached_residue" ? "Cleaning..." - : "Cleanup visible"} + : cleanupConfirm?.scopeLabel === "group:detached_residue" + ? "Awaiting confirm" + : "Cleanup visible"}
      @@ -1452,19 +1502,23 @@ function MonitorResourcesPage() { loading } onClick={() => - void cleanupLeases( - visibleOrphanCleanupLeases.map( + setCleanupConfirm({ + leaseIds: visibleOrphanCleanupLeases.map( (item: any) => item.lease_id, ), - "orphan_cleanup", - "group:orphan_cleanup", - ) + expectedCategory: "orphan_cleanup", + scopeLabel: "group:orphan_cleanup", + label: "Cleanup Backlog", + count: visibleOrphanCleanupLeases.length, + }) } data-testid="cleanup-visible-orphan-cleanup" > {cleanupBusyId === "group:orphan_cleanup" ? "Cleaning..." - : "Cleanup visible"} + : cleanupConfirm?.scopeLabel === "group:orphan_cleanup" + ? "Awaiting confirm" + : "Cleanup visible"}
      diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index c4100c274..fbf7eb103 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -554,6 +554,31 @@ h2 { color: var(--danger); } +.cleanup-confirm { + margin: 0 0 1rem; + padding: 0.9rem 1rem; + border-radius: 14px; + border: 1px solid rgba(245, 158, 11, 0.22); + background: linear-gradient(180deg, var(--panel) 0%, var(--warning-soft) 100%); + display: flex; + align-items: center; + justify-content: space-between; + gap: 1rem; + flex-wrap: wrap; +} + +.cleanup-confirm p { + margin: 0.25rem 0 0; + color: var(--text-secondary); +} + +.cleanup-confirm-actions { + display: flex; + align-items: center; + gap: 0.65rem; + flex-wrap: wrap; +} + .resource-section-shell { margin-bottom: 1.25rem; } From ee8dae470d53f0ae89cd368eac5375ef151ce159 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 10:31:34 +0800 Subject: [PATCH 60/87] fix: restore resource compatibility after dev rebase --- backend/web/services/resource_cache.py | 22 +++- .../services/resource_projection_service.py | 89 +++++++++++++ backend/web/services/resource_service.py | 45 +++++++ tests/conftest.py | 121 +++++++++++++++++- 4 files changed, 273 insertions(+), 4 deletions(-) diff --git a/backend/web/services/resource_cache.py b/backend/web/services/resource_cache.py index 55eded73b..009f7dac2 100644 --- a/backend/web/services/resource_cache.py +++ b/backend/web/services/resource_cache.py @@ -10,7 +10,7 @@ from datetime import UTC, datetime from typing import Any -from backend.web.services import resource_projection_service, resource_service +from backend.web.services import resource_service _DEFAULT_REFRESH_INTERVAL_SEC = 90.0 @@ -24,6 +24,10 @@ def clear_resource_overview_cache() -> None: _snapshot_cache = None +def clear_monitor_resource_overview_cache() -> None: + clear_resource_overview_cache() + + def _now_iso() -> str: return datetime.now(UTC).isoformat().replace("+00:00", "Z") @@ -56,7 +60,7 @@ def _with_refresh_metadata( def _snapshot_drifted_from_live_sessions(snapshot: dict[str, Any]) -> bool: - live_stats = resource_projection_service.visible_resource_session_stats() + live_stats = resource_service.visible_resource_session_stats() for provider in snapshot.get("providers") or []: provider_id = str(provider.get("id") or "") current = live_stats.get(provider_id, {"sessions": 0, "running": 0}) @@ -77,7 +81,7 @@ def refresh_resource_overview_sync() -> dict[str, Any]: global _snapshot_cache started = time.perf_counter() try: - payload = resource_projection_service.list_resource_providers() + payload = resource_service.list_resource_providers() duration_ms = (time.perf_counter() - started) * 1000 payload = _with_refresh_metadata(payload, duration_ms=duration_ms, status="ok", error=None) with _snapshot_lock: @@ -96,6 +100,10 @@ def refresh_resource_overview_sync() -> dict[str, Any]: return degraded +def refresh_monitor_resource_overview_sync() -> dict[str, Any]: + return refresh_resource_overview_sync() + + def get_resource_overview_snapshot() -> dict[str, Any]: """Return cached snapshot; perform one synchronous refresh on cold start.""" with _snapshot_lock: @@ -111,6 +119,10 @@ def get_resource_overview_snapshot() -> dict[str, Any]: return refresh_resource_overview_sync() +def get_monitor_resource_overview_snapshot() -> dict[str, Any]: + return get_resource_overview_snapshot() + + async def resource_overview_refresh_loop() -> None: """Continuously refresh resource overview snapshot.""" interval_sec = _read_refresh_interval_sec() @@ -138,3 +150,7 @@ async def resource_overview_refresh_loop() -> None: print("[monitor] resource refresh loop timeout") except Exception as exc: print(f"[monitor] resource refresh loop error: {exc}") + + +async def monitor_resource_overview_refresh_loop() -> None: + await resource_overview_refresh_loop() diff --git a/backend/web/services/resource_projection_service.py b/backend/web/services/resource_projection_service.py index c59a5d55c..2ee2119a4 100644 --- a/backend/web/services/resource_projection_service.py +++ b/backend/web/services/resource_projection_service.py @@ -7,6 +7,7 @@ from backend.web.core.config import SANDBOXES_DIR from backend.web.core.storage_factory import list_resource_snapshots, make_sandbox_monitor_repo +from backend.web.services import sandbox_service from backend.web.services.resource_common import ( CATALOG as _CATALOG, ) @@ -49,6 +50,21 @@ from storage.models import map_lease_to_session_status +class _ResourceServiceCompat: + def get_provider_display_contract(self, config_name: str) -> dict[str, Any]: + from backend.web.services import resource_service as resource_service_module + + return resource_service_module.get_provider_display_contract(config_name) + + def get_provider_capability_contract(self, config_name: str) -> tuple[dict[str, bool], str | None]: + from backend.web.services import resource_service as resource_service_module + + return resource_service_module.get_provider_capability_contract(config_name) + + +resource_service = _ResourceServiceCompat() + + def _empty_capabilities() -> dict[str, bool]: return empty_capabilities() @@ -248,3 +264,76 @@ def visible_resource_session_stats() -> dict[str, dict[str, int]]: provider_stats["running"] += 1 return stats + + +def list_user_resource_providers(app: Any, owner_user_id: str) -> dict[str, Any]: + thread_repo = getattr(getattr(app, "state", None), "thread_repo", None) + member_repo = getattr(getattr(app, "state", None), "member_repo", None) + leases = sandbox_service.list_user_leases( + owner_user_id, + thread_repo=thread_repo, + member_repo=member_repo, + ) + + grouped: dict[str, list[dict[str, Any]]] = {} + for lease in leases: + provider_instance = str(lease.get("provider_name") or "local") + grouped.setdefault(provider_instance, []).append(dict(lease)) + + providers: list[dict[str, Any]] = [] + running_sessions = 0 + for config_name, provider_leases in grouped.items(): + display = resource_service.get_provider_display_contract(config_name) + capabilities, capability_error = resource_service.get_provider_capability_contract(config_name) + running_count = 0 + sessions: list[dict[str, Any]] = [] + for lease in provider_leases: + normalized = map_lease_to_session_status(lease.get("observed_state"), lease.get("desired_state")) + if normalized == "running": + running_count += 1 + running_sessions += 1 + agents = lease.get("agents") or [] + owner = agents[0] if agents else {} + for thread_id in lease.get("thread_ids") or []: + sessions.append( + { + "id": f"{lease['lease_id']}:{thread_id}", + "leaseId": str(lease.get("lease_id") or ""), + "threadId": str(thread_id or ""), + "memberId": str(owner.get("member_id") or ""), + "memberName": str(owner.get("member_name") or "未绑定Agent"), + "avatarUrl": owner.get("avatar_url"), + "status": normalized, + "startedAt": str(lease.get("created_at") or ""), + "metrics": None, + } + ) + + provider_status = "unavailable" if capability_error else _to_resource_status(True, running_count) + unavailable_reason = str(capability_error or "").strip() or None + providers.append( + { + "id": config_name, + "name": config_name, + "description": display["description"], + "vendor": display["vendor"], + "type": display["type"], + "status": provider_status, + "unavailableReason": unavailable_reason, + "error": ({"code": "PROVIDER_UNAVAILABLE", "message": unavailable_reason} if unavailable_reason else None), + "capabilities": capabilities, + "telemetry": {"running": {"used": running_count, "limit": None, "unit": "sandbox"}}, + "cardCpu": None, + "consoleUrl": display["console_url"], + "sessions": sessions, + } + ) + + summary = { + "snapshot_at": datetime.now(UTC).isoformat().replace("+00:00", "Z"), + "total_providers": len(providers), + "active_providers": len([p for p in providers if p.get("status") == "active"]), + "unavailable_providers": len([p for p in providers if p.get("status") == "unavailable"]), + "running_sessions": running_sessions, + } + return {"summary": summary, "providers": providers} diff --git a/backend/web/services/resource_service.py b/backend/web/services/resource_service.py index 736d1e8ee..8541a9b85 100644 --- a/backend/web/services/resource_service.py +++ b/backend/web/services/resource_service.py @@ -4,7 +4,24 @@ from typing import Any +from backend.web.core.config import SANDBOXES_DIR from backend.web.core.storage_factory import make_sandbox_monitor_repo, upsert_resource_snapshot +from backend.web.services import resource_projection_service +from backend.web.services.resource_common import ( + CATALOG as _CATALOG, +) +from backend.web.services.resource_common import ( + resolve_console_url as _resolve_console_url, +) +from backend.web.services.resource_common import ( + resolve_instance_capabilities as _resolve_instance_capabilities, +) +from backend.web.services.resource_common import ( + resolve_provider_name, +) +from backend.web.services.resource_common import ( + resolve_provider_type as _resolve_provider_type, +) from backend.web.services.sandbox_service import build_provider_from_config_name from sandbox.resource_snapshot import ( ensure_resource_snapshot_table, @@ -16,6 +33,34 @@ # --------------------------------------------------------------------------- +def list_resource_providers() -> dict[str, Any]: + return resource_projection_service.list_resource_providers() + + +def visible_resource_session_stats() -> dict[str, dict[str, int]]: + return resource_projection_service.visible_resource_session_stats() + + +def get_provider_display_contract(config_name: str) -> dict[str, Any]: + provider_name = resolve_provider_name(config_name, sandboxes_dir=SANDBOXES_DIR) + catalog = _CATALOG.get(provider_name) + description = catalog.description if catalog else provider_name + vendor = catalog.vendor if catalog else None + provider_type = _resolve_provider_type(provider_name, config_name, sandboxes_dir=SANDBOXES_DIR) + console_url = _resolve_console_url(provider_name, config_name, sandboxes_dir=SANDBOXES_DIR) + return { + "provider_name": provider_name, + "description": description, + "vendor": vendor, + "type": provider_type, + "console_url": console_url, + } + + +def get_provider_capability_contract(config_name: str) -> tuple[dict[str, bool], str | None]: + return _resolve_instance_capabilities(config_name) + + def sandbox_browse(lease_id: str, path: str) -> dict[str, Any]: """Browse the filesystem of a sandbox lease via its provider.""" from pathlib import PurePosixPath diff --git a/tests/conftest.py b/tests/conftest.py index 8136ade6b..6bb8bca85 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,10 @@ import gc import sys import time +from collections.abc import AsyncIterator, Generator +from contextlib import asynccontextmanager from pathlib import Path +from types import SimpleNamespace import pytest @@ -40,8 +43,124 @@ def _unlink_db(db_path: Path) -> None: @pytest.fixture -def temp_db(tmp_path): +def temp_db(tmp_path: Path) -> Generator[Path, None, None]: """Provide a temporary SQLite database path with Windows-safe cleanup.""" db_path = tmp_path / "test.db" yield db_path _unlink_db(db_path) + + +class _FakeAsyncCursor: + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def execute(self, _query: str, *_args, **_kwargs) -> None: + return None + + async def fetchone(self): + return (1,) + + +class _FakeAsyncConnection: + def cursor(self) -> _FakeAsyncCursor: + return _FakeAsyncCursor() + + async def close(self) -> None: + return None + + +@pytest.fixture(autouse=True) +def _stub_web_checkpointer_contract(monkeypatch: pytest.MonkeyPatch) -> None: + """Keep TestClient startup on the happy path unless a test overrides it.""" + from backend.web.core import lifespan as lifespan_module + + async def _connect(_dsn: str) -> _FakeAsyncConnection: + return _FakeAsyncConnection() + + monkeypatch.setenv("LEON_POSTGRES_URL", "postgresql://tests") + monkeypatch.setattr(lifespan_module, "AsyncConnection", SimpleNamespace(connect=_connect)) + + +@pytest.fixture(autouse=True) +def _route_smoke_app_harness(request: pytest.FixtureRequest, monkeypatch: pytest.MonkeyPatch) -> Generator[None, None, None]: + path = str(request.node.path) + if not path.endswith("tests/Integration/test_monitor_resources_route.py") and not path.endswith( + "tests/Integration/test_resources_route.py" + ): + yield + return + + from backend.web.core.dependencies import get_current_user_id + from backend.web.main import app as web_app + from backend.web.routers import monitor as monitor_router + from backend.web.services import monitor_service, resource_projection_service + + @asynccontextmanager + async def _noop_lifespan(_app) -> AsyncIterator[None]: + yield + + product_payload = { + "summary": { + "snapshot_at": "now", + "total_providers": 1, + "active_providers": 1, + "unavailable_providers": 0, + "running_sessions": 1, + "last_refreshed_at": "now", + "refresh_status": "fresh", + }, + "providers": [{"id": "local", "sessions": []}], + } + monitor_payload = { + "summary": { + "snapshot_at": "now", + "running_sessions": 1, + "last_refreshed_at": "now", + "refresh_status": "fresh", + }, + "providers": [{"id": "local"}], + } + lease_payload = { + "summary": {"total": 1, "healthy": 1, "diverged": 0, "orphan": 0, "orphan_diverged": 0}, + "groups": [], + "triage": { + "summary": { + "total": 1, + "active_drift": 0, + "detached_residue": 0, + "orphan_cleanup": 0, + "healthy_capacity": 1, + }, + "groups": [], + }, + } + + original_lifespan = web_app.router.lifespan_context + monkeypatch.setattr(web_app.router, "lifespan_context", _noop_lifespan) + web_app.dependency_overrides[get_current_user_id] = lambda: "user-test" + monkeypatch.setattr(monitor_router, "get_monitor_resource_overview_snapshot", lambda: monitor_payload) + monkeypatch.setattr(monitor_router, "refresh_monitor_resource_overview_sync", lambda: monitor_payload) + monkeypatch.setattr(monitor_router, "list_leases", lambda: lease_payload) + monkeypatch.setattr(monitor_router, "list_evaluations", lambda *args, **kwargs: {"items": []}) + monkeypatch.setattr( + monitor_service, + "runtime_health_snapshot", + lambda: { + "snapshot_at": "now", + "db": {"counts": {"chat_sessions": 1}}, + "sessions": {"total": 1}, + }, + ) + monkeypatch.setattr( + resource_projection_service, + "list_user_resource_providers", + lambda *_args, **_kwargs: product_payload, + ) + try: + yield + finally: + web_app.router.lifespan_context = original_lifespan + web_app.dependency_overrides.clear() From c05c6173ddb1d6f8a4fd93ad5a654b5bd454261e Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 10:34:43 +0800 Subject: [PATCH 61/87] fix: add dialog semantics to operator guide --- frontend/monitor/src/App.tsx | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 86b68c1f8..0594a0598 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -4584,6 +4584,8 @@ function OperatorGuideModal({ open: boolean; onClose: () => void; }) { + const panelRef = React.useRef(null); + React.useEffect(() => { if (!open) return; const onKeyDown = (event: KeyboardEvent) => { @@ -4593,6 +4595,12 @@ function OperatorGuideModal({ return () => window.removeEventListener("keydown", onKeyDown); }, [open, onClose]); + React.useEffect(() => { + if (!open) return; + // @@@modal-focus-handshake - focus the panel itself so keyboard users land inside the active surface instead of staying on the trigger behind the backdrop. + panelRef.current?.focus(); + }, [open]); + if (!open) return null; return ( @@ -4602,13 +4610,18 @@ function OperatorGuideModal({ data-testid="operator-guide-modal" >
      event.stopPropagation()} >

      Operator Guide

      -

      How to read this console

      +

      How to read this console

      {showEvalComposeAction ? ( - + Build Eval ) : null} From 09449082c0b1f9abff3f0b12a20e3f9e93e845ab Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 10:44:23 +0800 Subject: [PATCH 64/87] fix: trap modal tab focus --- frontend/monitor/src/App.tsx | 44 ++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 699302da1..10b4fd1b2 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -77,6 +77,38 @@ function formatCleanupError(error: any) { return `${prefix}${reason}`; } +function trapDialogTabKey( + event: KeyboardEvent, + panel: HTMLElement | null, +): void { + if (event.key !== "Tab" || !panel) return; + // @@@dialog-focus-loop - keep keyboard focus inside the active modal so operators do not tab into the console shell behind it. + const focusables = Array.from( + panel.querySelectorAll( + 'a[href], button:not([disabled]), textarea:not([disabled]), input:not([disabled]), select:not([disabled]), [tabindex]:not([tabindex="-1"])', + ), + ).filter((node) => !node.hasAttribute("disabled")); + if (focusables.length === 0) { + event.preventDefault(); + panel.focus(); + return; + } + const first = focusables[0]; + const last = focusables[focusables.length - 1]; + const active = document.activeElement as HTMLElement | null; + if (event.shiftKey) { + if (!active || active === first || !panel.contains(active)) { + event.preventDefault(); + last.focus(); + } + return; + } + if (!active || active === last || !panel.contains(active)) { + event.preventDefault(); + first.focus(); + } +} + // Component: Breadcrumb navigation function Breadcrumb({ items, @@ -3728,7 +3760,11 @@ function EvaluationPage() { if (!composerOpen) return; // @@@composer-escape-close - keep the config layer aligned with the guide modal so keyboard users can dismiss it without reaching for the mouse. const onKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") closeComposer(); + if (event.key === "Escape") { + closeComposer(); + return; + } + trapDialogTabKey(event, composerPanelRef.current); }; window.addEventListener("keydown", onKeyDown); return () => window.removeEventListener("keydown", onKeyDown); @@ -4619,7 +4655,11 @@ function OperatorGuideModal({ React.useEffect(() => { if (!open) return; const onKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") onClose(); + if (event.key === "Escape") { + onClose(); + return; + } + trapDialogTabKey(event, panelRef.current); }; window.addEventListener("keydown", onKeyDown); return () => window.removeEventListener("keydown", onKeyDown); From 82dcda59ed6c7f888755f4f8759def909293daa7 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 10:51:11 +0800 Subject: [PATCH 65/87] fix: restore monitor resource triage snapshot --- backend/web/services/resource_cache.py | 10 ++++- .../test_monitor_resources_route.py | 5 +++ .../test_monitor_resource_overview_cache.py | 39 +++++++++++++++++++ tests/conftest.py | 10 +++++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/backend/web/services/resource_cache.py b/backend/web/services/resource_cache.py index 009f7dac2..62846a653 100644 --- a/backend/web/services/resource_cache.py +++ b/backend/web/services/resource_cache.py @@ -10,7 +10,7 @@ from datetime import UTC, datetime from typing import Any -from backend.web.services import resource_service +from backend.web.services import monitor_service, resource_service _DEFAULT_REFRESH_INTERVAL_SEC = 90.0 @@ -59,6 +59,13 @@ def _with_refresh_metadata( return payload +def _attach_monitor_triage(payload: dict[str, Any]) -> dict[str, Any]: + lease_payload = monitor_service.list_leases() + triage = lease_payload.get("triage") or {"summary": {}, "groups": []} + payload["triage"] = triage + return payload + + def _snapshot_drifted_from_live_sessions(snapshot: dict[str, Any]) -> bool: live_stats = resource_service.visible_resource_session_stats() for provider in snapshot.get("providers") or []: @@ -82,6 +89,7 @@ def refresh_resource_overview_sync() -> dict[str, Any]: started = time.perf_counter() try: payload = resource_service.list_resource_providers() + payload = _attach_monitor_triage(payload) duration_ms = (time.perf_counter() - started) * 1000 payload = _with_refresh_metadata(payload, duration_ms=duration_ms, status="ok", error=None) with _snapshot_lock: diff --git a/tests/Integration/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py index 5a5e54263..5e6b9c04b 100644 --- a/tests/Integration/test_monitor_resources_route.py +++ b/tests/Integration/test_monitor_resources_route.py @@ -11,9 +11,12 @@ def test_monitor_resources_route_smoke(): payload = response.json() assert "summary" in payload assert "providers" in payload + assert "triage" in payload assert "snapshot_at" in payload["summary"] assert "running_sessions" in payload["summary"] assert isinstance(payload["providers"], list) + assert set(payload["triage"]["summary"]).issuperset({"total", "active_drift", "detached_residue", "orphan_cleanup", "healthy_capacity"}) + assert isinstance(payload["triage"]["groups"], list) def test_monitor_resources_refresh_route_smoke(): @@ -24,8 +27,10 @@ def test_monitor_resources_refresh_route_smoke(): payload = response.json() assert "summary" in payload assert "providers" in payload + assert "triage" in payload assert "last_refreshed_at" in payload["summary"] assert "refresh_status" in payload["summary"] + assert set(payload["triage"]["summary"]).issuperset({"total", "active_drift", "detached_residue", "orphan_cleanup", "healthy_capacity"}) def test_monitor_and_product_resource_routes_coexist_intentionally(): diff --git a/tests/Unit/monitor/test_monitor_resource_overview_cache.py b/tests/Unit/monitor/test_monitor_resource_overview_cache.py index 0d17c0b04..6f961cb7d 100644 --- a/tests/Unit/monitor/test_monitor_resource_overview_cache.py +++ b/tests/Unit/monitor/test_monitor_resource_overview_cache.py @@ -1,6 +1,23 @@ from backend.web.services import resource_cache as cache +def _triage_payload(category: str) -> dict: + summary = { + "total": 1, + "active_drift": 0, + "detached_residue": 0, + "orphan_cleanup": 0, + "healthy_capacity": 0, + } + summary[category] = 1 + return { + "triage": { + "summary": summary, + "groups": [{"key": category, "items": [{"lease_id": "lease-1"}]}], + } + } + + def test_resource_overview_cache_refresh_adds_metadata(monkeypatch): cache.clear_monitor_resource_overview_cache() monkeypatch.setattr( @@ -17,14 +34,22 @@ def test_resource_overview_cache_refresh_adds_metadata(monkeypatch): "providers": [{"id": "local"}], }, ) + monkeypatch.setattr( + cache, + "monitor_service", + type("_MonitorService", (), {"list_leases": staticmethod(lambda: _triage_payload("detached_residue"))}), + raising=False, + ) payload = cache.refresh_monitor_resource_overview_sync() assert payload["summary"]["refresh_status"] == "ok" assert payload["summary"]["refresh_error"] is None assert payload["summary"]["last_refreshed_at"] == "2026-03-03T00:00:00Z" + assert payload["triage"]["summary"]["detached_residue"] == 1 cached = cache.get_monitor_resource_overview_snapshot() assert cached["providers"][0]["id"] == "local" + assert cached["triage"]["groups"][0]["key"] == "detached_residue" def test_resource_overview_cache_keeps_last_snapshot_on_refresh_error(monkeypatch): @@ -43,6 +68,12 @@ def test_resource_overview_cache_keeps_last_snapshot_on_refresh_error(monkeypatc "providers": [{"id": "docker"}], }, ) + monkeypatch.setattr( + cache, + "monitor_service", + type("_MonitorService", (), {"list_leases": staticmethod(lambda: _triage_payload("orphan_cleanup"))}), + raising=False, + ) cache.refresh_monitor_resource_overview_sync() def _raise(): @@ -53,6 +84,7 @@ def _raise(): assert degraded["providers"][0]["id"] == "docker" assert degraded["summary"]["refresh_status"] == "error" assert degraded["summary"]["refresh_error"] == "probe failed" + assert degraded["triage"]["groups"][0]["key"] == "orphan_cleanup" def test_resource_overview_cache_refreshes_when_live_session_counts_drift(monkeypatch): @@ -94,9 +126,16 @@ def test_resource_overview_cache_refreshes_when_live_session_counts_drift(monkey calls = iter([stale_payload, fresh_payload]) monkeypatch.setattr(cache.resource_service, "list_resource_providers", lambda: next(calls)) monkeypatch.setattr(cache.resource_service, "visible_resource_session_stats", lambda: {"local": {"sessions": 1, "running": 1}}) + monkeypatch.setattr( + cache, + "monitor_service", + type("_MonitorService", (), {"list_leases": staticmethod(lambda: _triage_payload("healthy_capacity"))}), + raising=False, + ) cache.refresh_monitor_resource_overview_sync() payload = cache.get_monitor_resource_overview_snapshot() assert payload["providers"][0]["telemetry"]["running"]["used"] == 1 assert len(payload["providers"][0]["sessions"]) == 1 + assert payload["triage"]["summary"]["healthy_capacity"] == 1 diff --git a/tests/conftest.py b/tests/conftest.py index 6bb8bca85..0f97a1173 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -122,6 +122,16 @@ async def _noop_lifespan(_app) -> AsyncIterator[None]: "refresh_status": "fresh", }, "providers": [{"id": "local"}], + "triage": { + "summary": { + "total": 1, + "active_drift": 0, + "detached_residue": 0, + "orphan_cleanup": 0, + "healthy_capacity": 1, + }, + "groups": [], + }, } lease_payload = { "summary": {"total": 1, "healthy": 1, "diverged": 0, "orphan": 0, "orphan_diverged": 0}, From 924cbda2d2a97d1af5fc71e01d0aaeb608e75521 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 11:00:31 +0800 Subject: [PATCH 66/87] fix: stop repeated conversation error polling --- frontend/monitor/src/App.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 10b4fd1b2..d171cdad0 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -2817,7 +2817,9 @@ function ThreadTraceSection({ if (!threadId || !autoRefreshEnabled || !autoRefresh) return; const timer = window.setInterval(() => { loadTrace(selectedRunId); - loadConversation(); + if (!conversationError) { + loadConversation(); + } }, 2000); return () => window.clearInterval(timer); }, [ @@ -2827,6 +2829,7 @@ function ThreadTraceSection({ selectedRunId, loadTrace, loadConversation, + conversationError, ]); const traceTail = traceEvents.slice(-300); From 65b7d5bbac30a3fb93289a0533e21fccfc224414 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 11:17:11 +0800 Subject: [PATCH 67/87] fix: restore compat lease deep links --- backend/web/monitor.py | 50 ++++++++++++++++- tests/Unit/monitor/test_monitor_compat.py | 67 +++++++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/backend/web/monitor.py b/backend/web/monitor.py index 357cb7c57..6d499b091 100644 --- a/backend/web/monitor.py +++ b/backend/web/monitor.py @@ -1986,13 +1986,61 @@ def list_leases(): return monitor_service.list_leases() +def _compat_historical_lease_detail(db: sqlite3.Connection, lease_id: str): + from backend.web.services import monitor_service + + sessions = [ + dict(row) + for row in db.execute( + """ + SELECT + cs.chat_session_id, + cs.thread_id, + cs.status, + cs.started_at, + cs.ended_at, + cs.close_reason, + cs.lease_id, + sl.provider_name, + sl.desired_state, + sl.observed_state, + sl.current_instance_id, + sl.last_error + FROM chat_sessions cs + LEFT JOIN sandbox_leases sl ON cs.lease_id = sl.lease_id + WHERE cs.lease_id = ? + ORDER BY cs.started_at DESC + """, + (lease_id,), + ).fetchall() + ] + events = [ + dict(row) + for row in db.execute( + """ + SELECT event_id, lease_id, event_type, source, created_at + FROM lease_events + WHERE lease_id = ? + ORDER BY created_at DESC + """, + (lease_id,), + ).fetchall() + ] + # @@@compat-lease-fallback - thread/session detail still reads compat sqlite facts. + # When service-backed lease detail misses, keep linked historical leases navigable. + return monitor_service._historical_lease_detail(lease_id, sessions, events) + + @router.get("/lease/{lease_id}") -def get_lease(lease_id: str): +def get_lease(lease_id: str, db: sqlite3.Connection = Depends(get_db)): from backend.web.services import monitor_service try: return monitor_service.get_lease(lease_id) except KeyError as exc: + fallback = _compat_historical_lease_detail(db, lease_id) + if fallback: + return fallback detail = exc.args[0] if exc.args else "Lease not found" raise HTTPException(status_code=404, detail=detail) from exc diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index ed90f5dde..c314691e9 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -252,6 +252,73 @@ def close(self): assert payload["related_threads"]["items"] == [{"thread_id": "thread-historical", "thread_url": "/thread/thread-historical"}] +def test_monitor_route_get_lease_falls_back_to_compat_db_when_service_misses(tmp_path, monkeypatch): + db_path = tmp_path / "sandbox.db" + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + conn.executescript( + """ + CREATE TABLE chat_sessions ( + chat_session_id TEXT PRIMARY KEY, + thread_id TEXT, + lease_id TEXT, + status TEXT, + started_at TEXT, + ended_at TEXT, + close_reason TEXT + ); + CREATE TABLE sandbox_leases ( + lease_id TEXT PRIMARY KEY, + provider_name TEXT, + desired_state TEXT, + observed_state TEXT, + current_instance_id TEXT, + last_error TEXT + ); + CREATE TABLE lease_events ( + event_id TEXT PRIMARY KEY, + lease_id TEXT, + event_type TEXT, + source TEXT, + payload_json TEXT, + error TEXT, + created_at TEXT + ); + """ + ) + conn.execute( + """ + INSERT INTO chat_sessions ( + chat_session_id, thread_id, lease_id, status, started_at, ended_at, close_reason + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + "sess-local", + "thread-local", + "lease-local-history", + "closed", + "2026-04-07T01:25:18.632049", + "2026-04-07T01:27:19.554403", + "thread_deleted", + ), + ) + conn.commit() + + def _raise_keyerror(_lease_id: str): + raise KeyError("Lease not found") + + monkeypatch.setattr(monitor_service, "get_lease", _raise_keyerror) + + try: + payload = monitor.get_lease("lease-local-history", db=conn) + finally: + conn.close() + + assert payload["lease_id"] == "lease-local-history" + assert payload["related_threads"]["items"] == [{"thread_id": "thread-local", "thread_url": "/thread/thread-local"}] + assert payload["state"]["text"] == "destroyed" + + def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_materialize(): payload = monitor_service.build_evaluation_operator_surface( status="provisional", From 389b2cfe4c5d66ec277b53a54e87bb86a5010d00 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 11:19:30 +0800 Subject: [PATCH 68/87] fix: surface monitor page load errors --- frontend/monitor/src/App.tsx | 140 ++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 67 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index d171cdad0..ed0ad0a68 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -131,11 +131,14 @@ function Breadcrumb({ function StateBadge({ badge }: { badge: any }) { const className = `state-badge state-${badge.color}`; const text = badge.text || badge.observed; + // @@@badge-tooltip-historical - destroyed leases have no desired/observed; "Converged" is misleading for historical state. const tooltip = badge.hours_diverged ? `Diverged for ${badge.hours_diverged}h` - : badge.converged - ? "Converged" - : `${badge.observed} → ${badge.desired}`; + : !badge.desired && !badge.observed + ? "No active state" + : badge.converged + ? `${badge.observed} (converged)` + : `${badge.observed} → ${badge.desired}`; return ( @@ -1709,16 +1712,20 @@ function MonitorResourcesPage() { function ThreadsPage() { const [data, setData] = React.useState(null); const [loading, setLoading] = React.useState(false); + const [error, setError] = React.useState(null); const [offset, setOffset] = React.useState(0); const [limit, setLimit] = React.useState(50); const loadThreads = React.useCallback(async () => { setLoading(true); + setError(null); try { const payload = await fetchAPI( `/threads?offset=${offset}&limit=${limit}`, ); setData(payload); + } catch (e: any) { + setError(e?.message || String(e)); } finally { setLoading(false); } @@ -1728,6 +1735,13 @@ function ThreadsPage() { void loadThreads(); }, [loadThreads]); + if (error) { + return ( +
      +
      Threads load failed: {error}
      +
      + ); + } if (!data) { return (
      @@ -1839,14 +1853,18 @@ function ThreadsPage() { function TracesPage() { const [data, setData] = React.useState(null); const [loading, setLoading] = React.useState(false); + const [error, setError] = React.useState(null); const [offset, setOffset] = React.useState(0); const [limit, setLimit] = React.useState(50); const loadTraces = React.useCallback(async () => { setLoading(true); + setError(null); try { const payload = await fetchAPI(`/traces?offset=${offset}&limit=${limit}`); setData(payload); + } catch (e: any) { + setError(e?.message || String(e)); } finally { setLoading(false); } @@ -1856,6 +1874,13 @@ function TracesPage() { void loadTraces(); }, [loadTraces]); + if (error) { + return ( +
      +
      Traces load failed: {error}
      +
      + ); + } if (!data) { return (
      @@ -1967,15 +1992,26 @@ function ThreadDetailPage() { const { threadId } = useParams(); const location = useLocation(); const [data, setData] = React.useState(null); + const [error, setError] = React.useState(null); const initialRunId = React.useMemo( () => new URLSearchParams(location.search).get("run") || "", [location.search], ); React.useEffect(() => { - fetchAPI(`/thread/${threadId}`).then(setData); + setError(null); + fetchAPI(`/thread/${threadId}`) + .then(setData) + .catch((e) => setError(e?.message || String(e))); }, [threadId]); + if (error) { + return ( +
      +
      Thread load failed: {error}
      +
      + ); + } if (!data) { return (
      @@ -3167,13 +3203,24 @@ function SessionDetailPage() { function LeasesPage() { const location = useLocation(); const [data, setData] = React.useState(null); + const [error, setError] = React.useState(null); const divergedOnly = new URLSearchParams(location.search).get("diverged") === "1"; React.useEffect(() => { - fetchAPI("/leases").then(setData); + setError(null); + fetchAPI("/leases") + .then(setData) + .catch((e) => setError(e?.message || String(e))); }, []); + if (error) { + return ( +
      +
      Leases load failed: {error}
      +
      + ); + } if (!data) { return (
      @@ -3329,11 +3376,21 @@ function LeaseDetailPage() { ); } + // @@@lease-historical-signal - detect historical fallback lease: no active desired/observed state, provider unknown. + const isHistorical = !data.state.desired && !data.state.observed; + return (

      Lease: {data.lease_id}

      + {isHistorical && ( +

      + Historical lease — reconstructed from session records. Provider and + state fields may be incomplete. +

      + )} +
      Provider: {data.info.provider} @@ -3354,10 +3411,10 @@ function LeaseDetailPage() {

      State

      - Desired: {data.state.desired} + Desired: {data.state.desired || "-"}
      - Observed: {data.state.observed} + Observed: {data.state.observed || "-"}
      Status: @@ -3420,76 +3477,25 @@ function LeaseDetailPage() { ); } -// Page: Diverged Leases -function DivergedPage() { +// Page: Events List +function EventsPage() { const [data, setData] = React.useState(null); + const [error, setError] = React.useState(null); React.useEffect(() => { - fetchAPI("/diverged").then(setData); + setError(null); + fetchAPI("/events?limit=100") + .then(setData) + .catch((e) => setError(e?.message || String(e))); }, []); - if (!data) { + if (error) { return (
      -
      Loading...
      +
      Events load failed: {error}
      ); } - - return ( -
      -

      {data.description}

      -

      Total: {data.count}

      -
      - - - - - - - - - - - - - {data.items.map((item: any) => ( - - - - - - - - - - ))} - -
      Lease IDProviderThreadDesiredObservedHours DivergedError
      - {item.lease_id} - {item.provider} - {item.thread.thread_id ? ( - - {item.thread.thread_id.slice(0, 8)} - - ) : ( - orphan - )} - {item.state_badge.desired}{item.state_badge.observed} - {item.state_badge.hours_diverged}h - {item.error || "-"}
      - - ); -} - -// Page: Events List -function EventsPage() { - const [data, setData] = React.useState(null); - - React.useEffect(() => { - fetchAPI("/events?limit=100").then(setData); - }, []); - if (!data) { return (
      From d56e9b70393d0fbefede2b7b230531e77e6977a2 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 11:37:11 +0800 Subject: [PATCH 69/87] fix: stabilize app permissions polling and resources build --- .../src/hooks/use-thread-permissions.test.tsx | 60 +++- .../app/src/hooks/use-thread-permissions.ts | 130 ++++--- .../src/pages/resources/ProviderDetail.tsx | 332 ++++++++++++++++++ 3 files changed, 470 insertions(+), 52 deletions(-) create mode 100644 frontend/app/src/pages/resources/ProviderDetail.tsx diff --git a/frontend/app/src/hooks/use-thread-permissions.test.tsx b/frontend/app/src/hooks/use-thread-permissions.test.tsx index d23871fa2..3a41aac36 100644 --- a/frontend/app/src/hooks/use-thread-permissions.test.tsx +++ b/frontend/app/src/hooks/use-thread-permissions.test.tsx @@ -1,6 +1,6 @@ // @vitest-environment jsdom -import { render } from "@testing-library/react"; +import { act, render } from "@testing-library/react"; import { useEffect } from "react"; import { afterEach, describe, expect, it, vi } from "vitest"; import { useThreadPermissions } from "./use-thread-permissions"; @@ -22,6 +22,7 @@ vi.mock("../api", async () => { afterEach(() => { vi.clearAllMocks(); + vi.useRealTimers(); window.history.replaceState({}, "", "/"); }); @@ -35,13 +36,20 @@ function Harness({ threadId }: { threadId?: string }) { describe("useThreadPermissions", () => { it("does not log an error when an in-flight permissions request is aborted on unmount", async () => { - const consoleError = vi.spyOn(console, "error").mockImplementation(() => undefined); + const consoleError = vi + .spyOn(console, "error") + .mockImplementation(() => undefined); - getThreadPermissions.mockImplementation((_threadId: string, signal?: AbortSignal) => new Promise((_, reject) => { - signal?.addEventListener("abort", () => { - reject(new DOMException("The user aborted a request.", "AbortError")); - }); - })); + getThreadPermissions.mockImplementation( + (_threadId: string, signal?: AbortSignal) => + new Promise((_, reject) => { + signal?.addEventListener("abort", () => { + reject( + new DOMException("The user aborted a request.", "AbortError"), + ); + }); + }), + ); const view = render(); view.unmount(); @@ -54,7 +62,9 @@ describe("useThreadPermissions", () => { it("does not log a failed fetch once navigation already left the thread route", async () => { window.history.replaceState({}, "", "/chat/hire/member-1/thread-1"); - const consoleError = vi.spyOn(console, "error").mockImplementation(() => undefined); + const consoleError = vi + .spyOn(console, "error") + .mockImplementation(() => undefined); getThreadPermissions.mockImplementation(async () => { window.history.replaceState({}, "", "/resources"); @@ -69,4 +79,38 @@ describe("useThreadPermissions", () => { expect(consoleError).not.toHaveBeenCalled(); consoleError.mockRestore(); }); + + it("stops polling permissions after an active-route terminal error", async () => { + vi.useFakeTimers(); + window.history.replaceState({}, "", "/chat/hire/member-1/thread-1"); + const consoleError = vi + .spyOn(console, "error") + .mockImplementation(() => undefined); + + getThreadPermissions.mockRejectedValue( + new Error( + 'API 503: {"detail":"Sandbox agent init failed for daytona_selfhost: No module named \'daytona_sdk\'"}', + ), + ); + + render(); + + await act(async () => { + await Promise.resolve(); + await Promise.resolve(); + }); + + expect(getThreadPermissions).toHaveBeenCalledTimes(1); + expect(consoleError).toHaveBeenCalledTimes(1); + + await act(async () => { + vi.advanceTimersByTime(6000); + await Promise.resolve(); + await Promise.resolve(); + }); + + expect(getThreadPermissions).toHaveBeenCalledTimes(1); + expect(consoleError).toHaveBeenCalledTimes(1); + consoleError.mockRestore(); + }); }); diff --git a/frontend/app/src/hooks/use-thread-permissions.ts b/frontend/app/src/hooks/use-thread-permissions.ts index be0dbdf15..ea5684f92 100644 --- a/frontend/app/src/hooks/use-thread-permissions.ts +++ b/frontend/app/src/hooks/use-thread-permissions.ts @@ -27,63 +27,96 @@ export interface ThreadPermissionsActions { answers?: AskUserAnswer[], annotations?: Record, ) => Promise; - addSessionRule: (behavior: PermissionRuleBehavior, toolName: string) => Promise; - removeSessionRule: (behavior: PermissionRuleBehavior, toolName: string) => Promise; + addSessionRule: ( + behavior: PermissionRuleBehavior, + toolName: string, + ) => Promise; + removeSessionRule: ( + behavior: PermissionRuleBehavior, + toolName: string, + ) => Promise; } function isActiveThreadRoute(threadId: string): boolean { const path = window.location.pathname.replace(/\/+$/, ""); - return (path.startsWith("/threads/") || path.startsWith("/chat/hire/")) && path.endsWith(`/${encodeURIComponent(threadId)}`); + return ( + (path.startsWith("/threads/") || path.startsWith("/chat/hire/")) && + path.endsWith(`/${encodeURIComponent(threadId)}`) + ); } -export function useThreadPermissions(threadId: string | undefined): ThreadPermissionsState & ThreadPermissionsActions { +export function useThreadPermissions( + threadId: string | undefined, +): ThreadPermissionsState & ThreadPermissionsActions { const [requests, setRequests] = useState([]); - const [sessionRules, setSessionRules] = useState({ allow: [], deny: [], ask: [] }); + const [sessionRules, setSessionRules] = useState({ + allow: [], + deny: [], + ask: [], + }); const [managedOnly, setManagedOnly] = useState(false); const [loading, setLoading] = useState(false); const [resolvingId, setResolvingId] = useState(null); const refreshGenerationRef = useRef(0); const requestAbortRef = useRef(null); + const pausePollingRef = useRef(false); - const refreshPermissions = useCallback(async () => { - if (!threadId) { - setRequests([]); - setSessionRules({ allow: [], deny: [], ask: [] }); - setManagedOnly(false); - return; - } - // @@@permission-refresh-generation - route switches can leave an old - // permissions fetch resolving after the chat page has already unmounted. - // Only the latest in-scope refresh is allowed to touch state or logs. - const generation = ++refreshGenerationRef.current; - requestAbortRef.current?.abort(); - const controller = new AbortController(); - requestAbortRef.current = controller; - setLoading(true); - try { - const payload = await getThreadPermissions(threadId, controller.signal); - if (refreshGenerationRef.current !== generation) return; - setRequests(payload.requests ?? []); - setSessionRules(payload.session_rules ?? { allow: [], deny: [], ask: [] }); - setManagedOnly(payload.managed_only ?? false); - } catch (err) { - if (controller.signal.aborted) return; - if (refreshGenerationRef.current !== generation) return; - // @@@permission-route-teardown - browser navigation can tear down the old - // thread page before React cleanup runs, which surfaces as a generic - // Failed to fetch from the abandoned route. Only log if this thread page - // is still the active route. - if (!isActiveThreadRoute(threadId)) return; - console.error("[useThreadPermissions] Failed to load permissions:", err); - } finally { - if (requestAbortRef.current === controller) { - requestAbortRef.current = null; + const refreshPermissions = useCallback( + async (force = false) => { + if (!threadId) { + setRequests([]); + setSessionRules({ allow: [], deny: [], ask: [] }); + setManagedOnly(false); + return; } - if (refreshGenerationRef.current === generation) { - setLoading(false); + if (!force && pausePollingRef.current) { + return; } - } - }, [threadId]); + // @@@permission-refresh-generation - route switches can leave an old + // permissions fetch resolving after the chat page has already unmounted. + // Only the latest in-scope refresh is allowed to touch state or logs. + const generation = ++refreshGenerationRef.current; + requestAbortRef.current?.abort(); + const controller = new AbortController(); + requestAbortRef.current = controller; + setLoading(true); + try { + const payload = await getThreadPermissions(threadId, controller.signal); + if (refreshGenerationRef.current !== generation) return; + pausePollingRef.current = false; + setRequests(payload.requests ?? []); + setSessionRules( + payload.session_rules ?? { allow: [], deny: [], ask: [] }, + ); + setManagedOnly(payload.managed_only ?? false); + } catch (err) { + if (controller.signal.aborted) return; + if (refreshGenerationRef.current !== generation) return; + // @@@permission-route-teardown - browser navigation can tear down the old + // thread page before React cleanup runs, which surfaces as a generic + // Failed to fetch from the abandoned route. Only log if this thread page + // is still the active route. + if (!isActiveThreadRoute(threadId)) return; + // @@@permission-poll-stop-on-terminal-error - once an active thread has + // entered a real backend error state, stop the 2s bridge poll until the + // operator explicitly refreshes or switches thread. This avoids console + // spam and repeated doomed permission requests. + pausePollingRef.current = true; + console.error( + "[useThreadPermissions] Failed to load permissions:", + err, + ); + } finally { + if (requestAbortRef.current === controller) { + requestAbortRef.current = null; + } + if (refreshGenerationRef.current === generation) { + setLoading(false); + } + } + }, + [threadId], + ); const resolvePermissionRequest = useCallback( async ( @@ -96,7 +129,14 @@ export function useThreadPermissions(threadId: string | undefined): ThreadPermis if (!threadId) return; setResolvingId(requestId); try { - await resolveThreadPermission(threadId, requestId, decision, message, answers, annotations); + await resolveThreadPermission( + threadId, + requestId, + decision, + message, + answers, + annotations, + ); await refreshPermissions(); } finally { setResolvingId(null); @@ -130,9 +170,11 @@ export function useThreadPermissions(threadId: string | undefined): ThreadPermis setSessionRules({ allow: [], deny: [], ask: [] }); setManagedOnly(false); setLoading(false); + pausePollingRef.current = false; return; } - void refreshPermissions(); + pausePollingRef.current = false; + void refreshPermissions(true); // @@@permission-poll-bridge - permission requests are thread-scoped runtime // state, but they are not first-class SSE events yet. Poll the small diff --git a/frontend/app/src/pages/resources/ProviderDetail.tsx b/frontend/app/src/pages/resources/ProviderDetail.tsx new file mode 100644 index 000000000..c450f136a --- /dev/null +++ b/frontend/app/src/pages/resources/ProviderDetail.tsx @@ -0,0 +1,332 @@ +import { useState } from "react"; +import { + Monitor, + Cloud, + Container, + Lock, + Settings, + ArrowRight, + ExternalLink, +} from "lucide-react"; +import { Link } from "react-router-dom"; +import type { ProviderInfo, UsageMetric } from "./types"; +import { + groupByLease, + useSessionCounts, + type LeaseGroup, +} from "./session-list-utils"; +import SandboxCard from "./SandboxCard"; +import SandboxDetailSheet from "./SandboxDetailSheet"; +import { formatNumber, formatLimit } from "./utils/format"; + +const typeIcon = { + local: Monitor, + cloud: Cloud, + container: Container, +} as const; + +const typeLabel = { + local: "本地", + cloud: "云端", + container: "容器", +} as const; + +const statusLabel = { + active: "活跃", + ready: "就绪", + unavailable: "未就绪", +} as const; + +interface ProviderDetailProps { + provider: ProviderInfo; +} + +export default function ProviderDetail({ provider }: ProviderDetailProps) { + const { + name, + description, + vendor, + type, + status, + unavailableReason, + telemetry, + error, + } = provider; + const TypeIcon = typeIcon[type]; + const { + running: runningCount, + paused: pausedCount, + stopped: stoppedCount, + } = useSessionCounts(provider.sessions); + const groups = groupByLease(provider.sessions); + + const [selectedGroup, setSelectedGroup] = useState(null); + const [sheetOpen, setSheetOpen] = useState(false); + + if (status === "unavailable") { + return ( +
      +
      +
      + +
      +

      {name}

      +

      {description}

      +
      +
      +
      + + {typeLabel[type]} + + · + + {statusLabel[status]} + +
      +
      +
      + +

      + {unavailableReason} +

      + {error?.message && ( +

      + {error.message} +

      + )} +

      + 前往 设置 > 沙箱 配置 {name} 环境 +

      + + + 前往设置 + + +
      +
      + ); + } + + // @@@overview-semantic - local = host machine metrics (CPU/mem/disk are provider-level). + // Non-local = session counts only; per-instance probe data is not a global provider quota. + const isLocal = type === "local"; + + return ( + <> +
      +
      +
      + +
      +

      {name}

      +

      + {description} + {vendor && ` · ${vendor}`} +

      +
      +
      +
      + {provider.consoleUrl && ( + + 控制台 + + + )} + + {typeLabel[type]} + + · + + {statusLabel[status]} + +
      +
      + +
      +
      + + 概览 + +
      + + {isLocal ? ( +
      + + + + +
      + ) : ( +
      + + {pausedCount > 0 && ( + + )} + +
      + )} + + {telemetry.quota && ( +
      +
      + + 配额 + +
      +
      + +
      +
      + )} + +
      +
      + + 沙盒 + +
      + {groups.length === 0 ? ( +

      暂无沙盒

      + ) : ( +
      + {groups.map((group) => ( + session.id).join("|") + } + group={group} + onClick={() => { + setSelectedGroup(group); + setSheetOpen(true); + }} + /> + ))} +
      + )} +
      +
      +
      + + setSheetOpen(false)} + /> + + ); +} + +function StatPill({ + count, + label, + dotClass, +}: { + count: number; + label: string; + dotClass: string; +}) { + return ( + + + + {count} + + {label} + + ); +} + +function MetricPill({ label, metric }: { label: string; metric: UsageMetric }) { + const { used, limit, unit } = metric; + if (used == null) return null; + + const usedStr = `${formatNumber(used)}${limit == null && unit === "%" ? "%" : ""}`; + const limitStr = + limit != null + ? ` / ${formatNumber(limit)} ${unit}` + : unit === "%" + ? "" + : ` ${unit}`; + + return ( + + {label} + {usedStr} + {limitStr && {limitStr}} + + ); +} + +function StatBlock({ + metric, + label, + title, + compact = false, +}: { + metric: UsageMetric; + label: string; + title: string; + compact?: boolean; +}) { + const valueStr = + metric.used != null + ? `${formatNumber(metric.used)}${metric.limit == null && metric.unit === "%" ? "%" : ""}` + : "--"; + + return ( +
      +

      + {valueStr} +

      + {metric.limit != null && ( +

      + {formatLimit(metric.limit, metric.unit)} +

      + )} +

      + {label} +

      + {!compact && ( +

      {title}

      + )} +
      + ); +} From 03dc37edf4fbaab0dc2a3418ac09a89c2879871f Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:01:49 +0800 Subject: [PATCH 70/87] fix: neutralize zero-state resource triage notes --- frontend/monitor/src/App.tsx | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index ed0ad0a68..0bfb0f5d7 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1008,13 +1008,22 @@ function MonitorResourcesPage() { 0 + ? "needs operator attention" + : "no active drift" + } tone={(triageSummary.active_drift || 0) > 0 ? "warning" : "success"} /> 0 || + (triageSummary.orphan_cleanup || 0) > 0 + ? `${triageSummary.orphan_cleanup || 0} cleanup backlog` + : "no cleanup backlog" + } tone={ (triageSummary.detached_residue || 0) > 0 ? "danger" : "success" } From 8fb2b84ead78eef5c61d0f5f67497339e5c4f090 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:03:35 +0800 Subject: [PATCH 71/87] fix: neutralize empty healthy lease state --- frontend/monitor/src/App.tsx | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 0bfb0f5d7..dfb421fe5 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -1031,9 +1031,17 @@ function MonitorResourcesPage() { 0 + ? `${triageSummary.total || leases.length} total` + : "no leases reported yet" + } tone={ - (triageSummary.healthy_capacity || 0) > 0 ? "success" : "danger" + (triageSummary.total || leases.length) === 0 + ? "default" + : (triageSummary.healthy_capacity || 0) > 0 + ? "success" + : "danger" } />
      From d197d7a486c818313ce69ce9e7c820d889a83a62 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:05:37 +0800 Subject: [PATCH 72/87] fix: neutralize empty workload warning --- frontend/monitor/src/App.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index dfb421fe5..0416ca692 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -315,7 +315,11 @@ function DashboardPage() { value={workload.running_sessions || 0} note={`${workload.evaluations_running || 0} eval jobs`} tone={ - (workload.running_sessions || 0) > 0 ? "default" : "warning" + (workload.running_sessions || 0) > 0 + ? "default" + : (workload.evaluations_running || 0) > 0 + ? "warning" + : "default" } /> @@ -3668,6 +3672,7 @@ function EvaluationPage() { ); setEvaluations(Array.isArray(payload?.items) ? payload.items : []); setEvalPagination(payload?.pagination || null); + setRunError(null); } catch (e: any) { setRunError(e?.message || String(e)); } finally { From 4a89ae2019014cde947d19b785a43fa7333a0f63 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:09:38 +0800 Subject: [PATCH 73/87] fix: show evaluation load failure instead of empty state --- frontend/monitor/src/App.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 0416ca692..712fc6e82 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3959,7 +3959,9 @@ function EvaluationPage() { ))} {evaluations.length === 0 && (
      No evaluations yet. + {runError ? "Unable to load evaluations." : "No evaluations yet."} +
      @@ -3960,7 +3962,9 @@ function EvaluationPage() { {evaluations.length === 0 && ( )} From a09ff219ee0c45f719b64d2f5dff2c3a3824c443 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:13:27 +0800 Subject: [PATCH 75/87] fix: remove monitor strict mode double fetch --- frontend/monitor/src/main.tsx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/frontend/monitor/src/main.tsx b/frontend/monitor/src/main.tsx index 46ce20d5d..287fc3135 100644 --- a/frontend/monitor/src/main.tsx +++ b/frontend/monitor/src/main.tsx @@ -4,8 +4,5 @@ import App from "./App"; import "./styles.css"; ReactDOM.createRoot(document.getElementById("root")!).render( - - - , + , ); - From 1134a8d11b6e150f71c78313557e427993e6a36d Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:22:46 +0800 Subject: [PATCH 76/87] fix: mark evaluation list failures as error --- frontend/monitor/src/App.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index cdd0e1415..536291cbf 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3863,7 +3863,9 @@ function EvaluationPage() { {evalPagination?.total ?? evaluations.length} evaluations - {runsLoading ? "loading..." : "idle"} + + {runsLoading ? "loading..." : listError ? "error" : "idle"} + page {evalPagination?.page ?? 1} {listError &&
      list error: {listError}
      } From 47de9e673ab096ce594d253db2350916f0421636 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 12:25:25 +0800 Subject: [PATCH 77/87] fix: suppress trace empty state when conversation load fails --- frontend/monitor/src/App.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 536291cbf..4637d5ccc 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3039,7 +3039,7 @@ function ThreadTraceSection({ /> ))} {conversationTail.length === 0 && - (traceTail.length > 0 ? ( + (conversationError ? null : traceTail.length > 0 ? (

      No conversation messages were captured for this run.

      From 6c50cfa31561ba945023d1c70ce072934f9fbe06 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 13:06:51 +0800 Subject: [PATCH 78/87] fix: stop evaluation polling after list failures --- frontend/monitor/src/App.tsx | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 4637d5ccc..26a12bbbb 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3684,10 +3684,13 @@ function EvaluationPage() { React.useEffect(() => { void loadEvaluations(); const timer = window.setInterval(() => { + // @@@evaluation-list-poller - once the list has entered a hard backend error state, + // stop the 5s loop and let the operator recover with an explicit retry. + if (listError) return; void loadEvaluations(); }, 5000); return () => window.clearInterval(timer); - }, [loadEvaluations]); + }, [listError, loadEvaluations]); async function handleStart() { if (runStatus === "starting") return; @@ -3868,7 +3871,18 @@ function EvaluationPage() { page {evalPagination?.page ?? 1}

      - {listError &&
      list error: {listError}
      } + {listError && ( +
      + list error: {listError} + +
      + )}
      - {runError ? "Unable to load evaluations." : "No evaluations yet."} + {listError + ? "Unable to load evaluations." + : "No evaluations yet."}
      From 0cd27e017fb1841e190a64d65f2c9d06c8941afd Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 13:10:06 +0800 Subject: [PATCH 79/87] fix: reflect paused evaluation refresh on errors --- frontend/monitor/src/App.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 26a12bbbb..94270646d 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3860,7 +3860,9 @@ function EvaluationPage() {

      Evaluations ({evalPagination?.total ?? evaluations.length})

      - auto refresh 5s + + {listError ? "auto refresh paused" : "auto refresh 5s"} +
      From f5374274885b527cc984dc5c98db313a286d44c3 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 13:13:06 +0800 Subject: [PATCH 80/87] fix: fully stop evaluation polling after list failures --- frontend/monitor/src/App.tsx | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 94270646d..9faa0aca2 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -3664,6 +3664,11 @@ function EvaluationPage() { const [runsLoading, setRunsLoading] = React.useState(false); const [composerOpen, setComposerOpen] = React.useState(false); const composerPanelRef = React.useRef(null); + const listErrorRef = React.useRef(null); + + React.useEffect(() => { + listErrorRef.current = listError; + }, [listError]); const loadEvaluations = React.useCallback(async () => { setRunsLoading(true); @@ -3686,11 +3691,14 @@ function EvaluationPage() { const timer = window.setInterval(() => { // @@@evaluation-list-poller - once the list has entered a hard backend error state, // stop the 5s loop and let the operator recover with an explicit retry. - if (listError) return; + if (listErrorRef.current) { + window.clearInterval(timer); + return; + } void loadEvaluations(); }, 5000); return () => window.clearInterval(timer); - }, [listError, loadEvaluations]); + }, [loadEvaluations]); async function handleStart() { if (runStatus === "starting") return; From 3c8e21e6ae4d4db6e1770cb0ce0ed65166ae37fd Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 13:22:03 +0800 Subject: [PATCH 81/87] fix: add retry actions to monitor load failures --- frontend/monitor/src/App.tsx | 37 +++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index 9faa0aca2..dda510a85 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -189,6 +189,21 @@ function evaluationScoreTone(item: any): string { return publishable ? "chip-success" : "chip-warning"; } +function RetryablePageError({ message }: { message: string }) { + return ( +
      +
      {message}
      + +
      + ); +} + function DashboardPage() { const [data, setData] = React.useState(null); const [loading, setLoading] = React.useState(false); @@ -214,7 +229,7 @@ function DashboardPage() { if (error) { return (
      -
      Dashboard load failed: {error}
      +
      ); } @@ -918,7 +933,7 @@ function MonitorResourcesPage() { if (error) { return (
      -
      Resource load failed: {error}
      +
      ); } @@ -1759,7 +1774,7 @@ function ThreadsPage() { if (error) { return (
      -
      Threads load failed: {error}
      +
      ); } @@ -1898,7 +1913,7 @@ function TracesPage() { if (error) { return (
      -
      Traces load failed: {error}
      +
      ); } @@ -2029,7 +2044,7 @@ function ThreadDetailPage() { if (error) { return (
      -
      Thread load failed: {error}
      +
      ); } @@ -3161,7 +3176,7 @@ function SessionDetailPage() { if (error) { return (
      -
      Session load failed: {error}
      +
      ); } @@ -3238,7 +3253,7 @@ function LeasesPage() { if (error) { return (
      -
      Leases load failed: {error}
      +
      ); } @@ -3385,7 +3400,7 @@ function LeaseDetailPage() { if (error) { return (
      -
      Lease load failed: {error}
      +
      ); } @@ -3513,7 +3528,7 @@ function EventsPage() { if (error) { return (
      -
      Events load failed: {error}
      +
      ); } @@ -3581,7 +3596,7 @@ function EventDetailPage() { if (error) { return (
      -
      Event load failed: {error}
      +
      ); } @@ -4270,7 +4285,7 @@ function EvaluationDetailPage() { if (error) { return (
      -
      Evaluation load failed: {error}
      +
      ); } From 16c08c8e2a2d495a4ff57bb7e7c7e4f6954cc93b Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 13:48:55 +0800 Subject: [PATCH 82/87] fix: stop trace polling after monitor trace failures --- frontend/monitor/src/App.tsx | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/frontend/monitor/src/App.tsx b/frontend/monitor/src/App.tsx index dda510a85..558c76202 100644 --- a/frontend/monitor/src/App.tsx +++ b/frontend/monitor/src/App.tsx @@ -2886,12 +2886,21 @@ function ThreadTraceSection({ }, [selectedRunId, loadTrace]); React.useEffect(() => { - if (!threadId || !autoRefreshEnabled || !autoRefresh) return; + // @@@trace-poll-stop-on-error - once trace or conversation has entered a + // hard backend error state, stop the 2s loop until the operator manually + // refreshes or the next successful load clears the error. + if ( + !threadId || + !autoRefreshEnabled || + !autoRefresh || + Boolean(traceError) || + Boolean(conversationError) + ) { + return; + } const timer = window.setInterval(() => { loadTrace(selectedRunId); - if (!conversationError) { - loadConversation(); - } + loadConversation(); }, 2000); return () => window.clearInterval(timer); }, [ @@ -2901,6 +2910,7 @@ function ThreadTraceSection({ selectedRunId, loadTrace, loadConversation, + traceError, conversationError, ]); From 1d6a68759221eba1232d8441ab8184f1546b1029 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 14:08:02 +0800 Subject: [PATCH 83/87] chore: remove superpowers docs and stray resource tests --- ...2026-04-06-resource-observability-split.md | 430 ---------------- ...-06-resource-observability-split-design.md | 461 ------------------ 2 files changed, 891 deletions(-) delete mode 100644 docs/superpowers/plans/2026-04-06-resource-observability-split.md delete mode 100644 docs/superpowers/specs/2026-04-06-resource-observability-split-design.md diff --git a/docs/superpowers/plans/2026-04-06-resource-observability-split.md b/docs/superpowers/plans/2026-04-06-resource-observability-split.md deleted file mode 100644 index ead536846..000000000 --- a/docs/superpowers/plans/2026-04-06-resource-observability-split.md +++ /dev/null @@ -1,430 +0,0 @@ -# Resource Observability Split Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Separate global monitor resources from user-visible product resources while moving the monitor/resource truth chain onto Supabase-backed wiring honestly enough that the system is not pretending local SQLite is still the only source of truth. - -**Architecture:** The implementation is split into two reviewable cuts. Cut A handles sandbox truth-source rewiring so lease/terminal/chat-session construction stops hardcoding SQLite-only repo creation. Cut B moves monitor/resource reads onto the shared storage abstraction, keeps `/api/monitor/resources` global, and introduces `/api/resources/*` for the product contract. - -**Tech Stack:** Python, FastAPI, Supabase-backed storage providers, existing storage contract/container abstractions, pytest, ruff - -**Execution note:** `#209` remains useful transplant material for the resource split, but active continuation moved to `#210` because the correct monitor baseline is the compat monitor from `PR #182`, not the reduced dev monitor shell. The frontend scope here stays bounded: keep the full compat operator surface, switch it to a lighter and clearer ops shell, and prove it with real Playwright traces instead of a component-only pass. - -**Additional sequencing note after live operator review:** before this branch is mergeable as a monitor base, the next follow-up cuts must address four honesty seams now visible in the real UI: `D1` threads pagination contract, `D2` provisional evaluation detail as an operator surface, `D3` lease orphan/diverged regrouping, and `D4` dashboard + global resources entry. - -**Current execution order after `D1`:** -- `D4` dashboard + global resources entry -- `D3` lease semantics/regrouping inside the new resources surface -- `D2` provisional evaluation operator surface - -**Live progress after latest frontend pass:** -- `D1` is done -- `D4` now has a landed phase-1: - - `/dashboard` route and `/api/monitor/dashboard` backend payload exist - - top nav is `Dashboard / Threads / Resources / Eval` - - root lands on `/dashboard` - - monitor `Resources` uses the global monitor contract and includes grouped lease triage - - evaluation tutorial/reference sections are collapsed by default -- `D4` now has a landed phase-2: - - monitor provider cards now expose a product-like status light, metric cells, capability strip, and session dots - - selected provider detail now reads like a real panel instead of a loose stats stack - - null telemetry in monitor resources no longer renders as fake `0.0` values -- `D4` now has a landed phase-3: - - selected provider detail now shows a lease card grid before the raw session table - - monitor keeps the raw session table for truth, but no longer forces operators to start from the noisiest surface -- `D4` now has a landed phase-4: - - dashboard `Diverged leases` and `Orphans` metrics now jump straight to `resources#lease-health` - - provider cards are tighter because duplicated paused/stopped footer counts were removed - - lease-health now only renders non-empty attention buckets by default and collapses healthy capacity behind a details shell -- `D4` now has a landed phase-5: - - selected lease cards now open a dedicated `Lease Detail` panel before the full provider session table - - the panel reuses existing payload data only: lease/thread links, member, started time, and grouped session rows - - this gives monitor resources a local deep-drill layer without changing backend contracts -- `D4` now has a landed phase-6: - - the provider session table now defaults to `Selected lease` scope instead of always showing every provider session row - - operators can switch back to `All provider sessions` when they want the full truth table - - this makes the lease drill-down and the table below it read as one path instead of two competing surfaces -- `D2` now has a landed phase-2: - - evaluation detail payload includes backend-owned `info.operator_surface` - - provisional eval detail opens with `Operator Status`, artifact paths, and explicit next steps - - redundant provisional score metadata is folded behind `Score artifacts (provisional)` instead of occupying the first screen - - operator payload now includes typed lifecycle `kind` and `artifact_summary` - - all six artifact slots stay visible with explicit `present|missing` status instead of silently dropping missing files -- `D3` now has a landed phase-2: - - `/api/monitor/leases` now adds backend-owned `triage.summary` and `triage.groups` - - triage distinguishes `active_drift`, `detached_residue`, `orphan_cleanup`, and `healthy_capacity` - - monitor `Resources` consumes that triage surface directly instead of flattening everything back into `diverged/orphan` - - legacy `/leases` also now leads with triage buckets before the collapsed raw table -- new queued follow-up: - - bounded resource cleanup inside monitor `Resources` - - first slice must target backlog-like classes only (`detached_residue`, `orphan_cleanup`) - - cleanup action must be backend-owned and Playwright-proven; dead buttons do not count - - chosen contract shape: - - `POST /api/monitor/resources/cleanup` - - request = `action + explicit lease_ids + expected_category` - - response = `attempted/cleaned/skipped/errors/refreshed_summary` - - chosen execution shape: - - re-query backend triage before every mutation - - reuse existing provider destroy + lease repo delete semantics - - fail loudly if a lease has drifted back into live/healthy classes - - landed backend slice: - - `backend/web/routers/monitor.py` now exposes `POST /api/monitor/resources/cleanup` - - `backend/web/services/monitor_service.py` now owns `cleanup_resource_leases(...)` - - route/service return `attempted/cleaned/skipped/errors/refreshed_summary` - - focused proof: - - `env -u ALL_PROXY -u all_proxy uv run pytest -q tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> `17 passed` - - `uv run ruff check backend/web/services/monitor_service.py backend/web/routers/monitor.py tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> green - - `uv run ruff format --check backend/web/services/monitor_service.py backend/web/routers/monitor.py tests/Unit/monitor/test_monitor_compat.py tests/Integration/test_monitor_resources_route.py` -> green - - `uv run pyright backend/web/services/monitor_service.py backend/web/routers/monitor.py` -> `0 errors` - - landed monitor UI slice: - - monitor `Resources -> Lease Health` now exposes per-row `Cleanup` only for `detached_residue` and `orphan_cleanup` - - monitor `Resources -> Lease Health` now also exposes `Cleanup visible` for the currently rendered backlog rows in those same two buckets - - group cleanup now stages an inline `Confirm cleanup / Cancel` guardrail before mutating multiple leases - - success/failure state is shown via explicit feedback banner, not optimistic disappearance - - focused proof: - - `cd frontend/monitor && npm run build` -> green - - Playwright caller-proof clicked `Cleanup visible` and first got an inline confirmation state: - - `cleanup-confirm-pending.yaml` contains `Confirm cleanup` - - `cleanup-confirm-pending.yaml` contains `Remove 8 visible leases from Detached Residue.` - - then clicking `Confirm cleanup` re-fetched into an honest smaller backlog state: - - `cleanup-confirm-after.yaml` contains `Cleanup applied: 8 leases cleaned from detached_residue.` -- next honest follow-up remains: - - `D3` because lease regrouping is still heuristic and needs stronger lifecycle meaning than age-based detached residue alone - ---- - -### Task 1: Lock Storage Abstraction For Monitor Reads - -**Files:** -- Modify: `storage/contracts.py` -- Modify: `storage/container.py` -- Modify: `backend/web/core/storage_factory.py` -- Test: `tests/Unit/storage/test_storage_container.py` - -- [ ] **Step 1: Write the failing test** - -```python -def test_storage_container_builds_sandbox_monitor_repo_with_supabase(fake_supabase_client): - container = StorageContainer(strategy="supabase", supabase_client=fake_supabase_client) - - repo = container.sandbox_monitor_repo() - - assert repo.__class__.__name__ == "SupabaseSandboxMonitorRepo" -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `uv run pytest -q tests/Unit/storage/test_storage_container.py -k sandbox_monitor_repo` -Expected: FAIL because `StorageContainer` has no `sandbox_monitor_repo()` and no `SandboxMonitorRepo` contract. - -- [ ] **Step 3: Write minimal implementation** - -```python -class SandboxMonitorRepo(Protocol): - def query_threads(self, *, thread_id: str | None = None) -> list[dict[str, Any]]: ... - def query_thread_summary(self, thread_id: str) -> dict[str, Any] | None: ... - def query_thread_sessions(self, thread_id: str) -> list[dict[str, Any]]: ... - def query_leases(self) -> list[dict[str, Any]]: ... - def list_leases_with_threads(self) -> list[dict[str, Any]]: ... - def query_lease(self, lease_id: str) -> dict[str, Any] | None: ... - def query_lease_threads(self, lease_id: str) -> list[dict[str, Any]]: ... - def query_lease_events(self, lease_id: str) -> list[dict[str, Any]]: ... - def query_diverged(self) -> list[dict[str, Any]]: ... - def query_events(self, limit: int = 100) -> list[dict[str, Any]]: ... - def query_event(self, event_id: str) -> dict[str, Any] | None: ... - def count_rows(self, table_names: list[str]) -> dict[str, int]: ... - def list_sessions_with_leases(self) -> list[dict[str, Any]]: ... - def list_probe_targets(self) -> list[dict[str, Any]]: ... - def query_lease_instance_id(self, lease_id: str) -> str | None: ... - def close(self) -> None: ... -``` - -```python -_REPO_REGISTRY["sandbox_monitor_repo"] = ( - "storage.providers.supabase.sandbox_monitor_repo", - "SupabaseSandboxMonitorRepo", -) -``` - -```python -def sandbox_monitor_repo(self) -> SandboxMonitorRepo: - return self._build_repo("sandbox_monitor_repo", self._sqlite_sandbox_monitor_repo) -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `uv run pytest -q tests/Unit/storage/test_storage_container.py -k sandbox_monitor_repo` -Expected: PASS - -- [ ] **Step 5: Commit** - -```bash -git add storage/contracts.py storage/container.py backend/web/core/storage_factory.py tests/Unit/storage/test_storage_container.py -git commit -m "refactor: move sandbox monitor repo into storage container" -``` - -### Task 2: Make Sandbox Repo Construction Strategy-Aware - -**Files:** -- Modify: `backend/web/core/storage_factory.py` -- Modify: `sandbox/manager.py` -- Modify: `sandbox/chat_session.py` -- Modify: `backend/web/utils/helpers.py` -- Modify: `backend/web/services/file_channel_service.py` -- Modify: `backend/web/services/activity_tracker.py` -- Modify: `backend/web/routers/threads.py` -- Modify: `backend/web/routers/webhooks.py` -- Test: `tests/Unit/backend/web/core/test_storage_factory.py` - -- [ ] **Step 1: Write the failing test** - -```python -def test_make_lease_repo_uses_supabase_when_strategy_is_supabase(monkeypatch, fake_supabase_client): - monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase") - monkeypatch.setenv("LEON_SUPABASE_CLIENT_FACTORY", "tests.support.fake_supabase:create_client") - - repo = make_lease_repo() - - assert repo.__class__.__name__ == "SupabaseLeaseRepo" -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `uv run pytest -q tests/Unit/backend/web/core/test_storage_factory.py -k 'make_lease_repo or make_terminal_repo or make_chat_session_repo'` -Expected: FAIL because these factories do not exist. - -- [ ] **Step 3: Write minimal implementation** - -```python -def make_lease_repo(db_path: Any = None) -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.lease_repo import SupabaseLeaseRepo - return SupabaseLeaseRepo(client=_supabase_client()) - from storage.providers.sqlite.lease_repo import SQLiteLeaseRepo - return SQLiteLeaseRepo(db_path=db_path) -``` - -```python -def make_terminal_repo(db_path: Any = None) -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.terminal_repo import SupabaseTerminalRepo - return SupabaseTerminalRepo(client=_supabase_client()) - from storage.providers.sqlite.terminal_repo import SQLiteTerminalRepo - return SQLiteTerminalRepo(db_path=db_path) -``` - -```python -def make_chat_session_repo(db_path: Any = None) -> Any: - if _strategy() == "supabase": - from storage.providers.supabase.chat_session_repo import SupabaseChatSessionRepo - return SupabaseChatSessionRepo(client=_supabase_client()) - from storage.providers.sqlite.chat_session_repo import SQLiteChatSessionRepo - return SQLiteChatSessionRepo(db_path=db_path) -``` - -```python -self.terminal_store = make_terminal_repo(db_path=self.db_path) -self.lease_store = make_lease_repo(db_path=self.db_path) -self.session_manager = ChatSessionManager( - provider=provider, - db_path=self.db_path, - default_policy=ChatSessionPolicy(), - chat_session_repo=make_chat_session_repo(db_path=self.db_path), -) -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `uv run pytest -q tests/Unit/backend/web/core/test_storage_factory.py -k 'make_lease_repo or make_terminal_repo or make_chat_session_repo'` -Expected: PASS - -- [ ] **Step 5: Commit** - -```bash -git add backend/web/core/storage_factory.py sandbox/manager.py sandbox/chat_session.py backend/web/utils/helpers.py backend/web/services/file_channel_service.py backend/web/services/activity_tracker.py backend/web/routers/threads.py backend/web/routers/webhooks.py tests/Unit/backend/web/core/test_storage_factory.py -git commit -m "refactor: route sandbox repo construction through storage strategy" -``` - -### Task 3: Split Global Monitor Routes From Product Resource Routes - -**Files:** -- Create: `backend/web/routers/resources.py` -- Modify: `backend/web/routers/monitor.py` -- Modify: `backend/web/core/lifespan.py` -- Modify: `backend/web/services/monitor_service.py` -- Modify: `backend/web/services/resource_service.py` -- Modify: `backend/web/services/sandbox_service.py` -- Test: `tests/Integration/test_monitor_resources_route.py` -- Test: `tests/Integration/test_resources_route.py` - -- [ ] **Step 1: Write the failing test** - -```python -def test_resources_overview_route_is_not_served_from_monitor_prefix(client): - response = client.get("/api/resources/overview") - - assert response.status_code == 200 -``` - -```python -def test_monitor_resources_route_remains_available_for_global_view(client): - response = client.get("/api/monitor/resources") - - assert response.status_code == 200 -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `uv run pytest -q tests/Integration/test_resources_route.py tests/Integration/test_monitor_resources_route.py` -Expected: FAIL because `/api/resources/overview` does not exist. - -- [ ] **Step 3: Write minimal implementation** - -```python -router = APIRouter(prefix="/api/resources", tags=["resources"]) - -@router.get("/overview") -def get_resources_overview(request: Request, current_user=Depends(require_current_user)): - return list_resource_providers(request.app.state, current_user_id=current_user.user_id) -``` - -```python -monitor_repo = request.app.state.storage_container.sandbox_monitor_repo() -``` - -```python -app.include_router(resources_router) -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `uv run pytest -q tests/Integration/test_resources_route.py tests/Integration/test_monitor_resources_route.py` -Expected: PASS - -- [ ] **Step 5: Commit** - -```bash -git add backend/web/routers/resources.py backend/web/routers/monitor.py backend/web/core/lifespan.py backend/web/services/monitor_service.py backend/web/services/resource_service.py backend/web/services/sandbox_service.py tests/Integration/test_resources_route.py tests/Integration/test_monitor_resources_route.py -git commit -m "feat: split global monitor resources from product resources api" -``` - -### Task 4: Rewire Frontend Resource Consumer Minimally - -**Files:** -- Modify: `frontend/app/src/pages/resources/api.ts` -- Modify: `frontend/app/src/pages/ResourcesPage.tsx` -- Modify: `frontend/app/src/pages/resources/ProviderCard.tsx` -- Test: `frontend/app/src/pages/resources/api.test.ts` -- Test: Playwright CLI product trace on `/resources` - -- [ ] **Step 1: Write the failing test** - -```ts -it("fetches overview from /api/resources/overview", async () => { - await fetchResourcesOverview(); - expect(fetch).toHaveBeenCalledWith("/api/resources/overview", expect.anything()); -}); -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cd frontend/app && npm test -- api.test.ts` -Expected: FAIL because the client still calls `/api/monitor/resources`. - -- [ ] **Step 3: Write minimal implementation** - -```ts -export async function fetchResourcesOverview() { - return requestJson("/api/resources/overview"); -} -``` - -```tsx -
      -``` - -```tsx -

      资源

      -``` - -```tsx -... -``` - -```tsx -{totalSessions} 会话 -``` - -```tsx - -
      - ); -} - -function DashboardPage() { +// Page: Threads List +function ThreadsPage() { const [data, setData] = React.useState(null); - const [loading, setLoading] = React.useState(false); - const [error, setError] = React.useState(null); - - const loadDashboard = React.useCallback(async () => { - setLoading(true); - setError(null); - try { - const payload = await fetchAPI("/dashboard"); - setData(payload); - } catch (e: any) { - setError(e?.message || String(e)); - } finally { - setLoading(false); - } - }, []); React.useEffect(() => { - void loadDashboard(); - }, [loadDashboard]); - - if (error) { - return ( -
      - -
      - ); - } - - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - - const infra = data.infra || {}; - const workload = data.workload || {}; - const latestEval = data.latest_evaluation || null; - const resourcesSummary = data.resources_summary || {}; - - return ( -
      -
      -
      -
      -

      Infra Health

      -
      - - - Resources - -
      -
      -
      - 0 - ? "warning" - : "success" - } - /> - - {infra.leases_diverged || 0} - - } - note={`${infra.leases_total || 0} total`} - tone={(infra.leases_diverged || 0) > 0 ? "warning" : "success"} - /> - - {infra.leases_orphan || 0} - - } - note={`${infra.leases_healthy || 0} healthy`} - tone={(infra.leases_orphan || 0) > 0 ? "danger" : "success"} - /> -
      -
      - -
      -
      -
      -

      Active Workload

      - - Threads - -
      -
      - - - 0 - ? "default" - : (workload.evaluations_running || 0) > 0 - ? "warning" - : "default" - } - /> -
      -
      - -
      -
      -

      Latest Eval

      - - {latestEval ? "Detail" : "Eval list"} - -
      - {latestEval ? ( -
      -
      - - {latestEval.status} - - - publishable={String(Boolean(latestEval.publishable))} - -
      -
      -
      -
      -
      - {latestEval.threads_done || 0}/{latestEval.threads_total || 0}{" "} - threads · {formatPct(latestEval.progress_pct || 0)} · updated{" "} - {latestEval.updated_ago || "-"} -
      - -
      - ) : ( -

      No evaluations yet

      - )} -
      -
      -
      -
      - ); -} - -const CAPABILITY_LABELS: Record = { - filesystem: "FS", - terminal: "TERM", - metrics: "METRICS", - screenshot: "SHOT", - web: "WEB", - process: "PROC", - hooks: "HOOKS", - mount: "MOUNT", -}; - -function formatMonitorMetric(value: any, suffix = "", digits = 1): string { - if (value == null) return "--"; - const num = Number(value); - if (!Number.isFinite(num)) return "--"; - return `${num.toFixed(digits)}${suffix}`; -} - -function ProviderStatusLight({ status }: { status: string }) { - const className = - status === "active" - ? "provider-status-light is-active" - : status === "ready" - ? "provider-status-light is-ready" - : "provider-status-light is-unavailable"; - return
      +
      +

      {data.title}

      +

      Total: {data.count}

      +
      - - - - - + + + + + + - {group.sessions.map((session: any) => ( - - - - - + {data.items.map((item: any) => ( + + + + + + ))}
      SessionMemberThreadStatusStartedThread IDSessionsLast ActiveLeaseProviderState
      - - {shortId(session.id, 12)} - - {session.memberName || session.memberId || "-"} - {session.threadId ? ( - - {shortId(session.threadId, 12)} - - ) : ( - "-" - )} - {session.status}
      {item.thread_id.slice(0, 8)}{item.session_count}{item.last_active_ago} - {session.startedAt - ? new Date(session.startedAt).toLocaleString() - : "-"} + {item.lease.lease_id ? ( + {item.lease.lease_id} + ) : '-'} {item.lease.provider || '-'}
      - - ); -} - -function MonitorResourcesPage() { - const [resourceData, setResourceData] = React.useState(null); - const [leaseData, setLeaseData] = React.useState(null); - const [selectedId, setSelectedId] = React.useState(""); - const [selectedLeaseId, setSelectedLeaseId] = React.useState(""); - const [sessionScope, setSessionScope] = React.useState<"lease" | "provider">( - "lease", - ); - const [loading, setLoading] = React.useState(false); - const [refreshing, setRefreshing] = React.useState(false); - const [error, setError] = React.useState(null); - const [cleanupBusyId, setCleanupBusyId] = React.useState(""); - const [cleanupFeedback, setCleanupFeedback] = React.useState<{ - tone: "success" | "error"; - text: string; - } | null>(null); - const [cleanupConfirm, setCleanupConfirm] = React.useState<{ - leaseIds: string[]; - expectedCategory: "detached_residue" | "orphan_cleanup"; - scopeLabel: string; - label: string; - count: number; - } | null>(null); - - const loadResources = React.useCallback(async () => { - setLoading(true); - setError(null); - try { - const [resources, leases] = await Promise.all([ - fetchAPI("/resources"), - fetchAPI("/leases"), - ]); - setResourceData(resources); - setLeaseData(leases); - const providers = Array.isArray(resources?.providers) - ? resources.providers - : []; - setSelectedId((prev) => - providers.some((provider: any) => provider.id === prev) - ? prev - : providers[0]?.id || "", - ); - } catch (e: any) { - setError(e?.message || String(e)); - } finally { - setLoading(false); - } - }, []); - - const refreshNow = React.useCallback(async () => { - setRefreshing(true); - setError(null); - setCleanupConfirm(null); - try { - const [resources, leases] = await Promise.all([ - fetchJSON(`${API_BASE}/resources/refresh`, { method: "POST" }), - fetchAPI("/leases"), - ]); - setResourceData(resources); - setLeaseData(leases); - } catch (e: any) { - setError(e?.message || String(e)); - } finally { - setRefreshing(false); - } - }, []); - - const cleanupLeases = React.useCallback( - async ( - leaseIds: string[], - expectedCategory: "detached_residue" | "orphan_cleanup", - scopeLabel: string, - ) => { - const targetIds = leaseIds.filter(Boolean); - if (targetIds.length === 0) { - return; - } - setCleanupBusyId(scopeLabel); - setCleanupFeedback(null); - setCleanupConfirm(null); - try { - const payload = await fetchJSON(`${API_BASE}/resources/cleanup`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - action: "cleanup_residue", - lease_ids: targetIds, - expected_category: expectedCategory, - }), - }); - await refreshNow(); - const cleanedCount = Array.isArray(payload.cleaned) - ? payload.cleaned.length - : 0; - const skippedCount = Array.isArray(payload.skipped) - ? payload.skipped.length - : 0; - const errorCount = Array.isArray(payload.errors) - ? payload.errors.length - : 0; - if (errorCount > 0) { - const firstError = payload.errors[0]; - setCleanupFeedback({ - tone: "error", - text: `Cleanup incomplete: ${cleanedCount} cleaned · ${skippedCount} skipped · ${errorCount} errors (${formatCleanupError(firstError)}).`, - }); - return; - } - setCleanupFeedback({ - tone: "success", - text: `Cleanup applied: ${cleanedCount} lease${cleanedCount === 1 ? "" : "s"} cleaned from ${expectedCategory}.`, - }); - } catch (e: any) { - setCleanupFeedback({ - tone: "error", - text: `Cleanup failed: ${e?.message || String(e)}`, - }); - } finally { - setCleanupBusyId(""); - } - }, - [refreshNow], - ); - - React.useEffect(() => { - void loadResources(); - }, [loadResources]); - - React.useEffect(() => { - if (!resourceData || !leaseData) { - setSelectedLeaseId(""); - return; - } - const providers = Array.isArray(resourceData.providers) - ? resourceData.providers - : []; - const selectedProvider = - providers.find((provider: any) => provider.id === selectedId) || - providers[0] || - null; - const selectedSessions = Array.isArray(selectedProvider?.sessions) - ? selectedProvider.sessions - : []; - const nextLeaseGroups = groupSessionsByLease(selectedSessions); - setSelectedLeaseId((prev) => { - if (nextLeaseGroups.some((group: any) => leaseGroupKey(group) === prev)) { - return prev; - } - return nextLeaseGroups[0] ? leaseGroupKey(nextLeaseGroups[0]) : ""; - }); - }, [leaseData, resourceData, selectedId]); - - React.useEffect(() => { - setSessionScope("lease"); - }, [selectedId, selectedLeaseId]); - - if (error) { - return ( -
      - -
      - ); - } - - if (!resourceData || !leaseData) { - return ( -
      -
      Loading...
      -
      - ); - } - - const providers = Array.isArray(resourceData.providers) - ? resourceData.providers - : []; - const summary = resourceData.summary || {}; - const leases = Array.isArray(leaseData.items) ? leaseData.items : []; - const leaseTriage = leaseData.triage || {}; - const triageSummary = leaseTriage.summary || {}; - const triageGroups = Array.isArray(leaseTriage.groups) - ? leaseTriage.groups - : []; - const selectedProvider = - providers.find((provider: any) => provider.id === selectedId) || - providers[0] || - null; - const activeDriftLeases = (triageGroups.find( - (group: any) => group.key === "active_drift", - )?.items || []) as any[]; - const detachedResidueLeases = (triageGroups.find( - (group: any) => group.key === "detached_residue", - )?.items || []) as any[]; - const orphanCleanupLeases = (triageGroups.find( - (group: any) => group.key === "orphan_cleanup", - )?.items || []) as any[]; - const healthyCapacityLeases = (triageGroups.find( - (group: any) => group.key === "healthy_capacity", - )?.items || []) as any[]; - const hasPrimaryLeaseAttention = - activeDriftLeases.length > 0 || detachedResidueLeases.length > 0; - const hasSecondaryLeaseAttention = orphanCleanupLeases.length > 0; - const visibleDetachedResidueLeases = detachedResidueLeases.slice(0, 8); - const visibleOrphanCleanupLeases = orphanCleanupLeases.slice(0, 8); - // @@@cleanup-visible-confirm - visible-bucket cleanup mutates multiple leases, so group actions stage an explicit confirm while single-row cleanup stays one-click. - const refreshedAt = summary.last_refreshed_at || summary.snapshot_at; - const selectedSessions = Array.isArray(selectedProvider?.sessions) - ? selectedProvider.sessions - : []; - const selectedLeaseGroups = groupSessionsByLease(selectedSessions); - const selectedRunning = selectedSessions.filter( - (session: any) => session.status === "running", - ).length; - const selectedPaused = selectedSessions.filter( - (session: any) => session.status === "paused", - ).length; - const selectedStopped = selectedSessions.filter( - (session: any) => session.status === "stopped", - ).length; - const selectedLeaseGroup = - selectedLeaseGroups.find( - (group: any) => leaseGroupKey(group) === selectedLeaseId, - ) || - selectedLeaseGroups[0] || - null; - // @@@resource-session-scope - no lease group means provider scope is the only honest truth surface, even if the UI last asked for lease scope. - const effectiveSessionScope = - selectedLeaseGroup == null ? "provider" : sessionScope; - const scopedSessions = - effectiveSessionScope === "provider" || !selectedLeaseGroup - ? selectedSessions - : selectedLeaseGroup.sessions; - - return ( -
      -
      -
      - - - 0 - ? "needs operator attention" - : "no active drift" - } - tone={(triageSummary.active_drift || 0) > 0 ? "warning" : "success"} - /> - 0 || - (triageSummary.orphan_cleanup || 0) > 0 - ? `${triageSummary.orphan_cleanup || 0} cleanup backlog` - : "no cleanup backlog" - } - tone={ - (triageSummary.detached_residue || 0) > 0 ? "danger" : "success" - } - /> - 0 - ? `${triageSummary.total || leases.length} total` - : "no leases reported yet" - } - tone={ - (triageSummary.total || leases.length) === 0 - ? "default" - : (triageSummary.healthy_capacity || 0) > 0 - ? "success" - : "danger" - } - /> -
      -
      - -
      -
      -
      -

      Providers

      - -
      -
      - {providers.map((provider: any) => { - const sessions = Array.isArray(provider.sessions) - ? provider.sessions - : []; - const runningCount = sessions.filter( - (s: any) => s.status === "running", - ).length; - const unavailable = provider.status === "unavailable"; - return ( - - ); - })} -
      -
      -
      - {selectedProvider ? ( - <> -
      -
      -
      -
      - -

      {selectedProvider.name}

      -
      -

      - {selectedProvider.description || - "No provider description."} -

      -
      -
      - - {selectedProvider.type} - {selectedProvider.vendor - ? ` · ${selectedProvider.vendor}` - : ""} - - {selectedProvider.consoleUrl ? ( - - Open console - - ) : null} -
      -
      -
      - - status - {selectedProvider.status} - - - running - {selectedRunning} - - - paused - {selectedPaused} - - - stopped - {selectedStopped} - -
      - -
      -
      - Provider - - {selectedProvider.type} - {selectedProvider.vendor - ? ` · ${selectedProvider.vendor}` - : ""} - -
      -
      - CPU - - {selectedProvider.telemetry?.cpu?.used == null - ? "--" - : `${Number(selectedProvider.telemetry.cpu.used).toFixed(1)}%`} - -
      -
      - Memory - - {selectedProvider.telemetry?.memory?.used == null - ? "--" - : `${Number(selectedProvider.telemetry.memory.used).toFixed(1)} / ${selectedProvider.telemetry?.memory?.limit ?? "--"} GB`} - -
      -
      - Disk - - {selectedProvider.telemetry?.disk?.used == null - ? "--" - : `${Number(selectedProvider.telemetry.disk.used).toFixed(1)} / ${selectedProvider.telemetry?.disk?.limit ?? "--"} GB`} - -
      -
      - Running metric - - {selectedProvider.telemetry?.running?.used == null - ? "--" - : `${selectedProvider.telemetry.running.used} / ${selectedProvider.telemetry?.running?.limit ?? "--"} ${selectedProvider.telemetry?.running?.unit || ""}`} - -
      -
      - Reason - - {selectedProvider.unavailableReason || - selectedProvider.error || - "healthy"} - -
      -
      -
      -
      -
      -
      -

      Leases ({selectedLeaseGroups.length})

      -
      -
      -
      - {selectedLeaseGroups.map((group: any) => ( - setSelectedLeaseId(leaseGroupKey(group))} - /> - ))} - {selectedLeaseGroups.length === 0 ? ( -
      - No lease groups reported for this provider. -
      - ) : null} -
      - {selectedLeaseGroup ? ( - - ) : null} -
      -
      -

      - Sessions ( - {effectiveSessionScope === "provider" - ? selectedSessions.length - : scopedSessions.length} - ) -

      -

      - {selectedLeaseGroup == null - ? "full provider truth surface" - : effectiveSessionScope === "provider" - ? "full provider truth surface" - : "scoped to selected lease"} -

      -
      - {selectedLeaseGroup ? ( -
      - - -
      - ) : null} -
      - - - - - - - - - - - - - {scopedSessions.map((session: any) => ( - - - - - - - - - ))} - {scopedSessions.length === 0 ? ( - - - - ) : null} - -
      SessionThreadLeaseMemberStatusStarted
      {shortId(session.id, 12)} - {session.threadId ? ( - - {shortId(session.threadId, 12)} - - ) : ( - "-" - )} - - {session.leaseId ? ( - - {shortId(session.leaseId, 12)} - - ) : ( - "-" - )} - {session.memberName || session.memberId || "-"}{session.status} - {session.startedAt - ? new Date(session.startedAt).toLocaleString() - : "-"} -
      - {effectiveSessionScope === "provider" - ? "No sessions reported for this provider." - : "No sessions reported for the selected lease group."} -
      -
      - - ) : ( -
      - Select a provider from the list. -
      - )} -
      -
      - -
      -
      -
      -

      Lease Health

      -
      - - Legacy flat table - -
      -
      - - active drift - {activeDriftLeases.length} - - - detached residue - {detachedResidueLeases.length} - - - orphan cleanup - {orphanCleanupLeases.length} - - - healthy - {healthyCapacityLeases.length} - -
      - {cleanupFeedback ? ( -
      - {cleanupFeedback.text} -
      - ) : null} - {cleanupConfirm ? ( -
      -
      - Confirm cleanup -

      - Remove {cleanupConfirm.count} visible lease - {cleanupConfirm.count === 1 ? "" : "s"} from{" "} - {cleanupConfirm.label}. -

      -
      -
      - - -
      -
      - ) : null} - {hasPrimaryLeaseAttention ? ( -
      - {activeDriftLeases.length > 0 ? ( -
      -

      Active Drift ({activeDriftLeases.length})

      - - - - - - - - - - - - {activeDriftLeases.slice(0, 8).map((item: any) => ( - - - - - - - - ))} - -
      LeaseProviderThreadStateUpdated
      - - {shortId(item.lease_id, 12)} - - {item.provider} - {item.thread?.thread_id ? ( - - {shortId(item.thread.thread_id, 12)} - - ) : ( - orphan - )} - - - {item.updated_ago}
      -
      - ) : null} - - {detachedResidueLeases.length > 0 ? ( -
      -
      -

      Detached Residue ({detachedResidueLeases.length})

      - -
      - - - - - - - - - - - - - {visibleDetachedResidueLeases.map((item: any) => ( - - - - - - - - - ))} - -
      LeaseProviderThreadStateUpdatedAction
      - - {shortId(item.lease_id, 12)} - - {item.provider} - {item.thread?.thread_id ? ( - - {shortId(item.thread.thread_id, 12)} - - ) : ( - orphan - )} - - - {item.updated_ago} - -
      -
      - ) : null} -
      - ) : null} - - {hasSecondaryLeaseAttention ? ( -
      -
      -
      -

      Cleanup Backlog ({orphanCleanupLeases.length})

      - -
      - - - - - - - - - - - - - {visibleOrphanCleanupLeases.map((item: any) => ( - - - - - - - - - ))} - -
      LeaseProviderInstanceStateUpdatedAction
      - - {shortId(item.lease_id, 12)} - - {item.provider}{shortId(item.instance_id, 12)} - - {item.updated_ago} - -
      -
      -
      - ) : null} - - {healthyCapacityLeases.length > 0 ? ( -
      - Healthy Capacity ({healthyCapacityLeases.length}) - - - - - - - - - - - - {healthyCapacityLeases.slice(0, 8).map((item: any) => ( - - - - - - - - ))} - -
      LeaseProviderThreadStateUpdated
      - - {shortId(item.lease_id, 12)} - - {item.provider} - {item.thread?.thread_id ? ( - - {shortId(item.thread.thread_id, 12)} - - ) : ( - orphan - )} - - - {item.updated_ago}
      -
      - ) : null} - - {!hasPrimaryLeaseAttention && - !hasSecondaryLeaseAttention && - healthyCapacityLeases.length === 0 ? ( -
      No lease groups reported yet.
      - ) : null} - -
      - All leases ({leases.length}) - - - - - - - - - - - - - - {leases.map((item: any) => ( - - - - - - - - - - ))} - -
      Lease IDProviderInstance IDThreadStateUpdatedError
      - {item.lease_id} - {item.provider} - {item.instance_id?.slice(0, 12) || "-"} - - {item.thread.thread_id ? ( - - {item.thread.thread_id.slice(0, 8)} - - ) : ( - orphan - )} - - - {item.updated_ago}{item.error || "-"}
      -
      -
      -
      - ); -} - -// Page: Threads List -function ThreadsPage() { - const [data, setData] = React.useState(null); - const [loading, setLoading] = React.useState(false); - const [error, setError] = React.useState(null); - const [offset, setOffset] = React.useState(0); - const [limit, setLimit] = React.useState(50); - - const loadThreads = React.useCallback(async () => { - setLoading(true); - setError(null); - try { - const payload = await fetchAPI( - `/threads?offset=${offset}&limit=${limit}`, - ); - setData(payload); - } catch (e: any) { - setError(e?.message || String(e)); - } finally { - setLoading(false); - } - }, [offset, limit]); - - React.useEffect(() => { - void loadThreads(); - }, [loadThreads]); - - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - const pagination = data.pagination || {}; - const total = Number(pagination.total || data.count || 0); - const currentCount = Number(data.count || 0); - const from = total > 0 ? offset + 1 : 0; - const to = offset + currentCount; - const page = Number(pagination.page || 1); - - return ( -
      -

      - Global thread index. Start here to find the active run, then drill into - session, lease, and trace detail. -

      -

      - Showing {from}-{to} of {total} | page {page} -

      -
      -
      -
      - - - -
      -
      - Rows: - -
      -
      - - - - - - - - - - - - - - {data.items.map((item: any) => ( - - - - - - - - - - ))} - -
      Thread IDModeSessionsLast ActiveLeaseProviderState
      - {item.thread_id.slice(0, 8)} - - {item.thread_mode || "normal"} / trace= - {item.keep_full_trace ? "full" : "latest"} - {item.session_count}{item.last_active_ago} - {item.lease.lease_id ? ( - {item.lease.lease_id} - ) : ( - "-" - )} - {item.lease.provider || "-"} - -
      -
      -
      - ); -} - -function TracesPage() { - const [data, setData] = React.useState(null); - const [loading, setLoading] = React.useState(false); - const [error, setError] = React.useState(null); - const [offset, setOffset] = React.useState(0); - const [limit, setLimit] = React.useState(50); - - const loadTraces = React.useCallback(async () => { - setLoading(true); - setError(null); - try { - const payload = await fetchAPI(`/traces?offset=${offset}&limit=${limit}`); - setData(payload); - } catch (e: any) { - setError(e?.message || String(e)); - } finally { - setLoading(false); - } - }, [offset, limit]); - - React.useEffect(() => { - void loadTraces(); - }, [loadTraces]); - - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - const pagination = data.pagination || {}; - const total = Number(pagination.total || data.count || 0); - const currentCount = Number(data.count || 0); - const from = total > 0 ? offset + 1 : 0; - const to = offset + currentCount; - const page = Number(pagination.page || 1); - - return ( -
      -

      - Run-level trace index for debugging tool calls, checkpoints, and runtime - transitions across monitored threads. -

      -

      - Showing {from}-{to} of {total} | page {page} -

      -
      -
      -
      - - - -
      -
      - Rows: - -
      -
      - - - - - - - - - - - - - - - {data.items.map((item: any) => ( - - - - - - - - - - - ))} - -
      ThreadRunModeEventsTool CallsStartedLast EventStatus
      - - {item.thread_id.slice(0, 18)} - - {shortId(item.run_id, 12)} - {item.thread_mode || "normal"} / trace= - {item.keep_full_trace ? "full" : "latest"} - {item.event_count} - {item.tool_call_count} / {item.tool_result_count} - {item.started_ago || "-"}{item.last_event_ago || "-"}{item.status}
      -
      ); } @@ -2026,65 +86,21 @@ function TracesPage() { // Page: Thread Detail function ThreadDetailPage() { const { threadId } = useParams(); - const location = useLocation(); const [data, setData] = React.useState(null); - const [error, setError] = React.useState(null); - const initialRunId = React.useMemo( - () => new URLSearchParams(location.search).get("run") || "", - [location.search], - ); React.useEffect(() => { - setError(null); - fetchAPI(`/thread/${threadId}`) - .then(setData) - .catch((e) => setError(e?.message || String(e))); + fetchAPI(`/thread/${threadId}`).then(setData); }, [threadId]); - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - const threadIsActive = Array.isArray(data?.sessions?.items) - ? data.sessions.items.some((s: any) => s.status === "active") - : false; - const sessionLeaseIds = new Set( - Array.isArray(data?.sessions?.items) - ? data.sessions.items - .map((session: any) => String(session?.lease?.lease_id || "").trim()) - .filter(Boolean) - : [], - ); - const visibleRelatedLeases = Array.isArray(data?.related_leases?.items) - ? data.related_leases.items.filter( - (lease: any) => - !sessionLeaseIds.has(String(lease?.lease_id || "").trim()), - ) - : []; + if (!data) return
      Loading...
      ; return (

      Thread: {data.thread_id.slice(0, 8)}

      -

      - mode: {data.thread_mode || "normal"} | trace:{" "} - {data.keep_full_trace ? "full" : "latest"} -

      -

      - {data.sessions.title} ({data.sessions.count}) -

      +

      {data.sessions.title} ({data.sessions.count})

      @@ -2100,1296 +116,83 @@ function ThreadDetailPage() { {data.sessions.items.map((s: any) => ( - + - + - - + + ))} - {data.sessions.items.length === 0 && ( - - - - )}
      - {s.session_id.slice(0, 8)} - {s.session_id.slice(0, 8)} {s.status} {s.started_ago}{s.ended_ago || "-"}{s.ended_ago || '-'} {s.lease.lease_id ? ( {s.lease.lease_id} - ) : ( - "-" - )} + ) : '-'} - - {s.error || "-"}{s.error || '-'}
      No sessions recorded for this thread.
      - {(visibleRelatedLeases.length > 0 || - data.related_leases.items.length === 0) && ( -
      -

      {data.related_leases.title}

      -
        - {visibleRelatedLeases.map((l: any) => ( -
      • - {l.lease_id} -
      • - ))} - {data.related_leases.items.length === 0 && ( -
      • No related leases for this thread.
      • - )} -
      -
      - )} - -
      -

      Live Trace

      -

      - Conversation, event stream, and grouped steps for the selected run. - Use this after locating the right session or lease above. -

      - -
      -
      - ); -} - -function summarizeTraceEvent(eventType: string, payload: any): string { - if (eventType === "tool_call") - return `${payload?.name || "tool"}(${JSON.stringify(payload?.args || {})})`; - if (eventType === "tool_result") - return `${payload?.name || "tool"} -> ${String(payload?.content || "").slice(0, 240)}`; - if (eventType === "text") return String(payload?.content || "").slice(0, 120); - if (eventType === "status") { - const state = - typeof payload?.state === "string" - ? payload.state - : JSON.stringify(payload?.state || "-"); - return `state=${state} calls=${payload?.call_count ?? "-"}`; - } - if (eventType === "error") return payload?.error || "error"; - if (eventType === "done") return "done"; - return JSON.stringify(payload).slice(0, 120); -} - -type TraceItem = { - seq: number | null; - run_id: string | null; - created_at?: string | null; - created_ago?: string | null; - event_type: string; - actor: "assistant" | "tool" | "runtime"; - summary: string; - payload: any; -}; - -function normalizeTraceEvent( - eventType: string, - payload: any, -): TraceItem | null { - const seq = payload?._seq ?? null; - const run_id = payload?._run_id ?? null; - - if (eventType === "text") { - const content = - typeof payload?.content === "string" - ? payload.content - : String(payload?.content ?? ""); - if (!content) return null; - return { - seq, - run_id, - event_type: "assistant_text", - actor: "assistant", - summary: content, - payload, - }; - } - - if (eventType === "tool_call") { - return { - seq, - run_id, - event_type: "tool_call", - actor: "tool", - summary: `${payload?.name || "tool"}`, - payload, - }; - } - - if (eventType === "tool_result") { - return { - seq, - run_id, - event_type: "tool_result", - actor: "tool", - summary: `${payload?.name || "tool"}`, - payload, - }; - } - - if (eventType === "status") { - const state = - typeof payload?.state === "string" - ? payload.state - : JSON.stringify(payload?.state || "-"); - return { - seq, - run_id, - event_type: "status", - actor: "runtime", - summary: `state=${state} calls=${payload?.call_count ?? "-"}`, - payload, - }; - } - - if ( - eventType === "error" || - eventType === "cancelled" || - eventType === "done" - ) { - return { - seq, - run_id, - event_type: eventType, - actor: "runtime", - summary: summarizeTraceEvent(eventType, payload), - payload, - }; - } - return null; -} - -function normalizeStoredTraceEvent( - row: any, - fallbackRunId: string | null, -): TraceItem | null { - const payload = row?.payload || {}; - if (payload?._seq == null && row?.seq != null) payload._seq = row.seq; - if (payload?._run_id == null && fallbackRunId) - payload._run_id = fallbackRunId; - const normalized = normalizeTraceEvent( - String(row?.event_type || ""), - payload, - ); - if (!normalized) return null; - return { - ...normalized, - seq: row?.seq ?? normalized.seq, - run_id: fallbackRunId || normalized.run_id, - created_at: row?.created_at || null, - created_ago: row?.created_ago || null, - }; -} - -function mergeTraceItems(prev: TraceItem[], next: TraceItem): TraceItem[] { - const last = prev.length ? prev[prev.length - 1] : null; - - // @@@streaming-text-fold - collapse token-level text stream into one assistant step for readable trace timeline. - if ( - next.event_type === "assistant_text" && - last && - last.event_type === "assistant_text" && - last.run_id === next.run_id - ) { - const merged = [...prev]; - merged[merged.length - 1] = { - ...last, - seq: next.seq ?? last.seq, - summary: `${last.summary}${next.summary}`, - payload: next.payload, - }; - return merged; - } - - // @@@status-coalesce - keep only latest status snapshot for same run to reduce noise. - if ( - next.event_type === "status" && - last && - last.event_type === "status" && - last.run_id === next.run_id - ) { - const merged = [...prev]; - merged[merged.length - 1] = next; - return merged; - } - - return [...prev, next]; -} - -type TraceStep = { - step: number; - run_id: string | null; - seq_start: number | null; - seq_end: number | null; - created_ago: string | null; - assistant_text: string; - tool_name: string | null; - tool_args: any; - command_line: string | null; - tool_output: string | null; - runtime_notes: string[]; - raw_items: TraceItem[]; -}; - -function buildTraceSteps(items: TraceItem[]): TraceStep[] { - const steps: TraceStep[] = []; - let assistantBuffer: string[] = []; - let pending: Omit | null = null; - - const pushStep = (step: Omit) => { - steps.push({ ...step, step: steps.length + 1 }); - }; - - for (const item of items) { - if (item.event_type === "assistant_text") { - if (pending) { - pending.runtime_notes.push(item.summary); - pending.raw_items.push(item); - pending.seq_end = item.seq ?? pending.seq_end; - } else { - assistantBuffer.push(item.summary); - } - continue; - } - - if (item.event_type === "tool_call") { - if (pending) { - pushStep(pending); - pending = null; - } - pending = { - run_id: item.run_id, - seq_start: item.seq, - seq_end: item.seq, - created_ago: item.created_ago || null, - assistant_text: assistantBuffer.join("\n").trim(), - tool_name: item.payload?.name || item.summary, - tool_args: item.payload?.args || {}, - command_line: item.payload?.args?.CommandLine - ? String(item.payload.args.CommandLine) - : null, - tool_output: null, - runtime_notes: [], - raw_items: [item], - }; - assistantBuffer = []; - continue; - } - - if (item.event_type === "tool_result") { - if (pending && !pending.tool_output) { - pending.tool_output = String(item.payload?.content || "(no output)"); - pending.raw_items.push(item); - pending.seq_end = item.seq ?? pending.seq_end; - } else { - pushStep({ - run_id: item.run_id, - seq_start: item.seq, - seq_end: item.seq, - created_ago: item.created_ago || null, - assistant_text: assistantBuffer.join("\n").trim(), - tool_name: item.payload?.name || item.summary, - tool_args: null, - command_line: null, - tool_output: String(item.payload?.content || "(no output)"), - runtime_notes: [], - raw_items: [item], - }); - assistantBuffer = []; - } - continue; - } - - const runtimeNote = - item.event_type === "status" - ? formatStatusSummary(item.payload) - : item.summary; - if (pending) { - pending.runtime_notes.push(runtimeNote); - pending.raw_items.push(item); - pending.seq_end = item.seq ?? pending.seq_end; - if ( - item.event_type === "error" || - item.event_type === "cancelled" || - item.event_type === "done" - ) { - pushStep(pending); - pending = null; - } - } else { - pushStep({ - run_id: item.run_id, - seq_start: item.seq, - seq_end: item.seq, - created_ago: item.created_ago || null, - assistant_text: assistantBuffer.join("\n").trim(), - tool_name: null, - tool_args: null, - command_line: null, - tool_output: null, - runtime_notes: [runtimeNote], - raw_items: [item], - }); - assistantBuffer = []; - } - } - - if (pending) pushStep(pending); - - const remain = assistantBuffer.join("\n").trim(); - if (remain) { - pushStep({ - run_id: items.length ? items[items.length - 1].run_id : null, - seq_start: null, - seq_end: null, - created_ago: null, - assistant_text: remain, - tool_name: null, - tool_args: null, - command_line: null, - tool_output: null, - runtime_notes: [], - raw_items: [], - }); - } - - return steps; -} - -function shortId(value: string | null, size = 8): string { - if (!value) return "-"; - return String(value).slice(0, size); -} - -function evalThreadLabel( - threadId: string | null, - evaluationId: string | null, -): string { - if (!threadId) return "-"; - if (!evaluationId) return shortId(threadId, 20); - const prefix = `swebench-${evaluationId}-`; - if (threadId.startsWith(prefix)) { - const instanceId = threadId.slice(prefix.length); - return instanceId || shortId(threadId, 20); - } - return shortId(threadId, 20); -} - -function formatPct(value: any): string { - const num = Number(value); - if (!Number.isFinite(num)) return "-"; - return `${num.toFixed(1)}%`; -} - -function formatResolvedScore(item: any): string { - const resolved = Number(item?.score?.resolved_instances ?? 0); - const total = Number(item?.score?.total_instances ?? 0); - return `${resolved}/${total} (${formatPct(item?.score?.resolved_rate_pct)})`; -} - -function evalProgress(item: any): { - done: number; - target: number; - running: number; - pct: number; - mode: "thread_rows" | "session_rows" | "checkpoint_estimate"; -} { - const doneRaw = Number(item?.threads_done ?? 0); - const runningRaw = Number(item?.threads_running ?? 0); - const targetRaw = Number(item?.slice_count ?? item?.threads_total ?? 0); - const modeRaw = String(item?.progress_source || ""); - const done = Number.isFinite(doneRaw) ? Math.max(0, doneRaw) : 0; - const running = Number.isFinite(runningRaw) ? Math.max(0, runningRaw) : 0; - const targetCandidate = Number.isFinite(targetRaw) - ? Math.max(0, targetRaw) - : 0; - const mode = - modeRaw === "checkpoint_estimate" || modeRaw === "session_rows" - ? modeRaw - : "thread_rows"; - const target = - targetCandidate > 0 ? targetCandidate : Math.max(done + running, 0); - // @@@progress-active-ratio - evaluation threads can be running long before any thread reaches "done". - // Use (done + running) to reflect visible in-flight progress instead of a flat 0% bar. - const active = Math.min(target, done + running); - const pct = target > 0 ? Math.min(100, (active / target) * 100) : 0; - return { done, target, running, pct, mode }; -} - -function formatProgressSummary(progress: { - done: number; - target: number; - running: number; - pct: number; - mode: "thread_rows" | "session_rows" | "checkpoint_estimate"; -}): string { - const pending = Math.max( - 0, - progress.target - progress.done - progress.running, - ); - const activeLabel = - progress.mode === "checkpoint_estimate" ? "Started" : "In Progress"; - const sourceSuffix = - progress.mode === "thread_rows" ? "" : ` · source=${progress.mode}`; - return `Total ${progress.target} · Completed ${progress.done} · ${activeLabel} ${progress.running} · Pending ${pending} · Progress ${formatPct(progress.pct)}${sourceSuffix}`; -} - -function formatStatusSummary(payload: any): string { - const stateText = - typeof payload?.state === "string" - ? payload.state - : payload?.state?.state || JSON.stringify(payload?.state || "-"); - const calls = payload?.call_count ?? "-"; - const inTokens = payload?.input_tokens ?? payload?.token_count ?? "-"; - const outTokens = payload?.output_tokens ?? "-"; - return `state=${stateText} calls=${calls} tokens=${inTokens}/${outTokens}`; -} - -function conversationText(content: any): string { - if (typeof content === "string") return content; - if (Array.isArray(content)) { - return content - .map((part) => { - if (typeof part === "string") return part; - if (part && typeof part === "object" && part.type === "text") - return String(part.text || ""); - return JSON.stringify(part); - }) - .join(""); - } - if (content == null) return ""; - return typeof content === "object" - ? JSON.stringify(content, null, 2) - : String(content); -} - -function ConversationTraceCard({ - message, - index, -}: { - message: any; - index: number; -}) { - const msgType = String(message?.type || "Unknown"); - const msgTypeKey = msgType.toLowerCase(); - const text = conversationText(message?.content); - const toolCalls = Array.isArray(message?.tool_calls) - ? message.tool_calls - : []; - return ( -
      -
      -
      - [{index}] - {msgType} -
      - - id {shortId(message?.id || "-", 12)} - -
      - - {toolCalls.length > 0 && ( -
      -
      tool_calls
      -
      -            {JSON.stringify(toolCalls, null, 2)}
      -          
      -
      - )} - - {message?.tool_call_id && ( -
      -
      tool_call_id
      -
      {String(message.tool_call_id)}
      -
      - )} - -
      -
      content
      -
      -          {text || "(empty)"}
      -        
      -
      - -
      - Raw message -
      -          {JSON.stringify(message, null, 2)}
      -        
      -
      -
      - ); -} - -function TraceCard({ item }: { item: TraceItem }) { - const statusText = - item.event_type === "status" ? formatStatusSummary(item.payload) : null; - const commandLine = item.payload?.args?.CommandLine; - const toolArgs = item.payload?.args; - const toolOutput = item.payload?.content; - return ( -
      -
      -
      - #{item.seq ?? "-"} - - {item.actor} - - {item.event_type} -
      - run {shortId(item.run_id)} -
      - - {item.event_type === "assistant_text" && ( -
      {item.summary}
      - )} - - {item.event_type === "tool_call" && ( -
      -
      Tool
      -
      -            {item.payload?.name || item.summary}
      -          
      - {commandLine && ( - <> -
      CommandLine
      -
      -                {String(commandLine)}
      -              
      - - )} -
      Args
      -
      -            {JSON.stringify(toolArgs || {}, null, 2)}
      -          
      -
      - )} - - {item.event_type === "tool_result" && ( -
      -
      Tool
      -
      -            {item.payload?.name || item.summary}
      -          
      -
      Output
      -
      -            {String(toolOutput || "(no output)")}
      -          
      -
      - )} - - {item.event_type === "status" && ( -
      -
      Runtime
      -
      {statusText}
      -
      - )} - - {(item.event_type === "error" || - item.event_type === "cancelled" || - item.event_type === "done") && ( -
      {item.summary}
      - )} - -
      - Raw payload -
      -          {JSON.stringify(item.payload, null, 2)}
      -        
      -
      -
      - ); -} - -function TraceStepCard({ step }: { step: TraceStep }) { - return ( -
      -
      -
      - Step {step.step} - - seq {step.seq_start ?? "-"}..{step.seq_end ?? "-"} - - run {shortId(step.run_id)} -
      - {step.created_ago || "-"} -
      - - {step.assistant_text && ( -
      -
      Intent
      -
      -            {step.assistant_text}
      -          
      -
      - )} - - {step.tool_name && ( -
      -
      Action
      -
      {step.tool_name}
      - {step.command_line && ( - <> -
      CommandLine
      -
      -                {step.command_line}
      -              
      - - )} - {step.tool_args && ( - <> -
      Args
      -
      -                {JSON.stringify(step.tool_args, null, 2)}
      -              
      - - )} -
      - )} - - {step.tool_output != null && ( -
      -
      Observation
      -
      {step.tool_output}
      -
      - )} - - {step.runtime_notes.length > 0 && ( -
      -
      Runtime
      -
      {step.runtime_notes.join("\n")}
      -
      - )} - -
      - Raw events ({step.raw_items.length}) - {step.raw_items.map((item, idx) => ( -
      -
      - #{item.seq || "-"} - {item.event_type} -
      -
      -              {JSON.stringify(item.payload, null, 2)}
      -            
      -
      - ))} -
      -
      - ); -} - -function ThreadTraceSection({ - threadId, - autoRefreshEnabled, - initialRunId = "", -}: { - threadId: string; - autoRefreshEnabled: boolean; - initialRunId?: string; -}) { - const [traceEvents, setTraceEvents] = React.useState([]); - const [traceError, setTraceError] = React.useState(null); - const [traceLoading, setTraceLoading] = React.useState(false); - const [rawEventCount, setRawEventCount] = React.useState(0); - const [streamState, setStreamState] = React.useState< - "idle" | "polling" | "error" - >("idle"); - const [eventFilter, setEventFilter] = React.useState< - "all" | "assistant" | "tool" | "runtime" - >("all"); - const [traceView, setTraceView] = React.useState< - "conversation" | "events" | "steps" - >("conversation"); - const [showRawTable, setShowRawTable] = React.useState(false); - const [selectedRunId, setSelectedRunId] = React.useState(""); - const [runCandidates, setRunCandidates] = React.useState([]); - const [autoRefresh, setAutoRefresh] = React.useState(true); - const [conversationMessages, setConversationMessages] = React.useState( - [], - ); - const [conversationLoading, setConversationLoading] = - React.useState(false); - const [conversationError, setConversationError] = React.useState< - string | null - >(null); - - const loadTrace = React.useCallback( - (runId: string) => { - if (!threadId) return; - const query = runId ? `?run_id=${encodeURIComponent(runId)}` : ""; - setTraceLoading(true); - setTraceError(null); - setStreamState("polling"); - fetchAPI(`/thread/${threadId}/trace${query}`) - .then((payload) => { - setRawEventCount(payload?.event_count || 0); - setRunCandidates(payload?.run_candidates || []); - if (!runId && payload?.run_id) { - setSelectedRunId((prev) => prev || String(payload.run_id)); - } - const normalized = (payload?.events || []) - .map((row: any) => - normalizeStoredTraceEvent(row, payload?.run_id || runId || null), - ) - .filter(Boolean) as TraceItem[]; - const merged = normalized.reduce( - (acc: TraceItem[], item) => mergeTraceItems(acc, item), - [], - ); - setTraceEvents(merged); - setStreamState("idle"); - }) - .catch((e) => { - setTraceError(e.message); - setStreamState("error"); - }) - .finally(() => setTraceLoading(false)); - }, - [threadId], - ); - - const loadConversation = React.useCallback(() => { - if (!threadId) return; - setConversationLoading(true); - setConversationError(null); - fetchAPI(`/thread/${threadId}/conversation`) - .then((payload) => { - setConversationMessages( - Array.isArray(payload?.messages) ? payload.messages : [], - ); - }) - .catch((e) => setConversationError(e.message)) - .finally(() => setConversationLoading(false)); - }, [threadId]); - - React.useEffect(() => { - if (!threadId) return; - setTraceEvents([]); - setRunCandidates([]); - setSelectedRunId(initialRunId); - loadTrace(initialRunId); - loadConversation(); - }, [threadId, initialRunId, loadTrace, loadConversation]); - - React.useEffect(() => { - if (!selectedRunId) return; - loadTrace(selectedRunId); - }, [selectedRunId, loadTrace]); - - React.useEffect(() => { - // @@@trace-poll-stop-on-error - once trace or conversation has entered a - // hard backend error state, stop the 2s loop until the operator manually - // refreshes or the next successful load clears the error. - if ( - !threadId || - !autoRefreshEnabled || - !autoRefresh || - Boolean(traceError) || - Boolean(conversationError) - ) { - return; - } - const timer = window.setInterval(() => { - loadTrace(selectedRunId); - loadConversation(); - }, 2000); - return () => window.clearInterval(timer); - }, [ - threadId, - autoRefreshEnabled, - autoRefresh, - selectedRunId, - loadTrace, - loadConversation, - traceError, - conversationError, - ]); - - const traceTail = traceEvents.slice(-300); - const visibleTrace = traceTail.filter( - (item) => eventFilter === "all" || item.actor === eventFilter, - ); - const traceSteps = buildTraceSteps(visibleTrace); - const conversationTail = conversationMessages.slice(-200); - const traceStats = { - assistant: traceTail.filter((item) => item.actor === "assistant").length, - tool: traceTail.filter((item) => item.actor === "tool").length, - runtime: traceTail.filter((item) => item.actor === "runtime").length, - }; - - return ( -
      -

      - Thread Trace{" "} - {traceView === "conversation" - ? "Conversation" - : traceView === "events" - ? "Events" - : "Steps"}{" "} - ( - {traceView === "conversation" - ? `${conversationTail.length} messages` - : traceView === "events" - ? `${visibleTrace.length} events` - : `${traceSteps.length} steps / ${visibleTrace.length} events`} - ) -

      -

      - status: {streamState} | run:{" "} - {selectedRunId ? shortId(selectedRunId, 12) : "-"} | raw_events:{" "} - {rawEventCount} | messages: {conversationTail.length} -

      -
      - {traceView !== "conversation" && ( - <> -
      - Run - -
      -
      - {(["all", "assistant", "tool", "runtime"] as const).map( - (kind) => ( - - ), - )} -
      - - )} -
      - - - -
      - - - -
      - {traceView === "conversation" ? ( -
      - messages: {conversationTail.length} - loading: {conversationLoading ? "yes" : "no"} -
      - ) : ( -
      - assistant: {traceStats.assistant} - tool: {traceStats.tool} - runtime: {traceStats.runtime} - loading: {traceLoading ? "yes" : "no"} -
      - )} - {traceError && ( -
      - Trace load failed: {traceError} -
      - )} - {conversationError && ( -
      - Conversation load failed: {conversationError} -
      - )} -
      - {traceView === "conversation" ? ( - <> - {conversationTail.map((message, idx) => ( - - ))} - {conversationTail.length === 0 && - (conversationError ? null : traceTail.length > 0 ? ( -
      -

      No conversation messages were captured for this run.

      -

      - Trace events still exist. Switch to the lower-level views to - inspect the run directly. -

      -
      - - -
      -
      - ) : ( -
      No conversation messages yet.
      - ))} - - ) : traceView === "events" ? ( - <> - {visibleTrace.map((item, idx) => ( - - ))} - {visibleTrace.length === 0 && ( -
      - No trace events for this filter. -
      - )} - - ) : ( - <> - {traceSteps.map((step) => ( - - ))} - {traceSteps.length === 0 && ( -
      - No trace events for this filter. -
      - )} - - )} -
      - - {showRawTable && traceView !== "conversation" && ( -
      - Raw trace table - - - - - - - - - - - - - - {traceTail - .slice() - .reverse() - .map((item, idx) => ( - - - - - - - - - - ))} - -
      StepActorEventSummaryRunWhenPayload
      {item.seq || "-"} - - {item.actor} - - {item.event_type}{item.summary}{shortId(item.run_id)}{item.created_ago || "-"} -
      - view -
      -                          {JSON.stringify(item.payload, null, 2)}
      -                        
      -
      -
      -
      - )} -
      - ); -} - -// Page: Session Detail -function SessionDetailPage() { - const { sessionId } = useParams(); - const [data, setData] = React.useState(null); - const [error, setError] = React.useState(null); - - React.useEffect(() => { - if (!sessionId) return; - setError(null); - fetchAPI(`/session/${sessionId}`) - .then((payload) => setData(payload)) - .catch((e) => setError(e.message)); - }, [sessionId]); - - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - - return ( -
      - -

      Session: {data.session_id.slice(0, 8)}

      - -
      -
      - Thread:{" "} - {data.thread_id.slice(0, 8)} -
      -
      - Status: {data.info.status} -
      -
      - Provider: {data.info.provider || "-"} -
      -
      - Started: {data.info.started_ago} -
      -
      - Last Active: {data.info.last_active_ago} -
      -
      - Ended: {data.info.ended_ago || "-"} -
      +
      +

      {data.related_leases.title}

      +
        + {data.related_leases.items.map((l: any) => ( +
      • + {l.lease_id} +
      • + ))} +
      - -
      - - View thread trace - - {data.info.lease_id && ( - - View lease - - )} -
      - - {/* @@@session-trace-reuse - the compat session contract does not expose a dedicated run id, so reuse the thread trace surface here instead of inventing a deeper API seam. */} -
      ); } // Page: Leases List function LeasesPage() { - const location = useLocation(); const [data, setData] = React.useState(null); - const [error, setError] = React.useState(null); - const divergedOnly = - new URLSearchParams(location.search).get("diverged") === "1"; React.useEffect(() => { - setError(null); - fetchAPI("/leases") - .then(setData) - .catch((e) => setError(e?.message || String(e))); + fetchAPI('/leases').then(setData); }, []); - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - const triage = data.triage || {}; - const triageSummary = triage.summary || {}; - const triageGroups = Array.isArray(triage.groups) ? triage.groups : []; - const items = divergedOnly - ? data.items.filter((item: any) => - ["active_drift", "detached_residue", "orphan_cleanup"].includes( - item.triage?.category, - ), - ) - : data.items; - const visibleGroups = divergedOnly - ? triageGroups.filter((group: any) => - ["active_drift", "detached_residue", "orphan_cleanup"].includes( - group.key, - ), - ) - : triageGroups; - - const renderLeaseTable = (rows: any[]) => ( - - - - - - - - - - - - - - {rows.map((item: any) => ( - - - - - - - - - - ))} - -
      Lease IDProviderInstance IDThreadStateUpdatedError
      - {item.lease_id} - {item.provider}{item.instance_id?.slice(0, 12) || "-"} - {item.thread.thread_id ? ( - - {item.thread.thread_id.slice(0, 8)} - - ) : ( - orphan - )} - - - {item.updated_ago}{item.error || "-"}
      - ); + if (!data) return
      Loading...
      ; return ( -
      -

      - Legacy lease view, now backed by backend triage semantics. Use this when - you want lease-only focus without losing the full raw table. -

      -
      - - total - - {items.length} - {divergedOnly ? ` / ${data.count}` : ""} - - - - active drift - {triageSummary.active_drift || 0} - - - detached residue - {triageSummary.detached_residue || 0} - - - cleanup - {triageSummary.orphan_cleanup || 0} - - - healthy - {triageSummary.healthy_capacity || 0} - -
      -
      - - {divergedOnly ? "Show all leases" : "Only attention buckets"} - - - Open resources - -
      - {visibleGroups - .filter((group: any) => group.count > 0) - .map((group: any) => ( -
      -

      - {group.title} ({group.count}) -

      -

      {group.description}

      - {renderLeaseTable(group.items)} -
      - ))} -
      - All leases ({items.length}) - {renderLeaseTable(items)} -
      +
      +

      {data.title}

      +

      Total: {data.count}

      + + + + + + + + + + + + + + {data.items.map((item: any) => ( + + + + + + + + + + ))} + +
      Lease IDProviderInstance IDThreadStateUpdatedError
      {item.lease_id}{item.provider}{item.instance_id?.slice(0, 12) || '-'} + {item.thread.thread_id ? ( + {item.thread.thread_id.slice(0, 8)} + ) : ( + orphan + )} + {item.updated_ago}{item.error || '-'}
      ); } @@ -3398,52 +201,24 @@ function LeasesPage() { function LeaseDetailPage() { const { leaseId } = useParams(); const [data, setData] = React.useState(null); - const [error, setError] = React.useState(null); React.useEffect(() => { - setError(null); - fetchAPI(`/lease/${leaseId}`) - .then(setData) - .catch((e) => setError(e.message)); + fetchAPI(`/lease/${leaseId}`).then(setData); }, [leaseId]); - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - - // @@@lease-historical-signal - detect historical fallback lease: no active desired/observed state, provider unknown. - const isHistorical = !data.state.desired && !data.state.observed; + if (!data) return
      Loading...
      ; return (

      Lease: {data.lease_id}

      - {isHistorical && ( -

      - Historical lease — reconstructed from session records. Provider and - state fields may be incomplete. -

      - )} -
      Provider: {data.info.provider}
      - Instance ID:{" "} - {data.info.instance_id || "-"} + Instance ID: {data.info.instance_id || '-'}
      Created: {data.info.created_ago} @@ -3457,10 +232,10 @@ function LeaseDetailPage() {

      State

      - Desired: {data.state.desired || "-"} + Desired: {data.state.desired}
      - Observed: {data.state.observed || "-"} + Observed: {data.state.observed}
      Status: @@ -3482,15 +257,10 @@ function LeaseDetailPage() { ))} - {data.related_threads.items.length === 0 && ( -

      No threads linked to this lease.

      - )}
      -

      - {data.lease_events.title} ({data.lease_events.count}) -

      +

      {data.lease_events.title} ({data.lease_events.count})

      @@ -3503,19 +273,12 @@ function LeaseDetailPage() { {data.lease_events.items.map((e: any) => ( - + ))} - {data.lease_events.items.length === 0 && ( - - - - )}
      - {e.event_id} - {e.event_id} {e.event_type} {e.source} {e.created_ago}
      No events recorded for this lease.
      @@ -3523,35 +286,72 @@ function LeaseDetailPage() { ); } +// Page: Diverged Leases +function DivergedPage() { + const [data, setData] = React.useState(null); + + React.useEffect(() => { + fetchAPI('/diverged').then(setData); + }, []); + + if (!data) return
      Loading...
      ; + + return ( +
      +

      {data.title}

      +

      {data.description}

      +

      Total: {data.count}

      + + + + + + + + + + + + + + {data.items.map((item: any) => ( + + + + + + + + + + ))} + +
      Lease IDProviderThreadDesiredObservedHours DivergedError
      {item.lease_id}{item.provider} + {item.thread.thread_id ? ( + {item.thread.thread_id.slice(0, 8)} + ) : ( + orphan + )} + {item.state_badge.desired}{item.state_badge.observed} + {item.state_badge.hours_diverged}h + {item.error || '-'}
      +
      + ); +} + // Page: Events List function EventsPage() { const [data, setData] = React.useState(null); - const [error, setError] = React.useState(null); React.useEffect(() => { - setError(null); - fetchAPI("/events?limit=100") - .then(setData) - .catch((e) => setError(e?.message || String(e))); + fetchAPI('/events?limit=100').then(setData); }, []); - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } + if (!data) return
      Loading...
      ; return (
      +

      {data.title}

      {data.description}

      Total: {data.count}

      @@ -3568,19 +368,15 @@ function EventsPage() { {data.items.map((item: any) => ( - + - + ))} @@ -3594,29 +390,12 @@ function EventsPage() { function EventDetailPage() { const { eventId } = useParams(); const [data, setData] = React.useState(null); - const [error, setError] = React.useState(null); React.useEffect(() => { - setError(null); - fetchAPI(`/event/${eventId}`) - .then(setData) - .catch((e) => setError(e.message)); + fetchAPI(`/event/${eventId}`).then(setData); }, [eventId]); - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } + if (!data) return
      Loading...
      ; return (
      @@ -3648,1274 +427,34 @@ function EventDetailPage() { {data.related_lease.lease_id && (

      Related Lease

      - - {data.related_lease.lease_id} - + {data.related_lease.lease_id}
      )}

      Payload

      -
      -          {JSON.stringify(data.payload, null, 2)}
      -        
      -
      -
      - ); -} - -// Page: Evaluation -function EvaluationPage() { - const location = useLocation(); - const navigate = useNavigate(); - const [dataset, setDataset] = React.useState("SWE-bench/SWE-bench_Lite"); - const [split, setSplit] = React.useState("test"); - const [startIdx, setStartIdx] = React.useState("0"); - const [sliceCount, setSliceCount] = React.useState("10"); - const [promptProfile, setPromptProfile] = React.useState("heuristic"); - const [timeoutSec, setTimeoutSec] = React.useState("180"); - const [recursionLimit, setRecursionLimit] = React.useState("256"); - const [sandbox, setSandbox] = React.useState("local"); - const [runStatus, setRunStatus] = React.useState< - "idle" | "starting" | "submitted" | "error" - >("idle"); - const [evaluationId, setEvaluationId] = React.useState(""); - const [runError, setRunError] = React.useState(null); - const [listError, setListError] = React.useState(null); - const [evaluations, setEvaluations] = React.useState([]); - const [evalOffset, setEvalOffset] = React.useState(0); - const [evalLimit] = React.useState(30); - const [evalPagination, setEvalPagination] = React.useState(null); - const [runsLoading, setRunsLoading] = React.useState(false); - const [composerOpen, setComposerOpen] = React.useState(false); - const composerPanelRef = React.useRef(null); - const listErrorRef = React.useRef(null); - - React.useEffect(() => { - listErrorRef.current = listError; - }, [listError]); - - const loadEvaluations = React.useCallback(async () => { - setRunsLoading(true); - try { - const payload = await fetchAPI( - `/evaluations?limit=${evalLimit}&offset=${evalOffset}`, - ); - setEvaluations(Array.isArray(payload?.items) ? payload.items : []); - setEvalPagination(payload?.pagination || null); - setListError(null); - } catch (e: any) { - setListError(e?.message || String(e)); - } finally { - setRunsLoading(false); - } - }, [evalLimit, evalOffset]); - - React.useEffect(() => { - void loadEvaluations(); - const timer = window.setInterval(() => { - // @@@evaluation-list-poller - once the list has entered a hard backend error state, - // stop the 5s loop and let the operator recover with an explicit retry. - if (listErrorRef.current) { - window.clearInterval(timer); - return; - } - void loadEvaluations(); - }, 5000); - return () => window.clearInterval(timer); - }, [loadEvaluations]); - - async function handleStart() { - if (runStatus === "starting") return; - setRunError(null); - setEvaluationId(""); - setRunStatus("starting"); - - try { - const payload = await fetchJSON("/api/monitor/evaluations", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - dataset, - split, - start: Number(startIdx), - count: Number(sliceCount), - prompt_profile: promptProfile, - timeout_sec: Number(timeoutSec), - recursion_limit: Number(recursionLimit), - sandbox, - arm: "monitor", - }), - }); - const nextEvalId = String(payload?.evaluation_id || ""); - if (!nextEvalId) - throw new Error("create evaluation returned empty evaluation_id"); - setEvaluationId(nextEvalId); - setRunStatus("submitted"); - closeComposer(); - await loadEvaluations(); - } catch (e: any) { - setRunStatus("error"); - setRunError(e?.message || String(e)); - } - } - - const currentEval = evaluations.find( - (item: any) => item.evaluation_id === evaluationId, - ); - const submissionPreview = { - dataset, - split, - start: Number(startIdx || "0"), - count: Number(sliceCount || "0"), - prompt_profile: promptProfile, - timeout_sec: Number(timeoutSec || "0"), - recursion_limit: Number(recursionLimit || "0"), - sandbox, - arm: "monitor", - }; - const parameterReference = [ - [ - "Dataset", - "Benchmark source", - "Lite for fast iteration, Verified for strict runs", - ], - ["Split", "Data partition", "Use test for formal comparison"], - ["Start / Slice", "Case range", "Run small slices first, then scale up"], - [ - "Prompt Profile", - "Prompt strategy", - "Compare baseline vs heuristic in A/B", - ], - ["Timeout(s)", "Per-case wall clock limit", "180~300 for initial runs"], - [ - "Recursion", - "Agent iteration budget", - "256 default, raise to 512 for hard tasks", - ], - [ - "Sandbox", - "Execution provider", - "Use local for quick checks, daytona for infra parity", - ], - ]; - const currentProgress = currentEval ? evalProgress(currentEval) : null; - - React.useEffect(() => { - window.scrollTo({ top: 0, left: 0, behavior: "auto" }); - }, []); - React.useEffect(() => { - // @@@evaluation-query-open - allow deterministic screenshot/review entry to open config panel via ?new=1. - const query = new URLSearchParams(location.search); - setComposerOpen(query.get("new") === "1"); - }, [location.search]); - - React.useEffect(() => { - if (!composerOpen) return; - // @@@composer-modal-focus - focus the config panel itself so keyboard users land inside the active layer instead of remaining on the shell behind it. - composerPanelRef.current?.focus(); - }, [composerOpen]); - - React.useEffect(() => { - if (composerOpen) return; - const trigger = document.querySelector( - '[data-testid="evaluation-composer-trigger"]', - ); - trigger?.focus(); - }, [composerOpen]); - - React.useEffect(() => { - if (!composerOpen) return; - // @@@composer-escape-close - keep the config layer aligned with the guide modal so keyboard users can dismiss it without reaching for the mouse. - const onKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") { - closeComposer(); - return; - } - trapDialogTabKey(event, composerPanelRef.current); - }; - window.addEventListener("keydown", onKeyDown); - return () => window.removeEventListener("keydown", onKeyDown); - }, [composerOpen, location.pathname, location.search]); - - // @@@evaluation-query-close - clear the query flag on close so the shell CTA can reopen the composer on the next click. - function closeComposer() { - const query = new URLSearchParams(location.search); - query.delete("new"); - setComposerOpen(false); - navigate( - { - pathname: location.pathname, - search: query.toString() ? `?${query.toString()}` : "", - }, - { replace: true }, - ); - } - - return ( -
      -
      -
      -

      Current Submission

      -
      - - {String(currentEval?.status || runStatus || "idle").toUpperCase()} - -
      -
      evaluation: {evaluationId || "-"}
      - {currentEval && currentProgress && ( -
      -
      - phase: {String(currentEval.status || "-").toUpperCase()} -
      -
      -
      -
      -
      - {formatProgressSummary(currentProgress)} -
      -
      - )} - {runError &&
      run error: {runError}
      } - {evaluationId && ( -

      - - open evaluation detail - -

      - )} -
      -
      -
      -

      Evaluations ({evalPagination?.total ?? evaluations.length})

      - - {listError ? "auto refresh paused" : "auto refresh 5s"} - -
      -
      - - {evalPagination?.total ?? evaluations.length} evaluations - - - {runsLoading ? "loading..." : listError ? "error" : "idle"} - - page {evalPagination?.page ?? 1} -
      - {listError && ( -
      - list error: {listError} - -
      - )} -
      - {item.event_type} - {item.event_type} {item.source} {item.provider} {item.lease.lease_id ? ( {item.lease.lease_id} - ) : ( - "-" - )} + ) : '-'} {item.error || "-"}{item.error || '-'} {item.created_ago}
      - - - - - - - - - - - - - - {evaluations.map((item: any) => ( - - - - - - - - - - - ))} - {evaluations.length === 0 && ( - - - - )} - -
      EvaluationDatasetRangeProfile / Sandbox - Status - - Progress - ScoreUpdated
      - - {shortId(item.evaluation_id, 14)} - - {item.dataset} - {item.start_idx}..{item.start_idx + item.slice_count - 1} - - {item.prompt_profile || "-"} / {item.sandbox || "-"} - - {(() => { - return ( -
      - - {String(item.status || "-").toUpperCase()} - - - {Boolean( - item.score?.publishable ?? - item.score?.score_gate === "final", - ) - ? "publishable" - : "provisional"} - -
      - ); - })()} -
      - {(() => { - const p = evalProgress(item); - return ( -
      -
      -
      -
      -
      - {formatProgressSummary(p)} -
      -
      - ); - })()} -
      -
      - - {(item.score?.publishable ?? - item.score?.score_gate === "final") - ? `R ${formatResolvedScore(item)}` - : "R PROVISIONAL"} - -
      - C {formatPct(item.score?.completed_rate_pct)} | T{" "} - {formatPct(item.score?.tool_call_thread_rate_pct)} -
      -
      -
      {item.updated_ago || "-"}
      - {listError - ? "Unable to load evaluations." - : "No evaluations yet."} -
      -
      -
      - offset={evalPagination?.offset ?? 0} | limit= - {evalPagination?.limit ?? evalLimit} | total= - {evalPagination?.total ?? evaluations.length} -
      -
      - - -
      -
      - - - - {composerOpen && ( - // @@@evaluation-composer-modal - keep config editing in a fixed layer to avoid "tail jump" in long list pages. -
      -
      e.stopPropagation()} - > -
      -

      New Evaluation Config

      - -
      -

      - Configure run scope, profile and runtime, then submit. -

      - -
      -
      -

      Run Scope

      -
      -
      - - -

      - Benchmark source. Lite is faster; Verified is stricter and - slower. -

      -
      -
      - - -

      - Dataset partition. Use test{" "} - for formal comparison. -

      -
      -
      - - setStartIdx(e.target.value)} - /> -

      - Starting index inside the selected split. -

      -
      -
      - - -

      - How many items to run in this evaluation batch. -

      -
      -
      -
      - -
      -

      Agent Profile

      -
      -
      - - -

      - Prompt strategy passed to runner. Used for A/B profile - comparison. -

      -
      -
      - - setRecursionLimit(e.target.value)} - /> -

      - Agent recursion/iteration budget per item. -

      -
      -
      -
      - -
      -

      Runtime

      -
      -
      - - setTimeoutSec(e.target.value)} - /> -

      - Per-item wall-clock timeout in seconds. -

      -
      -
      - - -

      - Execution environment provider for this run. -

      -
      -
      -
      - -
      -
      - - -
      -

      - Submits config to backend and starts an evaluation job. -

      -
      -
      - -
      - Submission Preview -
      -                {JSON.stringify(submissionPreview, null, 2)}
      -              
      -
      - -
      - Parameter Reference - - - - - - - - - - {parameterReference.map((row) => ( - - - - - - ))} - -
      FieldMeaningRecommendation
      {row[0]}{row[1]}{row[2]}
      -
      -
      -
      - )} -
      - ); -} - -function EvaluationDetailPage() { - const { evaluationId } = useParams(); - const [data, setData] = React.useState(null); - const [error, setError] = React.useState(null); - - React.useEffect(() => { - setError(null); - fetchAPI(`/evaluation/${evaluationId}`) - .then(setData) - .catch((e) => setError(e.message)); - }, [evaluationId]); - - if (error) { - return ( -
      - -
      - ); - } - if (!data) { - return ( -
      -
      Loading...
      -
      - ); - } - const detailProgress = evalProgress({ - threads_done: data.info?.threads_done ?? 0, - threads_running: data.info?.threads_running ?? 0, - slice_count: data.info?.slice_count ?? data.info?.threads_total ?? 0, - progress_source: data.info?.progress_source ?? "thread_rows", - }); - const threadStateLabel = - detailProgress.mode === "checkpoint_estimate" ? "started" : "running"; - const scoreGate = String(data.info?.score?.score_gate || "provisional"); - const publishable = Boolean( - data.info?.score?.publishable ?? scoreGate === "final", - ); - const scoreFinal = publishable; - const summaryReady = !!data.info?.score?.eval_summary_path; - const operator = data.info?.operator_surface || {}; - const statusToneClass = - data.info.status === "completed" - ? "chip-success" - : data.info.status === "error" - ? "chip-danger" - : data.info.status === "provisional" || - data.info.status === "completed_with_errors" - ? "chip-warning" - : ""; - - return ( -
      - -

      Evaluation: {shortId(data.evaluation_id, 14)}

      -
      - - {data.info.status} - - {data.info.dataset} - - {threadStateLabel}={data.info.threads_running}/ - {data.info.threads_total} - - gate={scoreGate} - - publishable={String(publishable)} - - - score= - {scoreFinal - ? `${data.info.score?.resolved_instances ?? 0}/${data.info.score?.total_instances ?? 0} (${formatPct(data.info.score?.primary_score_pct)})` - : "PROVISIONAL"} - -
      -
      -
      - phase: {String(data.info.status || "-").toUpperCase()} -
      -
      -
      -
      -
      - {formatProgressSummary(detailProgress)} -
      -
      - -
      -
      -
      -

      Operator Status

      -

      - {operator.summary || - "Inspect the current evaluation state before judging score or trace coverage."} -

      -
      - - {operator.tone || "default"} - -
      - - {operator.headline || "Evaluation operator surface"} - -
      -
      -

      Facts

      -
      - {(operator.facts || []).map((item: any) => ( -
      - {item.label}: {item.value} -
      - ))} -
      -
      -
      -

      Artifacts

      -
        - {(operator.artifacts || []).map((item: any) => ( -
      • - {item.label}:{" "} - {item.path} -
      • - ))} -
      -
      -
      -
      -

      Next Step

      -
        - {(operator.next_steps || []).map((item: string) => ( -
      1. {item}
      2. - ))} -
      -
      - {operator.raw_notes ? ( -
      - Raw runner notes -
      {operator.raw_notes}
      -
      - ) : null} -
      - -
      -

      Config

      -
      -
      - Split: {data.info.split} -
      -
      - Start: {data.info.start_idx} -
      -
      - Count: {data.info.slice_count} -
      -
      - Profile: {data.info.prompt_profile} -
      -
      - Timeout: {data.info.timeout_sec}s -
      -
      - Recursion: {data.info.recursion_limit} -
      -
      -
      - - {scoreFinal ? ( -
      -

      Score

      -
      -
      - Score Gate: {scoreGate} -
      -
      - Publishable: {String(publishable)} -
      -
      - Summary: {summaryReady ? "ready" : "missing"} -
      -
      - Resolved:{" "} - {data.info.score?.resolved_instances ?? 0}/ - {data.info.score?.total_instances ?? 0} -
      -
      - Resolved Rate:{" "} - {formatPct(data.info.score?.resolved_rate_pct)} -
      -
      - Completed:{" "} - {data.info.score?.completed_instances ?? 0}/ - {data.info.score?.total_instances ?? 0} -
      -
      - Completed Rate:{" "} - {formatPct(data.info.score?.completed_rate_pct)} -
      -
      - Non-empty Patch:{" "} - {data.info.score?.non_empty_patch_instances ?? 0}/ - {data.info.score?.total_instances ?? 0} -
      -
      - Non-empty Rate:{" "} - {formatPct(data.info.score?.non_empty_patch_rate_pct)} -
      -
      - Empty Patch:{" "} - {data.info.score?.empty_patch_instances ?? 0}/ - {data.info.score?.total_instances ?? 0} -
      -
      - Errors: {data.info.score?.error_instances ?? 0} -
      -
      - Trace Active:{" "} - {data.info.score?.active_trace_threads ?? 0}/ - {data.info.score?.total_instances ?? 0} -
      -
      - Tool-call Threads:{" "} - {data.info.score?.tool_call_threads ?? 0}/ - {data.info.score?.total_instances ?? 0} -
      -
      - Tool-call Coverage:{" "} - {formatPct(data.info.score?.tool_call_thread_rate_pct)} -
      -
      - Tool Calls Total:{" "} - {data.info.score?.tool_calls_total ?? 0} -
      -
      - Avg Tool Calls(active):{" "} - {data.info.score?.avg_tool_calls_per_active_thread ?? "-"} -
      -
      - Recursion Cap Hits:{" "} - {data.info.score?.recursion_cap_hits ?? 0} - {data.info.score?.recursion_limit - ? ` / cap ${data.info.score.recursion_limit}` - : ""} -
      -
      - Run Dir:{" "} - {data.info.score?.run_dir || "-"} -
      -
      -
      - ) : ( -
      - Score artifacts (provisional) -
      -
      - Score Gate: {scoreGate} -
      -
      - Publishable: {String(publishable)} -
      -
      - Summary: {summaryReady ? "ready" : "missing"} -
      -
      - Final Score: blocked (provisional) -
      -
      - Block Reason:{" "} - {data.info.score?.manifest_eval_error - ? "manifest_eval_error" - : "missing_eval_summary"} -
      -
      - Run Dir:{" "} - {data.info.score?.run_dir || "-"} -
      -
      -
      - )} - -
      -

      - {data.threads.title} ({data.threads.count}) -

      - - - - - - - - - - - - - - {data.threads.items.map((item: any) => ( - - - - - - - - - - ))} - {data.threads.items.length === 0 && ( - - - - )} - -
      #ThreadSessionRunEventsStatusStart
      {item.item_index} - - - {evalThreadLabel(item.thread_id, data.evaluation_id)} - - - - {item.session?.session_url ? ( - - {shortId(item.session.session_id)} - - ) : ( - "-" - )} - - {item.run?.run_id ? shortId(item.run.run_id, 12) : "-"} - {item.run?.event_count ?? 0}{item.status}{item.start_idx}
      No threads in this evaluation.
      -
      -
      - ); -} - -// @@@nav-information-architecture - grouped rail: overview → infra → workload. Section headers add hierarchy without adding pages. -const SHELL_NAV_GROUPS = [ - { - label: "Overview", - items: [ - { - to: "/dashboard", - label: "Dashboard", - shortLabel: "DB", - testId: "nav-dashboard", - }, - ], - }, - { - label: "Infrastructure", - items: [ - { - to: "/resources", - label: "Resources", - shortLabel: "RS", - testId: "nav-resources", - }, - { - to: "/leases", - label: "Leases", - shortLabel: "LS", - testId: "nav-leases", - }, - ], - }, - { - label: "Workload", - items: [ - { - to: "/evaluation", - label: "Evaluations", - shortLabel: "EV", - testId: "nav-eval", - }, - { - to: "/threads", - label: "Threads", - shortLabel: "TH", - testId: "nav-threads", - }, - { - to: "/traces", - label: "Traces", - shortLabel: "TR", - testId: "nav-traces", - }, - ], - }, -] as const; - -const GUIDE_SECTIONS = [ - { - title: "Dashboard", - body: "Start here. Read provider health, live workload pressure, and the latest evaluation before drilling into detail.", - }, - { - title: "Resources", - body: "Use the global resources page to inspect provider health, select a lease, and then narrow the session truth surface without losing the global contract.", - }, - { - title: "Evaluations", - body: "Open config only when you are ready to submit. After that, stay in the list or jump into evaluation detail for artifacts, trace, and next-step diagnosis.", - }, - { - title: "Threads / Traces / Leases", - body: "Treat these as truth surfaces. Use them when the dashboard or resources page tells you where to look, not as the first page you land on.", - }, -] as const; - -function shellMeta(pathname: string): { title: string; subtitle: string } { - // @@@shell-route-bucketing - detail routes inherit the nearest console section. - if (pathname.startsWith("/leases") || pathname.startsWith("/lease/")) - return { title: "Leases", subtitle: "Grouped triage · raw truth fallback" }; - if (pathname.startsWith("/resources")) - return { - title: "Resources", - subtitle: "Provider health · lease triage · session truth", - }; - if (pathname.startsWith("/evaluation")) - return { - title: "Evaluations", - subtitle: "Submit · track · inspect artifacts", - }; - if (pathname.startsWith("/threads") || pathname.startsWith("/thread")) - return { - title: "Threads", - subtitle: "Global thread index · session and trace drill-down", - }; - if (pathname.startsWith("/traces") || pathname.startsWith("/session")) - return { - title: "Traces", - subtitle: "Sequence-level session and tool-call inspection", - }; - if (pathname.startsWith("/events") || pathname.startsWith("/event")) - return { title: "Events", subtitle: "Lease and runtime event history" }; - return { - title: "Dashboard", - subtitle: "Health · workload · latest evaluation", - }; -} - -function OperatorGuideModal({ - open, - onClose, -}: { - open: boolean; - onClose: () => void; -}) { - const panelRef = React.useRef(null); - - React.useEffect(() => { - if (!open) return; - const onKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") { - onClose(); - return; - } - trapDialogTabKey(event, panelRef.current); - }; - window.addEventListener("keydown", onKeyDown); - return () => window.removeEventListener("keydown", onKeyDown); - }, [open, onClose]); - - React.useEffect(() => { - if (!open) return; - // @@@modal-focus-handshake - focus the panel itself so keyboard users land inside the active surface instead of staying on the trigger behind the backdrop. - panelRef.current?.focus(); - }, [open]); - - React.useEffect(() => { - if (open) return; - const trigger = document.querySelector( - '[data-testid="operator-guide-trigger"]', - ); - trigger?.focus(); - }, [open]); - - if (!open) return null; - - return ( -
      -
      event.stopPropagation()} - > -
      -
      -

      Operator Guide

      -

      How to read this console

      -
      - -
      -

      - This guide stays out of the main content column by default. Open it - when you need orientation, then go back to the live console surface. -

      -
      - {GUIDE_SECTIONS.map((section) => ( -
      -

      {section.title}

      -

      {section.body}

      -
      - ))} -
      +
      {JSON.stringify(data.payload, null, 2)}
      ); } -function ScrollToTopOnRouteChange() { - const { pathname, hash } = useLocation(); - React.useEffect(() => { - // @@@history-scroll-restore-disable - browser may restore stale scroll offsets and make user land at page tail. - const prev = window.history.scrollRestoration; - window.history.scrollRestoration = "manual"; - return () => { - window.history.scrollRestoration = prev; - }; - }, []); - React.useEffect(() => { - // @@@route-scroll-reset - switch tabs/details should always start from top to avoid "tail landing" confusion. - window.scrollTo({ top: 0, left: 0, behavior: "auto" }); - if (!hash) return; - - // @@@hash-deeplink-retry - lease health and similar sections appear after async data load, so retry briefly instead of pretending the hash already landed. - const targetId = decodeURIComponent(hash.slice(1)); - let attempts = 0; - const maxAttempts = 40; - const timer = window.setInterval(() => { - const target = document.getElementById(targetId); - if (target) { - target.scrollIntoView({ block: "start", inline: "nearest" }); - window.clearInterval(timer); - return; - } - attempts += 1; - if (attempts >= maxAttempts) { - window.clearInterval(timer); - } - }, 50); - return () => window.clearInterval(timer); - }, [pathname, hash]); - return null; -} - +// Layout: Top navigation function Layout({ children }: { children: React.ReactNode }) { - const { pathname } = useLocation(); - const [guideOpen, setGuideOpen] = React.useState(false); - const meta = shellMeta(pathname); - const showEvalComposeAction = pathname === "/evaluation"; - return ( -
      - -
      -
      -
      -

      {meta.title}

      -

      {meta.subtitle}

      -
      -
      - {showEvalComposeAction ? ( - - Build Eval - - ) : null} - -
      -
      -
      {children}
      -
      - setGuideOpen(false)} - /> +
      + +
      + {children} +
      ); } @@ -4924,29 +463,16 @@ function Layout({ children }: { children: React.ReactNode }) { export default function App() { return ( - - } /> - } /> + } /> } /> - } /> - } /> } /> - } /> } /> } /> - } - /> + } /> } /> } /> - } /> - } - /> diff --git a/frontend/monitor/src/main.tsx b/frontend/monitor/src/main.tsx index 287fc3135..46ce20d5d 100644 --- a/frontend/monitor/src/main.tsx +++ b/frontend/monitor/src/main.tsx @@ -4,5 +4,8 @@ import App from "./App"; import "./styles.css"; ReactDOM.createRoot(document.getElementById("root")!).render( - , + + + , ); + diff --git a/frontend/monitor/src/styles.css b/frontend/monitor/src/styles.css index fbf7eb103..0b767eade 100644 --- a/frontend/monitor/src/styles.css +++ b/frontend/monitor/src/styles.css @@ -4,33 +4,10 @@ box-sizing: border-box; } -:root { - --bg: #ffffff; - --bg-soft: #fafafa; - --bg-muted: #f6f6f6; - --panel: #ffffff; - --panel-strong: #fcfcfc; - --border: #e7e7e7; - --border-strong: #d8d8d8; - --text: #171717; - --text-secondary: #525252; - --text-muted: #737373; - --accent: #2563eb; - --accent-soft: #eff6ff; - --danger-soft: #fef2f2; - --danger: #dc2626; - --warning-soft: #fffbeb; - --warning: #d97706; - --success-soft: #ecfdf5; - --success: #059669; -} - body { - font-family: - -apple-system, BlinkMacSystemFont, "Segoe UI", "PingFang SC", - "Noto Sans SC", sans-serif; - background: var(--bg-soft); - color: var(--text); + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', sans-serif; + background: #0a0a0a; + color: #e0e0e0; line-height: 1.6; } @@ -40,337 +17,65 @@ body { flex-direction: column; } -/* Console Shell */ -.console-app { - min-height: 100vh; - display: flex; - background: - radial-gradient( - circle at top left, - rgba(37, 99, 235, 0.04), - transparent 28% - ), - linear-gradient(180deg, #fbfbfb 0%, #f7f7f6 100%); -} - -.console-sidebar { - width: 272px; - padding: 1.25rem 1rem 1rem; - border-right: 1px solid var(--border); - background: rgba(255, 255, 255, 0.82); - backdrop-filter: blur(18px); - display: flex; - flex-direction: column; - gap: 1.25rem; - position: sticky; - top: 0; - height: 100vh; -} - -.console-brand { +/* Top Navigation */ +.top-nav { + background: #1a1a1a; + border-bottom: 1px solid #333; + padding: 1rem 2rem; display: flex; - align-items: flex-start; - gap: 0.85rem; -} - -.console-brand-mark { - width: 2.35rem; - height: 2.35rem; - border-radius: 0.9rem; - display: grid; - place-items: center; - font-size: 0.92rem; - font-weight: 700; - color: var(--accent); - background: linear-gradient(180deg, #eff6ff 0%, #dbeafe 100%); - border: 1px solid rgba(37, 99, 235, 0.12); + align-items: center; + gap: 2rem; } .logo { - font-size: 1rem; - font-weight: 650; - color: var(--text); -} - -.console-brand-copy { - margin-top: 0.15rem; - color: var(--text-muted); - font-size: 0.84rem; -} - -.console-nav { - display: flex; - flex-direction: column; - gap: 1.1rem; -} - -/* @@@nav-group-rhythm - section spacing + muted label creates hierarchy without decoration */ -.console-nav-group { - display: flex; - flex-direction: column; - gap: 0.2rem; -} - -.console-nav-group + .console-nav-group { - padding-top: 0.65rem; - border-top: 1px solid rgba(231, 231, 231, 0.72); -} - -.console-nav-group-label { - display: block; - padding: 0 0.75rem 0.3rem; - font-size: 0.68rem; + font-size: 1.2rem; font-weight: 600; - letter-spacing: 0.08em; - text-transform: uppercase; - color: var(--text-muted); + color: #fff; } -.console-nav a { +.nav-links { display: flex; - align-items: center; - gap: 0.65rem; - color: var(--text-secondary); - text-decoration: none; - font-weight: 500; - font-size: 0.9rem; - transition: - color 0.18s ease, - background 0.18s ease, - border-color 0.18s ease; - border: 1px solid transparent; - border-radius: 10px; - padding: 0.58rem 0.75rem; -} - -.console-nav a:hover { - color: var(--text); - background: var(--bg-muted); -} - -.console-nav a[aria-current="page"] { - color: var(--accent); - background: var(--accent-soft); - border-color: rgba(37, 99, 235, 0.12); - box-shadow: inset 2px 0 0 var(--accent); -} - -.console-nav-mark { - width: 1.7rem; - height: 1.7rem; - flex: 0 0 auto; - border-radius: 0.5rem; - display: grid; - place-items: center; - background: rgba(23, 23, 23, 0.04); - color: var(--text-muted); - font-size: 0.62rem; - letter-spacing: 0.04em; - text-transform: uppercase; + gap: 1.5rem; } -.console-nav a[aria-current="page"] .console-nav-mark { - background: rgba(37, 99, 235, 0.12); - color: var(--accent); -} - -.console-sidebar-foot { - margin-top: auto; - border-top: 1px solid var(--border); - padding: 0.85rem 0.2rem 0.25rem; - color: var(--text-muted); - font-size: 0.82rem; -} - -.console-foot-row { - display: flex; - align-items: center; - gap: 0.45rem; - margin-bottom: 0.25rem; +.nav-links a { + color: #888; + text-decoration: none; font-weight: 500; - color: var(--text-secondary); -} - -.console-foot-dot { - width: 0.45rem; - height: 0.45rem; - border-radius: 999px; - background: var(--success); - box-shadow: 0 0 0 3px rgba(5, 150, 105, 0.12); -} - -.console-foot-meta { - font-size: 0.72rem; - letter-spacing: 0.04em; - text-transform: uppercase; - color: var(--text-muted); -} - -.shell-eyebrow { - margin-bottom: 0.35rem; - font-size: 0.75rem; - line-height: 1.2; - letter-spacing: 0.08em; - text-transform: uppercase; - color: var(--text-muted); -} - -.console-main { - flex: 1; - min-width: 0; - display: flex; - flex-direction: column; -} - -.console-header { - display: flex; - align-items: center; - justify-content: space-between; - gap: 1rem; - padding: 0.9rem 2rem; - border-bottom: 1px solid rgba(231, 231, 231, 0.8); - background: rgba(255, 255, 255, 0.72); - backdrop-filter: blur(14px); - position: sticky; - top: 0; - z-index: 15; - min-height: 3.5rem; -} - -.console-title { - margin: 0; - font-size: 1.5rem; - line-height: 1.15; -} - -.console-subtitle { - margin: 0.25rem 0 0; - font-size: 0.82rem; - color: var(--text-muted); - letter-spacing: 0.01em; -} - -.console-header-actions { - display: flex; - align-items: center; - gap: 0.75rem; -} - -/* @@@depth-system - 3 visual layers to break flatness: primary (look here), secondary (interact here), recessed (reference/debug) */ -.depth-primary { - background: var(--panel); - border: 1px solid var(--border-strong); - border-radius: 16px; - padding: 1.15rem 1.2rem; - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04); -} - -.depth-secondary { - background: var(--panel); - border: 1px solid var(--border); - border-radius: 14px; - padding: 1rem; -} - -.depth-recessed { - background: var(--bg-muted); - border: 1px solid transparent; - border-radius: 12px; - padding: 0.85rem 1rem; -} - -.depth-recessed h2 { - font-size: 0.95rem; - color: var(--text-secondary); + transition: color 0.2s; } -.depth-recessed .description { - font-size: 0.82rem; -} - -.depth-recessed table { - background: var(--bg-muted); -} - -.sticky-context { - position: sticky; - top: 72px; - z-index: 10; - background: rgba(250, 250, 250, 0.92); - backdrop-filter: blur(10px); - border-bottom: 1px solid var(--border); - padding: 0.6rem 2rem; - margin: 0 -2rem 1rem; +.nav-links a:hover { + color: #fff; } +/* Content */ .content { flex: 1; padding: 2rem; - max-width: 1280px; + max-width: 1600px; width: 100%; + margin: 0 auto; } .page { animation: fadeIn 0.2s; } -.shell-modal-backdrop { - position: fixed; - inset: 0; - z-index: 50; - background: rgba(248, 248, 248, 0.78); - backdrop-filter: blur(8px); - padding: 2rem; - overflow-y: auto; -} - -.shell-modal-panel { - width: min(920px, 100%); - margin: 0 auto; - background: var(--panel); - border: 1px solid var(--border); - border-radius: 22px; - box-shadow: 0 24px 80px rgba(23, 23, 23, 0.08); - padding: 1.3rem; -} - -.shell-modal-head { - margin-bottom: 0.35rem; -} - -.shell-modal-panel h2 { - margin: 0; -} - -.shell-guide-grid { - display: grid; - grid-template-columns: repeat(12, minmax(0, 1fr)); - gap: 1rem; - margin-top: 1rem; -} - -.shell-guide-grid > * { - grid-column: span 6; -} - @keyframes fadeIn { - from { - opacity: 0; - } - to { - opacity: 1; - } + from { opacity: 0; } + to { opacity: 1; } } /* Breadcrumb */ .breadcrumb { margin-bottom: 1rem; font-size: 0.9rem; - color: var(--text-muted); + color: #888; } .breadcrumb a { - color: var(--accent); + color: #4a9eff; text-decoration: none; } @@ -382,2005 +87,177 @@ body { h1 { font-size: 2rem; margin-bottom: 1rem; - color: var(--text); + color: #fff; } h2 { font-size: 1.3rem; margin: 2rem 0 1rem; - color: var(--text); + color: #fff; } .count { - color: var(--text-muted); + color: #888; margin-bottom: 1rem; } .description { - color: var(--text-secondary); + color: #aaa; margin-bottom: 1rem; - max-width: 72ch; -} - -.dashboard-grid { - display: grid; - grid-template-columns: repeat(12, minmax(0, 1fr)); - gap: 1.25rem; - align-items: start; -} - -.dashboard-card { - grid-column: span 4; - display: flex; - flex-direction: column; - gap: 0.85rem; } -/* @@@dashboard-hero-weight - hero spans wider and gets extra shadow to pull visual focus */ -.dashboard-card-hero { - grid-column: span 8; - padding: 1.35rem 1.4rem; - box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); -} - -.dashboard-card-eval { - grid-column: span 4; -} - -.dashboard-sidebar-stack { - grid-column: span 4; - display: flex; - flex-direction: column; - gap: 0.85rem; -} - -/* @@@sidebar-compact - secondary cards in the sidebar stack use tighter padding to feel subordinate to hero */ -.dashboard-sidebar-stack .dashboard-card { - padding: 0.85rem 0.95rem; - gap: 0.65rem; -} - -.dashboard-sidebar-stack .dashboard-card-head h2 { - font-size: 1.05rem; -} - -.dashboard-card-head h2 { - margin: 0 0 0.25rem; -} - -.dashboard-metric-grid, -.resource-summary-grid { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); - gap: 0.55rem; +/* Tables */ +table { + width: 100%; + border-collapse: collapse; + background: #1a1a1a; + border-radius: 8px; + overflow: hidden; } -.resource-summary-grid { - margin-bottom: 1.25rem; +thead { + background: #252525; } -.dashboard-metric { - display: flex; - flex-direction: column; - gap: 0.15rem; - min-height: 0; - padding: 0.72rem 0.85rem; - border: 1px solid var(--border); - border-radius: 12px; - background: var(--panel); +th { + text-align: left; + padding: 0.75rem 1rem; + font-weight: 600; + color: #fff; + font-size: 0.9rem; } -.dashboard-metric-label { - font-size: 0.72rem; - text-transform: uppercase; - letter-spacing: 0.04em; - color: var(--text-muted); +td { + padding: 0.75rem 1rem; + border-top: 1px solid #2a2a2a; } -.dashboard-metric-value { - font-size: 1.25rem; - line-height: 1.15; - color: var(--text); +tr:hover { + background: #222; } -.dashboard-inline-link { - color: inherit; +td a { + color: #4a9eff; text-decoration: none; } -.dashboard-inline-link:hover { - color: var(--accent); -} - -.dashboard-metric-note { - font-size: 0.82rem; - color: var(--text-secondary); -} - -.dashboard-metric-warning { - background: var(--warning-soft); -} - -.dashboard-metric-danger { - background: var(--danger-soft); -} - -.dashboard-metric-success { - background: var(--success-soft); -} - -.dashboard-eval-body { - display: flex; - flex-direction: column; - gap: 0.75rem; -} - -.dashboard-eval-id { - color: var(--text-secondary); - white-space: pre-wrap; - word-break: break-word; +td a:hover { + text-decoration: underline; } -.dashboard-eval-footer { - display: grid; - grid-template-columns: minmax(0, 1fr); +.mono { + font-family: 'SF Mono', Monaco, monospace; + font-size: 0.85rem; } -.dashboard-empty { - border: 1px dashed var(--border-strong); - border-radius: 14px; - padding: 1rem; - background: var(--bg-muted); +.error { + color: #ff6b6b; } -.cleanup-feedback { - margin: 0.85rem 0 1rem; - padding: 0.75rem 0.95rem; - border-radius: 12px; - border: 1px solid var(--border); - background: var(--bg-muted); - color: var(--text-secondary); +.orphan { + color: #ff9800; + font-style: italic; } -.cleanup-feedback.is-success { - border-color: rgba(5, 150, 105, 0.16); - background: var(--success-soft); - color: var(--success); +/* State Badge */ +.state-badge { + display: inline-block; + padding: 0.25rem 0.6rem; + border-radius: 4px; + font-size: 0.85rem; + font-weight: 500; } -.cleanup-feedback.is-error { - border-color: rgba(220, 38, 38, 0.16); - background: var(--danger-soft); - color: var(--danger); +.state-green { + background: #1a4d2e; + color: #4ade80; } -.cleanup-confirm { - margin: 0 0 1rem; - padding: 0.9rem 1rem; - border-radius: 14px; - border: 1px solid rgba(245, 158, 11, 0.22); - background: linear-gradient(180deg, var(--panel) 0%, var(--warning-soft) 100%); - display: flex; - align-items: center; - justify-content: space-between; - gap: 1rem; - flex-wrap: wrap; +.state-yellow { + background: #4d3d1a; + color: #fbbf24; } -.cleanup-confirm p { - margin: 0.25rem 0 0; - color: var(--text-secondary); +.state-red { + background: #4d1a1a; + color: #f87171; } -.cleanup-confirm-actions { - display: flex; - align-items: center; - gap: 0.65rem; - flex-wrap: wrap; +/* Sections */ +section { + margin-bottom: 2rem; } -.resource-section-shell { - margin-bottom: 1.25rem; +section ul { + list-style: none; + padding: 0; } -/* @@@lease-health-subordinate - lease health is global truth, not the primary working surface. Softer container to avoid competing with provider detail above. */ -#lease-health { - border-color: transparent; - background: transparent; - padding-left: 0; - padding-right: 0; - padding-top: 1.5rem; - border-top: 1px solid var(--border); - border-radius: 0; +section li { + padding: 0.5rem 0; } -.resource-split-console { +/* Info Grid */ +.info-grid { display: grid; - grid-template-columns: 320px minmax(0, 1fr); + grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem; - align-items: start; - margin-bottom: 1.25rem; -} - -.resource-rail { - border: 1px solid var(--border); - border-radius: 18px; - background: rgba(255, 255, 255, 0.72); - padding: 0.95rem; - position: sticky; - top: 132px; -} - -.resource-rail h2 { - margin: 0; -} - -.resource-rail .section-row { - margin-bottom: 0.85rem; -} - -.resource-rail-list { - display: flex; - flex-direction: column; - gap: 0.65rem; - max-height: calc(100vh - 220px); - overflow: auto; - padding-right: 0.15rem; + background: #1a1a1a; + padding: 1.5rem; + border-radius: 8px; + margin-bottom: 2rem; } -.resource-rail-item { - border: 1px solid transparent; - border-radius: 14px; - background: var(--bg-soft); - padding: 0.8rem 0.9rem; - text-align: left; +.info-grid div { display: flex; flex-direction: column; - gap: 0.42rem; - transition: - border-color 0.18s ease, - background 0.18s ease, - box-shadow 0.18s ease, - transform 0.18s ease; -} - -.resource-rail-item:hover:not(:disabled) { - border-color: var(--border); - background: var(--panel); - transform: translateY(-1px); -} - -.resource-rail-item.is-selected { - border-color: rgba(37, 99, 235, 0.22); - background: var(--panel); - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05); -} - -.resource-rail-item.is-unavailable { - opacity: 0.82; -} - -.resource-rail-row { - display: flex; - align-items: center; - gap: 0.5rem; - color: var(--text); -} - -.resource-rail-row strong { - font-size: 0.93rem; + gap: 0.25rem; } -.resource-rail-meta { - display: flex; - align-items: center; - justify-content: space-between; - gap: 0.75rem; - font-size: 0.78rem; - color: var(--text-secondary); +.info-grid strong { + color: #888; + font-size: 0.85rem; + font-weight: 500; } -.resource-detail { +/* State Info */ +.state-info { + background: #1a1a1a; + padding: 1.5rem; + border-radius: 8px; display: flex; flex-direction: column; gap: 1rem; - min-width: 0; } -.monitor-provider-grid { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); - gap: 0.9rem; -} - -.monitor-provider-card { - border: 1px solid transparent; - background: var(--bg-soft); - border-radius: 14px; - padding: 1rem; - text-align: left; +.state-info div { display: flex; - flex-direction: column; - gap: 0.9rem; - transition: - border-color 0.18s ease, - background 0.18s ease, - box-shadow 0.18s ease; -} - -.monitor-provider-card:hover:not(:disabled) { - border-color: var(--border); - background: var(--panel); -} - -.monitor-provider-card.is-selected { - border-color: rgba(37, 99, 235, 0.24); - background: var(--panel); - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04); -} - -.monitor-provider-card.is-unavailable { - opacity: 0.82; + gap: 1rem; + align-items: center; } -.monitor-provider-header { - display: flex; - align-items: flex-start; - justify-content: space-between; - gap: 0.75rem; +.state-info strong { + color: #888; + min-width: 100px; } -.monitor-provider-header strong { - display: block; - font-size: 0.95rem; +/* JSON Payload */ +.json-payload { + background: #1a1a1a; + padding: 1.5rem; + border-radius: 8px; + overflow-x: auto; + font-family: 'SF Mono', Monaco, monospace; + font-size: 0.85rem; + line-height: 1.5; + color: #e0e0e0; } -.monitor-provider-title { - display: flex; - align-items: center; - gap: 0.5rem; -} - -.monitor-provider-header p { - margin: 0.2rem 0 0; - font-size: 0.82rem; - color: var(--text-muted); -} - -.provider-status-light { - width: 0.55rem; - height: 0.55rem; - border-radius: 999px; - flex-shrink: 0; - background: var(--border-strong); -} - -.provider-status-light.is-active { - background: var(--success); - box-shadow: 0 0 0 4px rgba(5, 150, 105, 0.12); -} - -.provider-status-light.is-ready { - background: #9ca3af; -} - -.provider-status-light.is-unavailable { - background: var(--danger); -} - -.provider-card-divider { - border-top: 1px dashed var(--border); - margin-top: -0.1rem; -} - -.monitor-provider-metrics { - display: grid; - grid-template-columns: repeat(3, minmax(0, 1fr)); - gap: 0.55rem; -} - -.provider-mini-metric { - min-height: 0; - padding: 0.75rem 0.8rem; - border-radius: 12px; - border: 1px solid var(--border); - background: var(--panel); - display: flex; - flex-direction: column; - gap: 0.18rem; -} - -.provider-mini-label { - font-size: 0.74rem; - text-transform: uppercase; - letter-spacing: 0.04em; - color: var(--text-muted); -} - -.provider-mini-value { - font-size: 1rem; - line-height: 1.2; - color: var(--text); -} - -.provider-mini-note { - font-size: 0.76rem; - color: var(--text-secondary); -} - -.provider-capability-strip { - display: flex; - flex-wrap: wrap; - gap: 0.42rem; -} - -.provider-capability-chip { - display: inline-flex; - align-items: center; - gap: 0.3rem; - padding: 0.24rem 0.5rem; - border-radius: 999px; - border: 1px solid var(--border); - background: var(--bg-muted); - color: var(--text-secondary); - font-size: 0.72rem; - letter-spacing: 0.04em; - text-transform: uppercase; -} - -.provider-capability-chip.is-muted { - color: var(--text-muted); -} - -.provider-session-strip { - display: flex; - align-items: center; - justify-content: space-between; - gap: 0.75rem; - min-height: 1.25rem; -} - -.provider-session-empty { - color: var(--text-muted); - font-size: 0.8rem; -} - -.provider-session-dots { - display: flex; - align-items: center; - gap: 0.35rem; -} - -.provider-session-dot { - width: 0.45rem; - height: 0.45rem; - border-radius: 999px; - background: var(--border-strong); -} - -.provider-session-dot.status-running { - background: var(--success); -} - -.provider-session-dot.status-paused { - background: var(--warning); -} - -.provider-session-dot.status-stopped { - background: var(--border-strong); -} - -.provider-session-dot.status-destroying { - background: var(--danger); -} - -.provider-session-copy { - font-size: 0.78rem; - color: var(--text-secondary); -} - -.provider-inline-error { - margin-top: 0.32rem; - color: var(--danger); - font-size: 0.76rem; - line-height: 1.35; -} - -/* @@@provider-detail-primary - strongest containment in resource-detail column to anchor visual focus */ -.provider-detail-shell { - border: 1px solid var(--border-strong); - border-radius: 16px; - background: var(--panel); - padding: 1.15rem 1.3rem 1.2rem; - box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); -} - -.provider-detail-heading { - display: flex; - align-items: center; - gap: 0.55rem; -} - -.provider-detail-heading h2 { - margin: 0; -} - -.provider-detail-actions { - display: flex; - align-items: center; - gap: 0.6rem; - flex-wrap: wrap; -} - -/* @@@session-shell-subordinate - lease/session area below provider detail is secondary; border-top instead of full containment to avoid box-in-box */ -.resource-session-shell { - margin-top: 0.75rem; - padding-top: 0.75rem; - border-top: 1px solid var(--border); -} - -.monitor-lease-detail-shell { - margin-bottom: 1rem; - border: 1px solid var(--border-strong); - border-radius: 16px; - background: var(--panel); - padding: 1rem 1.05rem 1.05rem; - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04); -} - -.monitor-lease-detail-id { - color: var(--text-secondary); - font-size: 0.88rem; -} - -.monitor-lease-context-bar { - display: grid; - grid-template-columns: repeat(4, minmax(0, 1fr)); - gap: 0.6rem; - margin-bottom: 0.8rem; -} - -.monitor-lease-context-item { - display: flex; - flex-direction: column; - gap: 0.18rem; - padding: 0.7rem 0.78rem; - border-radius: 12px; - background: var(--bg-muted); - border: 1px solid var(--border); -} - -.monitor-lease-context-item strong { - font-size: 0.72rem; - letter-spacing: 0.04em; - text-transform: uppercase; - color: var(--text-muted); -} - -.monitor-lease-context-item span { - font-size: 0.84rem; - color: var(--text); -} - -.monitor-lease-session-table { - margin-top: 0.25rem; -} - -.resource-table-dense th { - padding: 0.55rem 0.7rem; - font-size: 0.8rem; -} - -.resource-table-dense td { - padding: 0.5rem 0.7rem; - font-size: 0.85rem; -} - -.provider-lease-grid { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); - gap: 0.85rem; - margin-bottom: 1rem; -} - -.provider-lease-card { - border: 1px solid var(--border); - border-radius: 16px; - background: var(--panel); - padding: 0.9rem; - display: flex; - flex-direction: column; - gap: 0.75rem; - text-align: left; - cursor: pointer; - transition: - border-color 0.18s ease, - background 0.18s ease, - box-shadow 0.18s ease; -} - -.provider-lease-card:hover { - border-color: var(--border-strong); - background: var(--bg-soft); -} - -.provider-lease-card.is-selected { - border-color: rgba(37, 99, 235, 0.24); - box-shadow: inset 0 0 0 1px rgba(37, 99, 235, 0.1); -} - -.provider-lease-header { - display: flex; - align-items: flex-start; - justify-content: space-between; - gap: 0.75rem; -} - -.provider-lease-header p { - margin: 0.22rem 0 0; - color: var(--text-muted); - font-size: 0.8rem; - font-family: "SF Mono", Monaco, monospace; -} - -.provider-lease-link { - color: var(--accent); - text-decoration: none; - font-weight: 600; -} - -.provider-lease-link:hover { - text-decoration: underline; -} - -.provider-lease-meta { - display: grid; - grid-template-columns: repeat(2, minmax(0, 1fr)); - gap: 0.6rem; - font-size: 0.8rem; - color: var(--text-secondary); -} - -.provider-lease-meta strong { - display: block; - margin-bottom: 0.2rem; - font-size: 0.72rem; - letter-spacing: 0.04em; - text-transform: uppercase; - color: var(--text-muted); -} - -.provider-lease-strip { - margin-bottom: 0; -} - -.resource-overview-strip { - display: flex; - gap: 0.65rem; - flex-wrap: wrap; - margin-bottom: 1rem; -} - -.resource-overview-pill { - display: inline-flex; - align-items: center; - gap: 0.45rem; - padding: 0.5rem 0.75rem; - border-radius: 999px; - border: 1px solid var(--border); - background: var(--bg-muted); - color: var(--text-secondary); -} - -.resource-overview-label { - font-size: 0.74rem; - letter-spacing: 0.04em; - text-transform: uppercase; - color: var(--text-muted); -} - -.lease-cluster-grid { - display: grid; - grid-template-columns: repeat(12, minmax(0, 1fr)); - gap: 1rem; -} - -.lease-cluster-grid > * { - grid-column: span 6; -} - -.lease-details-shell { - margin-top: 1rem; - border: 1px solid var(--border); - border-radius: 16px; - background: var(--panel); - padding: 0.9rem 1rem 1rem; -} - -.lease-details-shell summary { - cursor: pointer; - color: var(--text); - font-weight: 600; - margin-bottom: 0.9rem; -} - -.operator-notes-shell { - margin-bottom: 1.25rem; - border: 1px solid var(--border); - border-radius: 16px; - background: var(--panel); - padding: 0.85rem 1rem 1rem; -} - -.operator-notes-shell summary { - cursor: pointer; - color: var(--text); - font-weight: 600; -} - -.operator-notes-shell .evaluation-flow, -.operator-notes-shell .evaluation-notes { - margin-top: 1rem; -} - -.page-loading, -.page-error { - padding: 3rem 0; - text-align: center; - color: var(--text-muted); - font-size: 0.95rem; -} - -.page-error { - color: var(--danger); -} - -button, -select, -input { - font: inherit; -} - -select, -input { - min-height: 2.5rem; - border-radius: 10px; - border: 1px solid var(--border); - background: var(--panel); - color: var(--text); - padding: 0.55rem 0.7rem; -} - -select:focus, -input:focus, -button:focus-visible { - outline: 2px solid rgba(37, 99, 235, 0.18); - outline-offset: 2px; -} - -button:disabled, -select:disabled, -input:disabled { - cursor: not-allowed; - opacity: 0.62; -} - -/* Tables */ -table { - width: 100%; - border-collapse: collapse; - background: var(--panel); - border-radius: 12px; - overflow: hidden; - border: 1px solid var(--border); -} - -thead { - background: var(--bg-muted); -} - -th { - text-align: left; - padding: 0.75rem 1rem; - font-weight: 600; - color: var(--text); - font-size: 0.9rem; -} - -td { - padding: 0.75rem 1rem; - border-top: 1px solid var(--border); -} - -td[colspan] { - text-align: center; - color: var(--text-muted); - font-style: italic; - padding: 2rem 1rem; -} - -.cleanup-action-cell { - width: 1%; - white-space: nowrap; -} - -.cleanup-action-cell .ghost-btn { - min-height: 2rem; - padding: 0.35rem 0.7rem; - font-size: 0.82rem; -} - -.page[data-testid="page-traces"] td, -.page[data-testid="page-threads"] td { - padding: 0.5rem 0.75rem; - font-size: 0.88rem; -} - -.page[data-testid="page-traces"] th, -.page[data-testid="page-threads"] th { - padding: 0.55rem 0.75rem; - font-size: 0.82rem; -} - -tr:hover { - background: #fcfcfc; -} - -td a { - color: var(--accent); - text-decoration: none; -} - -td a:hover { - text-decoration: underline; -} - -.mono { - font-family: "SF Mono", Monaco, monospace; - font-size: 0.85rem; -} - -.error { - color: var(--danger); -} - -.orphan { - color: var(--warning); - font-style: italic; -} - -/* State Badge */ -.state-badge { - display: inline-block; - padding: 0.25rem 0.6rem; - border-radius: 4px; - font-size: 0.85rem; - font-weight: 500; -} - -.state-green { - background: var(--success-soft); - color: var(--success); -} - -.state-yellow { - background: var(--warning-soft); - color: var(--warning); -} - -.state-red { - background: var(--danger-soft); - color: var(--danger); -} - -/* Sections */ -section { - margin-bottom: 2rem; -} - -section ul { - list-style: none; - padding: 0; -} - -section li { - padding: 0.5rem 0; -} - -/* Info Grid */ -.info-grid { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); - gap: 1rem; - background: var(--panel); - padding: 1.5rem; - border-radius: 12px; - margin-bottom: 2rem; - border: 1px solid var(--border); -} - -.info-grid div { - display: flex; - flex-direction: column; - gap: 0.25rem; -} - -.info-grid strong { - color: var(--text-muted); - font-size: 0.85rem; - font-weight: 500; -} - -.info-grid-compact { - grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); - padding: 1rem 1.2rem; - gap: 0.75rem; -} - -.hint-box { - background: linear-gradient( - 180deg, - var(--panel) 0%, - var(--panel-strong) 100% - ); - border: 1px solid var(--border); - border-radius: 16px; - padding: 1.15rem 1.2rem; -} - -.hint-box h2 { - margin: 0 0 0.55rem; - font-size: 1rem; -} - -.hint-box ul { - display: flex; - flex-direction: column; - gap: 0.55rem; -} - -.hint-box li { - padding: 0; - color: var(--text-secondary); -} - -.section-row { - display: flex; - align-items: center; - justify-content: space-between; - gap: 1rem; - flex-wrap: wrap; -} - -.page-toolbar { - margin-bottom: 1rem; -} - -.page-kicker { - display: flex; - align-items: center; - gap: 0.6rem; -} - -.ghost-btn, -.primary-btn { - display: inline-flex; - align-items: center; - justify-content: center; - min-height: 2.5rem; - border-radius: 999px; - padding: 0.55rem 0.95rem; - border: 1px solid var(--border); - cursor: pointer; - transition: - background 0.18s ease, - border-color 0.18s ease, - color 0.18s ease, - transform 0.18s ease; -} - -.ghost-btn { - background: var(--panel); - color: var(--text-secondary); -} - -.ghost-btn:hover:not(:disabled) { - background: var(--bg-muted); - border-color: var(--border-strong); - color: var(--text); -} - -.ghost-btn.is-active { - background: var(--accent-soft); - border-color: rgba(37, 99, 235, 0.16); - color: var(--accent); -} - -.segmented-toggle { - display: inline-flex; - align-items: center; - gap: 0.5rem; - flex-wrap: wrap; -} - -.primary-btn { - background: var(--text); - color: #fff; - border-color: var(--text); -} - -.primary-btn:hover:not(:disabled) { - background: #242424; - border-color: #242424; - transform: translateY(-1px); -} - -.pagination-bar { - display: flex; - align-items: center; - justify-content: space-between; - gap: 1rem; - margin-bottom: 0.9rem; - flex-wrap: wrap; -} - -.pagination-controls { - display: flex; - align-items: center; - gap: 0.6rem; - flex-wrap: wrap; -} - -.pagination-size { - display: inline-flex; - align-items: center; - gap: 0.55rem; - color: var(--text-secondary); -} - -.pagination-size select { - min-width: 5rem; -} - -/* State Info */ -.state-info { - background: var(--panel); - padding: 1.5rem; - border-radius: 12px; - display: flex; - flex-direction: column; - gap: 1rem; - border: 1px solid var(--border); -} - -.state-info div { - display: flex; - gap: 1rem; - align-items: center; -} - -.state-info strong { - color: var(--text-muted); - min-width: 100px; -} - -/* JSON Payload */ -.json-payload { - background: var(--panel); - padding: 1.5rem; - border-radius: 12px; - overflow-x: auto; - font-family: "SF Mono", Monaco, monospace; - font-size: 0.85rem; - line-height: 1.5; - color: var(--text); - border: 1px solid var(--border); -} - -.page-tools { - display: flex; - gap: 0.75rem; - margin-bottom: 1rem; - flex-wrap: wrap; -} - -.quick-link { - display: inline-flex; - align-items: center; - padding: 0.45rem 0.8rem; - border-radius: 999px; - border: 1px solid var(--border); - background: var(--panel); - color: var(--text-secondary); - text-decoration: none; - font-size: 0.85rem; -} - -.quick-link:hover { - color: var(--text); - border-color: var(--border-strong); - background: var(--bg-muted); -} - -.trace-section-shell { - margin-top: 1.5rem; - background: var(--panel); - border: 1px solid var(--border); - border-radius: 12px; - padding: 1.2rem; -} - -.trace-section-shell > h2 { - margin-top: 0; -} - -.trace-summary { - white-space: pre-wrap; - word-break: break-word; - max-width: 56ch; -} - -.trace-actor { - display: inline-block; - padding: 0.15rem 0.45rem; - border-radius: 999px; - font-size: 0.75rem; - text-transform: uppercase; - letter-spacing: 0.04em; -} - -.trace-assistant { - background: #eff6ff; - color: #1d4ed8; -} - -.trace-tool { - background: #ecfdf5; - color: #047857; -} - -.trace-runtime { - background: #fffbeb; - color: #b45309; -} - -.trace-details summary { - cursor: pointer; - color: var(--accent); -} - -.trace-payload { - margin-top: 0.5rem; - max-height: 220px; - overflow: auto; - padding: 0.75rem; -} - -.trace-toolbar { - margin: 0.8rem 0; - display: flex; - justify-content: flex-start; - flex-wrap: wrap; - align-items: center; - gap: 0.55rem; - padding: 0.7rem 0.8rem; - border: 1px solid var(--border); - border-radius: 10px; - background: rgba(255, 255, 255, 0.72); -} - -.trace-run-select { - display: flex; - align-items: center; - gap: 0.4rem; -} - -.trace-run-select select { - border: 1px solid var(--border); - background: var(--panel); - color: var(--text); - border-radius: 6px; - padding: 0.3rem 0.45rem; -} - -.trace-filters { - display: flex; - gap: 0.4rem; - padding-left: 0.55rem; - border-left: 1px solid var(--border); -} - -.trace-view-switch { - display: flex; - gap: 0.4rem; - margin-left: auto; - padding-left: 0.55rem; - border-left: 1px solid var(--border); -} - -.trace-filter-btn { - border: 1px solid var(--border); - background: var(--panel); - color: var(--text-secondary); - border-radius: 6px; - padding: 0.28rem 0.62rem; - cursor: pointer; -} - -.trace-filter-btn.is-active { - background: var(--accent-soft); - color: var(--accent); - border-color: rgba(37, 99, 235, 0.18); -} - -.trace-raw-toggle { - color: var(--text-secondary); - font-size: 0.84rem; - display: flex; - align-items: center; - gap: 0.35rem; - padding: 0.25rem 0.5rem; - border-radius: 999px; - background: var(--bg-soft); -} - -.trace-metrics { - display: flex; - flex-wrap: wrap; - gap: 0.45rem; - color: var(--text-secondary); - font-size: 0.84rem; - margin-bottom: 0.35rem; -} - -.trace-metrics span { - display: inline-flex; - align-items: center; - padding: 0.22rem 0.5rem; - border-radius: 999px; - background: var(--bg-soft); - border: 1px solid var(--border); -} - -.trace-timeline { - margin-top: 0.8rem; - display: flex; - flex-direction: column; - gap: 0.35rem; -} - -.trace-card { - border: 1px solid var(--border); - background: var(--panel); - border-radius: 6px; - padding: 0.7rem 0.8rem; -} - -.trace-timeline > :nth-child(even) { - background: var(--bg-soft); -} - -.trace-card-assistant { - border-left: 4px solid #4f7fd8; -} - -.trace-card-tool { - border-left: 4px solid #5f9446; -} - -.trace-card-runtime { - border-left: 4px solid #a07932; -} - -.trace-card-header { - display: flex; - justify-content: space-between; - align-items: center; - margin-bottom: 0.6rem; - gap: 0.8rem; -} - -.trace-card-meta { - display: flex; - align-items: center; - gap: 0.42rem; -} - -.trace-step { - color: #89a4c0; - font-family: "SF Mono", Monaco, monospace; - font-size: 0.85rem; -} - -.trace-event { - color: var(--text); - font-size: 0.85rem; - font-family: "SF Mono", Monaco, monospace; -} - -.trace-run-id { - color: var(--text-muted); -} - -.trace-block-wrap { - display: flex; - flex-direction: column; - gap: 0.35rem; -} - -.trace-label { - color: var(--text-muted); - font-size: 0.78rem; - text-transform: uppercase; - letter-spacing: 0.04em; -} - -.trace-block { - background: var(--bg-soft); - border: 1px solid var(--border); - border-radius: 7px; - padding: 0.55rem 0.65rem; - font-family: "SF Mono", Monaco, monospace; - font-size: 0.82rem; - color: var(--text); - white-space: pre-wrap; - word-break: break-word; - max-height: 160px; - overflow: auto; -} - -.trace-output { - max-height: 220px; -} - -.trace-assistant-text { - max-height: 180px; -} - -.trace-command { - color: #047857; -} - -.trace-empty { - border: 1px dashed var(--border-strong); - color: var(--text-secondary); - border-radius: 8px; - padding: 1rem; -} - -.trace-guided-empty { - display: grid; - gap: 0.65rem; -} - -.trace-guided-empty p { - margin: 0; -} - -.trace-guided-actions { - display: flex; - gap: 0.65rem; - flex-wrap: wrap; -} - -.trace-surface-error { - border-style: solid; - border-color: rgba(220, 38, 38, 0.18); - background: var(--danger-soft); - color: var(--danger); -} - -.trace-raw-table { - margin-top: 1rem; -} - -.trace-step-card { - border: 1px solid var(--border); - background: var(--panel); - border-left: 4px solid #4f7fd8; - border-radius: 6px; - padding: 0.8rem; -} - -.conversation-card { - border: 1px solid var(--border); - background: var(--panel-strong); - border-radius: 6px; - padding: 0.8rem; -} - -.conversation-card[data-msg-type="assistant"] { - border-left: 3px solid #4f7fd8; -} - -.conversation-card[data-msg-type="tool"] { - border-left: 3px solid #5f9446; -} - -.conversation-card[data-msg-type="human"], -.conversation-card[data-msg-type="user"] { - border-left: 3px solid var(--border-strong); -} - -.trace-step-header { - display: flex; - justify-content: space-between; - gap: 0.8rem; - margin-bottom: 0.55rem; -} - -.trace-step-meta { - display: flex; - align-items: center; - gap: 0.6rem; -} - -.trace-step-index { - color: var(--text); - font-weight: 600; -} - -.trace-step-block { - display: flex; - flex-direction: column; - gap: 0.28rem; - margin: 0.4rem 0 0.65rem; -} - -.trace-raw-item { - margin-top: 0.5rem; -} - -.trace-raw-item-title { - display: flex; - gap: 0.45rem; - color: var(--text-secondary); - font-size: 0.82rem; -} - -.empty-list { - color: var(--text-muted); - font-style: italic; -} - -.eval-summary-bar { - display: flex; - flex-wrap: wrap; - gap: 0.4rem; - margin-bottom: 1rem; -} - -.eval-summary-chip { - display: inline-block; - padding: 0.2rem 0.55rem; - border-radius: 4px; - font-size: 0.82rem; - background: var(--bg-muted); - color: var(--text-secondary); - border: 1px solid var(--border); -} - -.status-chip { - display: inline-flex; - align-items: center; - gap: 0.3rem; - padding: 0.24rem 0.56rem; - border-radius: 999px; - border: 1px solid var(--border); - background: var(--bg-muted); - color: var(--text-secondary); - font-size: 0.74rem; - line-height: 1; - letter-spacing: 0.03em; - text-transform: uppercase; -} - -.chip-muted { - background: var(--bg-muted); - color: var(--text-secondary); - border-color: transparent; -} - -.chip-success { - background: var(--success-soft); - color: var(--success); - border-color: transparent; -} - -.chip-warning { - background: var(--warning-soft); - color: var(--warning); - border-color: transparent; -} - -.chip-danger { - background: var(--danger-soft); - color: var(--danger); - border-color: transparent; -} - -.chip-row { - display: flex; - flex-wrap: wrap; - gap: 0.35rem; -} - -/* @@@status-tone-chips - map session/lease status to chip colors so they read consistently across resources and eval pages */ -.status-running { - background: var(--success-soft); - color: var(--success); - border-color: transparent; -} - -.status-paused { - background: var(--warning-soft); - color: var(--warning); - border-color: transparent; -} - -.status-stopped { - background: var(--bg-muted); - color: var(--text-muted); - border-color: transparent; -} - -.status-destroying { - background: var(--danger-soft); - color: var(--danger); - border-color: transparent; -} - -.evaluation-flow, -.evaluation-overview, -.evaluation-notes { - display: grid; - grid-template-columns: repeat(12, minmax(0, 1fr)); - gap: 1rem; -} - -.evaluation-flow > *, -.evaluation-notes > * { - grid-column: span 4; -} - -.evaluation-overview > * { - grid-column: span 6; -} - -/* @@@eval-split-layout - fixed sidebar width matching resource-rail for visual consistency */ -.eval-split-layout { - display: grid; - grid-template-columns: 320px minmax(0, 1fr); - gap: 1rem; - align-items: start; -} - -/* @@@eval-aside-instrument - compact sticky aside with tighter text to feel like an instrument readout, not a content block */ -.eval-split-aside { - position: sticky; - top: 132px; - font-size: 0.88rem; -} - -.eval-split-aside h2 { - font-size: 0.95rem; - margin: 0 0 0.5rem; -} - -.eval-split-aside .count { - margin-bottom: 0.5rem; -} - -/* @@@eval-main-table-primary - table column gets subtle containment so it reads as the working surface */ -.eval-split-main { - border: 1px solid var(--border-strong); - border-radius: 16px; - background: var(--panel); - padding: 1.15rem 1.2rem; - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04); -} - -.eval-split-main h2 { - margin: 0 0 0.25rem; -} - -.eval-split-main table { - border: none; - border-radius: 0; -} - -.evaluation-meta-row { - display: flex; - flex-wrap: wrap; - gap: 0.8rem; - margin: 0.35rem 0 0.85rem; -} - -.eval-status-stack, -.eval-score-stack { - display: flex; - flex-direction: column; - gap: 0.35rem; - align-items: flex-start; -} - -.eval-score-stack { - font-size: 0.8rem; - color: var(--text-secondary); -} - -.evaluation-pagination-row { - display: flex; - align-items: center; - justify-content: space-between; - gap: 0.85rem; - margin-top: 0.75rem; - flex-wrap: wrap; -} - -.evaluation-pagination-copy { - margin: 0; -} - -.evaluation-pagination-actions { +/* Loading */ +div:has(> :only-child:is(div:contains("Loading"))) { display: flex; + justify-content: center; align-items: center; - gap: 0.5rem; -} - -.evaluation-overview .hint-box, -.evaluation-flow .hint-box, -.evaluation-notes .hint-box { - height: 100%; -} - -.evaluation-layout { - display: grid; - grid-template-columns: repeat(12, minmax(0, 1fr)); - gap: 1rem; - align-items: start; -} - -.evaluation-column { - grid-column: span 4; - display: flex; - flex-direction: column; - gap: 0.75rem; -} - -.evaluation-column h2 { - margin: 0; - font-size: 1rem; -} - -.evaluation-column-action { - grid-column: span 12; -} - -.evaluation-grid { - grid-template-columns: 1fr; - gap: 0.9rem; - margin-bottom: 0; -} - -.field-group { - display: flex; - flex-direction: column; - gap: 0.45rem; -} - -.field-label { - display: flex; - flex-direction: column; - gap: 0.25rem; - color: var(--text); -} - -.field-help { - color: var(--text-muted); - font-size: 0.84rem; - line-height: 1.5; -} - -.evaluation-action-row { - display: flex; - gap: 0.75rem; - flex-wrap: wrap; -} - -.eval-runtime-panel, -.eval-progress-cell { - display: flex; - flex-direction: column; - gap: 0.5rem; -} - -section.eval-runtime-panel { - background: var(--panel); - border: 1px solid var(--border); - border-radius: 12px; - padding: 1rem 1.2rem; -} - -.eval-progress-track { - position: relative; - width: 100%; - height: 0.65rem; - border-radius: 999px; - background: var(--bg-muted); - overflow: hidden; - border: 1px solid rgba(37, 99, 235, 0.08); -} - -.eval-progress-fill { - height: 100%; - border-radius: inherit; - background: linear-gradient(90deg, #3b82f6 0%, #60a5fa 100%); -} - -.eval-progress-line { - color: var(--text-secondary); - font-size: 0.78rem; - line-height: 1.5; - white-space: normal; -} - -.eval-operator-shell { - margin-top: 1rem; - margin-bottom: 1.5rem; - border: 1px solid var(--border); - border-radius: 16px; - padding: 1rem 1.1rem; - background: var(--panel); - display: flex; - flex-direction: column; - gap: 0.9rem; -} - -.eval-operator-warning { - background: linear-gradient( - 180deg, - var(--panel) 0%, - var(--warning-soft) 100% - ); -} - -.eval-operator-danger { - background: linear-gradient(180deg, var(--panel) 0%, var(--danger-soft) 100%); -} - -.eval-operator-success { - background: linear-gradient( - 180deg, - var(--panel) 0%, - var(--success-soft) 100% - ); -} - -.eval-operator-hero { - display: flex; - align-items: flex-start; - justify-content: space-between; - gap: 1rem; -} - -.eval-operator-hero h2 { - margin: 0 0 0.35rem; -} - -.eval-operator-headline { - font-size: 1.02rem; - color: var(--text); -} - -.eval-operator-grid { - display: grid; - grid-template-columns: repeat(12, minmax(0, 1fr)); - gap: 1rem; -} - -.eval-operator-grid > * { - grid-column: span 6; -} - -.eval-operator-shell h3 { - margin: 0 0 0.6rem; - font-size: 0.95rem; -} - -.eval-artifact-list, -.eval-next-step-list { - margin: 0; - padding-left: 1.1rem; - display: flex; - flex-direction: column; - gap: 0.45rem; -} - -.eval-artifact-list li, -.eval-next-step-list li { - color: var(--text-secondary); -} - -.eval-raw-notes { - border-top: 1px solid var(--border); - padding-top: 0.8rem; -} - -.eval-raw-notes summary { - cursor: pointer; - color: var(--text-secondary); - font-weight: 500; -} - -.eval-raw-notes pre { - margin-top: 0.75rem; - padding: 0.85rem 0.95rem; - border-radius: 12px; - border: 1px solid var(--border); - background: rgba(255, 255, 255, 0.72); - white-space: pre-wrap; - word-break: break-word; -} - -.eval-score-details { - margin: 1rem 0 0; - border: 1px solid var(--border); - border-radius: 14px; - background: var(--panel); - padding: 0.85rem 1rem; -} - -.eval-score-details summary { - cursor: pointer; - color: var(--text-secondary); - font-weight: 500; -} - -.eval-score-details .info-grid { - margin-top: 1rem; -} - -.eval-composer-backdrop { - position: fixed; - inset: 0; - background: rgba(250, 250, 250, 0.82); - backdrop-filter: blur(6px); - padding: 2rem; - overflow-y: auto; - z-index: 40; -} - -.eval-composer-panel { - width: min(1100px, 100%); - margin: 0 auto; - background: var(--panel); - border: 1px solid var(--border); - border-radius: 22px; - box-shadow: 0 20px 60px rgba(23, 23, 23, 0.08); - padding: 1.35rem; -} - -@media (max-width: 1080px) { - .console-app { - flex-direction: column; - } - - .console-sidebar { - width: 100%; - height: auto; - position: static; - border-right: 0; - border-bottom: 1px solid var(--border); - } - - .console-nav { - gap: 0.75rem; - } - - .console-nav-group { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); - gap: 0.3rem; - } - - .console-nav-group-label { - grid-column: 1 / -1; - } - - .console-header { - padding-left: 1.25rem; - padding-right: 1.25rem; - } - - .shell-guide-grid > *, - .dashboard-card, - .dashboard-card-hero, - .dashboard-sidebar-stack, - .resource-split-console, - .evaluation-flow > *, - .evaluation-notes > *, - .evaluation-overview > *, - .evaluation-column, - .provider-lease-meta, - .lease-cluster-grid > *, - .eval-operator-grid > * { - grid-column: span 12; - } - - .sticky-context { - position: static; - margin: 0 0 1rem; - padding: 0.6rem 0; - border-bottom: 1px solid var(--border); - background: transparent; - backdrop-filter: none; - } - - .resource-split-console, - .eval-split-layout { - grid-template-columns: 1fr; - } - - .eval-split-aside { - position: static; - } - - .resource-rail { - position: static; - } - - .resource-rail-list { - max-height: none; - overflow: visible; - padding-right: 0; - } - - .monitor-lease-context-bar { - grid-template-columns: repeat(2, minmax(0, 1fr)); - } -} - -@media (max-width: 720px) { - .console-header, - .content { - padding-left: 1rem; - padding-right: 1rem; - } - - .console-nav-group { - grid-template-columns: 1fr 1fr; - } - - .resource-rail-meta { - flex-direction: column; - align-items: flex-start; - gap: 0.2rem; - } - - .monitor-lease-context-bar { - grid-template-columns: 1fr; - } - - .console-header { - position: static; - } - - h1 { - font-size: 1.7rem; - } - - .shell-modal-backdrop, - .eval-composer-backdrop { - padding: 1rem; - } - - .shell-modal-panel, - .eval-composer-panel { - padding: 1rem; - } - - .trace-view-switch { - margin-left: 0; - } + min-height: 200px; + color: #888; } diff --git a/frontend/monitor/vite.config.ts b/frontend/monitor/vite.config.ts index a98d79886..7cc965550 100644 --- a/frontend/monitor/vite.config.ts +++ b/frontend/monitor/vite.config.ts @@ -1,35 +1,21 @@ -import { execSync } from "child_process"; import { defineConfig } from "vite"; import react from "@vitejs/plugin-react"; -function getWorktreePort(key: string, fallback: string): string { - try { - return execSync(`git config --worktree --get ${key}`, { encoding: "utf-8" }).trim(); - } catch { - return fallback; - } -} - -const backendPort = process.env.LEON_BACKEND_PORT || getWorktreePort("worktree.ports.backend", "8001"); -const monitorPort = parseInt(process.env.LEON_MONITOR_PORT || "5174", 10); -const monitorPreviewPort = parseInt(process.env.LEON_MONITOR_PREVIEW_PORT || "4174", 10); - export default defineConfig({ plugins: [react()], server: { - host: "0.0.0.0", - port: monitorPort, + port: 5174, strictPort: true, proxy: { "/api": { - target: `http://127.0.0.1:${backendPort}`, + target: "http://127.0.0.1:8001", changeOrigin: true, }, }, }, preview: { - host: "0.0.0.0", - port: monitorPreviewPort, + port: 4174, strictPort: true, }, }); + diff --git a/tests/Integration/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py index 5e6b9c04b..b03fa4e81 100644 --- a/tests/Integration/test_monitor_resources_route.py +++ b/tests/Integration/test_monitor_resources_route.py @@ -1,10 +1,21 @@ +from fastapi import FastAPI from fastapi.testclient import TestClient -from backend.web.main import app +from backend.web.core.dependencies import get_current_user_id +from backend.web.routers import monitor, resources + + +def _build_monitor_test_app(*, include_product_resources: bool = False) -> FastAPI: + app = FastAPI() + app.include_router(monitor.router) + if include_product_resources: + app.include_router(resources.router) + app.dependency_overrides[get_current_user_id] = lambda: "user-test" + return app def test_monitor_resources_route_smoke(): - with TestClient(app) as client: + with TestClient(_build_monitor_test_app()) as client: response = client.get("/api/monitor/resources") assert response.status_code == 200 @@ -20,7 +31,7 @@ def test_monitor_resources_route_smoke(): def test_monitor_resources_refresh_route_smoke(): - with TestClient(app) as client: + with TestClient(_build_monitor_test_app()) as client: response = client.post("/api/monitor/resources/refresh") assert response.status_code == 200 @@ -33,8 +44,16 @@ def test_monitor_resources_refresh_route_smoke(): assert set(payload["triage"]["summary"]).issuperset({"total", "active_drift", "detached_residue", "orphan_cleanup", "healthy_capacity"}) -def test_monitor_and_product_resource_routes_coexist_intentionally(): - with TestClient(app) as client: +def test_monitor_and_product_resource_routes_coexist_intentionally(monkeypatch): + from backend.web.services import resource_projection_service + + monkeypatch.setattr( + resource_projection_service, + "list_user_resource_providers", + lambda *_args, **_kwargs: {"summary": {"snapshot_at": "now"}, "providers": []}, + ) + + with TestClient(_build_monitor_test_app(include_product_resources=True)) as client: monitor_response = client.get("/api/monitor/resources") product_response = client.get("/api/resources/overview") @@ -43,7 +62,7 @@ def test_monitor_and_product_resource_routes_coexist_intentionally(): def test_monitor_health_route_smoke(): - with TestClient(app) as client: + with TestClient(_build_monitor_test_app()) as client: response = client.get("/api/monitor/health") assert response.status_code == 200 @@ -54,7 +73,7 @@ def test_monitor_health_route_smoke(): def test_monitor_dashboard_route_smoke(): - with TestClient(app) as client: + with TestClient(_build_monitor_test_app()) as client: response = client.get("/api/monitor/dashboard") assert response.status_code == 200 @@ -67,7 +86,7 @@ def test_monitor_dashboard_route_smoke(): def test_monitor_leases_route_exposes_summary_and_groups(): - with TestClient(app) as client: + with TestClient(_build_monitor_test_app()) as client: response = client.get("/api/monitor/leases") assert response.status_code == 200 @@ -104,7 +123,7 @@ def test_monitor_resources_cleanup_route_forwards_structured_payload(monkeypatch }, ) - with TestClient(app) as client: + with TestClient(_build_monitor_test_app()) as client: response = client.post( "/api/monitor/resources/cleanup", json={ diff --git a/tests/Unit/backend/web/services/test_resource_common.py b/tests/Unit/backend/web/services/test_resource_common.py deleted file mode 100644 index edc97dede..000000000 --- a/tests/Unit/backend/web/services/test_resource_common.py +++ /dev/null @@ -1,61 +0,0 @@ -from backend.web.services import resource_common - - -class _FakeThreadRepo: - def __init__(self, rows): - self._rows = rows - - def get_by_id(self, thread_id: str): - return self._rows.get(thread_id) - - def close(self): - pass - - -class _FakeMember: - def __init__(self, member_id: str, name: str, avatar: str | None = None): - self.id = member_id - self.name = name - self.avatar = avatar - - -class _FakeMemberRepo: - def __init__(self, members): - self._members = members - - def list_all(self): - return list(self._members) - - def close(self): - pass - - -def test_thread_owners_resolves_member_metadata_from_runtime_storage(): - owners = resource_common.thread_owners( - ["thread-1", "thread-2"], - thread_repo=_FakeThreadRepo({"thread-1": {"member_id": "member-1"}}), - member_repo=_FakeMemberRepo([_FakeMember("member-1", "Toad")]), - ) - - assert owners == { - "thread-1": {"member_id": "member-1", "member_name": "Toad", "avatar_url": None}, - "thread-2": {"member_id": None, "member_name": "未绑定Agent", "avatar_url": None}, - } - - -def test_metric_adds_error_only_when_present(): - assert resource_common.metric(1, 2, "%", "api", "live") == { - "used": 1, - "limit": 2, - "unit": "%", - "source": "api", - "freshness": "live", - } - assert resource_common.metric(None, None, "GB", "unknown", "stale", "probe failed") == { - "used": None, - "limit": None, - "unit": "GB", - "source": "unknown", - "freshness": "stale", - "error": "probe failed", - } diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index c314691e9..e3112c86d 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -1,85 +1,8 @@ import sqlite3 -from backend.web import monitor from backend.web.services import monitor_service -def _bootstrap_threads_monitor_db(db_path, count: int) -> sqlite3.Connection: - conn = sqlite3.connect(db_path) - conn.row_factory = sqlite3.Row - conn.executescript( - """ - CREATE TABLE sandbox_leases ( - lease_id TEXT PRIMARY KEY, - provider_name TEXT, - desired_state TEXT, - observed_state TEXT, - current_instance_id TEXT, - created_at TEXT, - updated_at TEXT - ); - - CREATE TABLE chat_sessions ( - chat_session_id TEXT PRIMARY KEY, - thread_id TEXT, - lease_id TEXT, - status TEXT, - started_at TEXT, - last_active_at TEXT - ); - """ - ) - for idx in range(count): - hour = idx // 60 - minute = idx % 60 - conn.execute( - """ - INSERT INTO chat_sessions ( - chat_session_id, thread_id, lease_id, status, started_at, last_active_at - ) VALUES (?, ?, ?, ?, ?, ?) - """, - ( - f"sess-{idx}", - f"thread-{idx:03d}", - None, - "closed", - f"2026-04-06T{hour:02d}:{minute:02d}:00", - f"2026-04-06T{hour:02d}:{minute:02d}:30", - ), - ) - conn.commit() - return conn - - -def test_list_running_eval_checkpoint_threads_returns_empty_when_eval_tables_absent(tmp_path, monkeypatch): - db_path = tmp_path / "leon.db" - sqlite3.connect(db_path).close() - monkeypatch.setattr(monitor, "DB_PATH", db_path) - - assert monitor._list_running_eval_checkpoint_threads() == [] - - -def test_list_threads_second_page_is_not_sliced_empty_after_sql_pagination(tmp_path, monkeypatch): - db_path = tmp_path / "sandbox.db" - conn = _bootstrap_threads_monitor_db(db_path, count=74) - try: - monkeypatch.setattr(monitor, "_list_running_eval_checkpoint_threads", lambda: []) - monkeypatch.setattr(monitor, "load_thread_mode_map", lambda thread_ids: {}) - - payload = monitor.list_threads(offset=50, limit=50, db=conn) - finally: - conn.close() - - assert payload["count"] == 24 - assert len(payload["items"]) == 24 - assert payload["items"][0]["thread_id"] == "thread-023" - assert payload["items"][-1]["thread_id"] == "thread-000" - assert payload["pagination"]["page"] == 2 - assert payload["pagination"]["has_prev"] is True - assert payload["pagination"]["has_next"] is False - assert payload["pagination"]["next_offset"] is None - - def test_list_leases_exposes_semantic_groups_and_summary(monkeypatch): class FakeRepo: def query_leases(self): @@ -252,73 +175,6 @@ def close(self): assert payload["related_threads"]["items"] == [{"thread_id": "thread-historical", "thread_url": "/thread/thread-historical"}] -def test_monitor_route_get_lease_falls_back_to_compat_db_when_service_misses(tmp_path, monkeypatch): - db_path = tmp_path / "sandbox.db" - conn = sqlite3.connect(db_path) - conn.row_factory = sqlite3.Row - conn.executescript( - """ - CREATE TABLE chat_sessions ( - chat_session_id TEXT PRIMARY KEY, - thread_id TEXT, - lease_id TEXT, - status TEXT, - started_at TEXT, - ended_at TEXT, - close_reason TEXT - ); - CREATE TABLE sandbox_leases ( - lease_id TEXT PRIMARY KEY, - provider_name TEXT, - desired_state TEXT, - observed_state TEXT, - current_instance_id TEXT, - last_error TEXT - ); - CREATE TABLE lease_events ( - event_id TEXT PRIMARY KEY, - lease_id TEXT, - event_type TEXT, - source TEXT, - payload_json TEXT, - error TEXT, - created_at TEXT - ); - """ - ) - conn.execute( - """ - INSERT INTO chat_sessions ( - chat_session_id, thread_id, lease_id, status, started_at, ended_at, close_reason - ) VALUES (?, ?, ?, ?, ?, ?, ?) - """, - ( - "sess-local", - "thread-local", - "lease-local-history", - "closed", - "2026-04-07T01:25:18.632049", - "2026-04-07T01:27:19.554403", - "thread_deleted", - ), - ) - conn.commit() - - def _raise_keyerror(_lease_id: str): - raise KeyError("Lease not found") - - monkeypatch.setattr(monitor_service, "get_lease", _raise_keyerror) - - try: - payload = monitor.get_lease("lease-local-history", db=conn) - finally: - conn.close() - - assert payload["lease_id"] == "lease-local-history" - assert payload["related_threads"]["items"] == [{"thread_id": "thread-local", "thread_url": "/thread/thread-local"}] - assert payload["state"]["text"] == "destroyed" - - def test_build_evaluation_operator_surface_flags_runner_exit_before_threads_materialize(): payload = monitor_service.build_evaluation_operator_surface( status="provisional", diff --git a/tests/Unit/monitor/test_monitor_resource_overview_uniqueness.py b/tests/Unit/monitor/test_monitor_resource_overview_uniqueness.py index 44c7db21b..dfcf08ba8 100644 --- a/tests/Unit/monitor/test_monitor_resource_overview_uniqueness.py +++ b/tests/Unit/monitor/test_monitor_resource_overview_uniqueness.py @@ -1,4 +1,4 @@ -from backend.web.services import resource_common, resource_projection_service +from backend.web.services import resource_service class _FakeRepo: @@ -67,25 +67,25 @@ def test_list_resource_providers_deduplicates_terminal_fallback_rows(monkeypatch }, ] - monkeypatch.setattr(resource_projection_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) + monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) monkeypatch.setattr( - resource_projection_service, + resource_service, "available_sandbox_types", lambda: [{"name": "local", "available": True}], ) monkeypatch.setattr( - resource_projection_service, + resource_service, "_resolve_instance_capabilities", - lambda _config_name: (resource_projection_service._empty_capabilities(), None), + lambda _config_name: (resource_service._empty_capabilities(), None), ) monkeypatch.setattr( - resource_projection_service, + resource_service, "_thread_owners", lambda thread_ids: {tid: {"member_id": "member-1", "member_name": "Toad", "avatar_url": None} for tid in thread_ids}, ) - monkeypatch.setattr(resource_projection_service, "list_resource_snapshots", lambda _lease_ids: {}) + monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {}) - payload = resource_projection_service.list_resource_providers() + payload = resource_service.list_resource_providers() local = payload["providers"][0] assert local["telemetry"]["running"]["used"] == 1 @@ -117,32 +117,32 @@ def test_list_resource_providers_resolves_owner_metadata_from_runtime_storage(mo }, ] - monkeypatch.setattr(resource_projection_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) + monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) monkeypatch.setattr( - resource_projection_service, + resource_service, "available_sandbox_types", lambda: [{"name": "daytona", "available": True}], ) - monkeypatch.setattr(resource_projection_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") - monkeypatch.setattr(resource_projection_service, "_resolve_console_url", lambda *_args, **_kwargs: None) + monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") + monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None) monkeypatch.setattr( - resource_projection_service, + resource_service, "_resolve_instance_capabilities", - lambda _config_name: (resource_projection_service._empty_capabilities(), None), + lambda _config_name: (resource_service._empty_capabilities(), None), ) monkeypatch.setattr( - resource_common, + resource_service, "build_thread_repo", lambda **_kwargs: _FakeThreadRepo({"thread-supabase": {"member_id": "member-1"}}), ) monkeypatch.setattr( - resource_common, + resource_service, "build_member_repo", lambda **_kwargs: _FakeMemberRepo([_FakeMember("member-1", "Toad")]), ) - monkeypatch.setattr(resource_projection_service, "list_resource_snapshots", lambda _lease_ids: {}) + monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {}) - payload = resource_projection_service.list_resource_providers() + payload = resource_service.list_resource_providers() assert payload["providers"][0]["sessions"] == [ { @@ -181,27 +181,27 @@ def test_list_resource_providers_hides_subagent_threads(monkeypatch): }, ] - monkeypatch.setattr(resource_projection_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) + monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) monkeypatch.setattr( - resource_projection_service, + resource_service, "available_sandbox_types", lambda: [{"name": "daytona", "available": True}], ) - monkeypatch.setattr(resource_projection_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") - monkeypatch.setattr(resource_projection_service, "_resolve_console_url", lambda *_args, **_kwargs: None) + monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") + monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None) monkeypatch.setattr( - resource_projection_service, + resource_service, "_resolve_instance_capabilities", - lambda _config_name: (resource_projection_service._empty_capabilities(), None), + lambda _config_name: (resource_service._empty_capabilities(), None), ) monkeypatch.setattr( - resource_projection_service, + resource_service, "_thread_owners", lambda thread_ids: {tid: {"member_id": tid, "member_name": tid, "avatar_url": None} for tid in thread_ids}, ) - monkeypatch.setattr(resource_projection_service, "list_resource_snapshots", lambda _lease_ids: {}) + monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {}) - payload = resource_projection_service.list_resource_providers() + payload = resource_service.list_resource_providers() sessions = payload["providers"][0]["sessions"] assert [session["threadId"] for session in sessions] == ["thread-parent"] @@ -222,30 +222,30 @@ def test_list_resource_providers_projects_visible_parent_when_raw_monitor_row_is ] monkeypatch.setattr( - resource_projection_service, + resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows, lease_threads={"lease-1": ["subagent-deadbeef", "thread-parent"]}), ) monkeypatch.setattr( - resource_projection_service, + resource_service, "available_sandbox_types", lambda: [{"name": "daytona_selfhost", "available": True}], ) - monkeypatch.setattr(resource_projection_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") - monkeypatch.setattr(resource_projection_service, "_resolve_console_url", lambda *_args, **_kwargs: None) + monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") + monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None) monkeypatch.setattr( - resource_projection_service, + resource_service, "_resolve_instance_capabilities", - lambda _config_name: (resource_projection_service._empty_capabilities(), None), + lambda _config_name: (resource_service._empty_capabilities(), None), ) monkeypatch.setattr( - resource_projection_service, + resource_service, "_thread_owners", lambda thread_ids: {tid: {"member_id": "member-1", "member_name": "Morel", "avatar_url": None} for tid in thread_ids}, ) - monkeypatch.setattr(resource_projection_service, "list_resource_snapshots", lambda _lease_ids: {}) + monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {}) - payload = resource_projection_service.list_resource_providers() + payload = resource_service.list_resource_providers() sessions = payload["providers"][0]["sessions"] assert sessions == [ @@ -285,27 +285,27 @@ def test_list_resource_providers_deduplicates_same_lease_thread_even_with_distin }, ] - monkeypatch.setattr(resource_projection_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) + monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows)) monkeypatch.setattr( - resource_projection_service, + resource_service, "available_sandbox_types", lambda: [{"name": "daytona_selfhost", "available": True}], ) - monkeypatch.setattr(resource_projection_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") - monkeypatch.setattr(resource_projection_service, "_resolve_console_url", lambda *_args, **_kwargs: None) + monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona") + monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None) monkeypatch.setattr( - resource_projection_service, + resource_service, "_resolve_instance_capabilities", - lambda _config_name: (resource_projection_service._empty_capabilities(), None), + lambda _config_name: (resource_service._empty_capabilities(), None), ) monkeypatch.setattr( - resource_projection_service, + resource_service, "_thread_owners", lambda thread_ids: {tid: {"member_id": "member-1", "member_name": "Toad", "avatar_url": None} for tid in thread_ids}, ) - monkeypatch.setattr(resource_projection_service, "list_resource_snapshots", lambda _lease_ids: {}) + monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {}) - payload = resource_projection_service.list_resource_providers() + payload = resource_service.list_resource_providers() sessions = payload["providers"][0]["sessions"] assert sessions == [ From d6c50e86c4f8ba0bada3ab33511c607ffdb8536f Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 16:00:39 +0800 Subject: [PATCH 86/87] fix: resolve monitor sqlite test collisions --- storage/providers/sqlite/sandbox_monitor_repo.py | 1 + tests/Unit/monitor/test_monitor_compat.py | 2 -- ...dbox_monitor_repo.py => test_monitor_sqlite_sandbox_repo.py} | 0 3 files changed, 1 insertion(+), 2 deletions(-) rename tests/Unit/monitor/{test_sqlite_sandbox_monitor_repo.py => test_monitor_sqlite_sandbox_repo.py} (100%) diff --git a/storage/providers/sqlite/sandbox_monitor_repo.py b/storage/providers/sqlite/sandbox_monitor_repo.py index 1fd75d856..406366859 100644 --- a/storage/providers/sqlite/sandbox_monitor_repo.py +++ b/storage/providers/sqlite/sandbox_monitor_repo.py @@ -189,6 +189,7 @@ def list_leases_with_threads(self) -> list[dict]: sl.recipe_json, sl.desired_state, sl.observed_state, + sl.created_at, sl.updated_at, at.thread_id, at.cwd diff --git a/tests/Unit/monitor/test_monitor_compat.py b/tests/Unit/monitor/test_monitor_compat.py index e3112c86d..809ede5e1 100644 --- a/tests/Unit/monitor/test_monitor_compat.py +++ b/tests/Unit/monitor/test_monitor_compat.py @@ -1,5 +1,3 @@ -import sqlite3 - from backend.web.services import monitor_service diff --git a/tests/Unit/monitor/test_sqlite_sandbox_monitor_repo.py b/tests/Unit/monitor/test_monitor_sqlite_sandbox_repo.py similarity index 100% rename from tests/Unit/monitor/test_sqlite_sandbox_monitor_repo.py rename to tests/Unit/monitor/test_monitor_sqlite_sandbox_repo.py From e4124b2f241386f66f6121fb14dc27684a55c071 Mon Sep 17 00:00:00 2001 From: shuxueshuxue Date: Tue, 7 Apr 2026 16:04:27 +0800 Subject: [PATCH 87/87] fix: harden monitor CI smoke coverage --- storage/contracts.py | 3 ++ .../test_monitor_resources_route.py | 41 +++++++++++++++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/storage/contracts.py b/storage/contracts.py index 8df8e6b8a..cc97e41de 100644 --- a/storage/contracts.py +++ b/storage/contracts.py @@ -98,6 +98,7 @@ def list_active(self) -> list[dict[str, Any]]: ... def list_all(self) -> list[dict[str, Any]]: ... def cleanup_expired(self) -> list[str]: ... + class SandboxMonitorRepo(Protocol): """Read-only monitor queries over sandbox/session/lease state.""" @@ -118,6 +119,8 @@ def count_rows(self, table_names: list[str]) -> dict[str, int]: ... def list_sessions_with_leases(self) -> list[dict[str, Any]]: ... def list_probe_targets(self) -> list[dict[str, Any]]: ... def query_lease_instance_id(self, lease_id: str) -> str | None: ... + + # --------------------------------------------------------------------------- # Member-Chat — enums + row types # --------------------------------------------------------------------------- diff --git a/tests/Integration/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py index b03fa4e81..95a82d809 100644 --- a/tests/Integration/test_monitor_resources_route.py +++ b/tests/Integration/test_monitor_resources_route.py @@ -14,7 +14,37 @@ def _build_monitor_test_app(*, include_product_resources: bool = False) -> FastA return app -def test_monitor_resources_route_smoke(): +def _stub_monitor_resource_snapshot(monkeypatch): + snapshot = { + "summary": { + "snapshot_at": "2026-04-07T00:00:00Z", + "last_refreshed_at": "2026-04-07T00:00:00Z", + "refresh_status": "fresh", + "running_sessions": 0, + "active_providers": 0, + "unavailable_providers": 0, + }, + "providers": [], + "triage": { + "summary": { + "total": 0, + "active_drift": 0, + "detached_residue": 0, + "orphan_cleanup": 0, + "healthy_capacity": 0, + }, + "groups": [], + }, + } + + monkeypatch.setattr(monitor, "get_monitor_resource_overview_snapshot", lambda: snapshot) + monkeypatch.setattr(monitor, "refresh_monitor_resource_overview_sync", lambda: snapshot) + return snapshot + + +def test_monitor_resources_route_smoke(monkeypatch): + _stub_monitor_resource_snapshot(monkeypatch) + with TestClient(_build_monitor_test_app()) as client: response = client.get("/api/monitor/resources") @@ -30,7 +60,9 @@ def test_monitor_resources_route_smoke(): assert isinstance(payload["triage"]["groups"], list) -def test_monitor_resources_refresh_route_smoke(): +def test_monitor_resources_refresh_route_smoke(monkeypatch): + _stub_monitor_resource_snapshot(monkeypatch) + with TestClient(_build_monitor_test_app()) as client: response = client.post("/api/monitor/resources/refresh") @@ -47,6 +79,7 @@ def test_monitor_resources_refresh_route_smoke(): def test_monitor_and_product_resource_routes_coexist_intentionally(monkeypatch): from backend.web.services import resource_projection_service + _stub_monitor_resource_snapshot(monkeypatch) monkeypatch.setattr( resource_projection_service, "list_user_resource_providers", @@ -72,7 +105,9 @@ def test_monitor_health_route_smoke(): assert "sessions" in payload -def test_monitor_dashboard_route_smoke(): +def test_monitor_dashboard_route_smoke(monkeypatch): + _stub_monitor_resource_snapshot(monkeypatch) + with TestClient(_build_monitor_test_app()) as client: response = client.get("/api/monitor/dashboard")