From 5d89587131f271509fe00d3c7241839f5b44a42d Mon Sep 17 00:00:00 2001 From: Keith Avery Date: Fri, 24 Apr 2026 04:28:58 -0400 Subject: [PATCH] fix(dashboard): span-close turn aggregator + active-session State pick MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two OTEL dashboard bugs from the 2026-04-24 playtest (sq-playtest-pingpong lines 582, 598). Bug 1 — Turns counter stuck at 0, Timeline / Timing / Prompt / Lore empty. Root cause: the aggregator only counted the semantic `turn_complete` event, which stopped reaching the stream in live traffic. OTEL span closes (`orchestrator.process_action`, `turn.agent_llm.inference`, `narrator.canonical_leak_audit`) kept flowing — Console + Subsystems tabs filled while every per-turn tab stayed empty. Fix: treat `agent_span_close { name: "orchestrator.process_action" }` as the canonical turn boundary. Accumulate `turn_id` / `player_id` / genre / world from `narrator.canonical_leak_audit` (carries `turn_id=:::`) and inference duration from `turn.agent_llm.inference`, then synthesize a `turn_complete` WatcherEvent on the `orchestrator.process_action` close. Dedupe by `turn_id` so a late semantic `turn_complete` replaces the synthesized entry rather than double-counting. Bug 2 — State tab reads the first session from /api/debug/state, which with multiple saves was usually the OLDEST save instead of the active one. StateTab now picks the session with the largest `last_activity_ts` (new field on `SessionStateView`; server-side sort is handled in the paired server PR). Wiring tests: - increments Turns counter when orchestrator.process_action closes (feeds the exact 3-event sequence seen in playtest console logs) - does not double-count when a real turn_complete follows a synthesized one for the same turn_id - State tab picks the most-recently-touched session by last_activity_ts - State tab falls back to first entry when last_activity_ts absent (back-compat with un-deployed servers) Paired with sidequest-server PR #39 for the server-side sort + `?session_key=` filter, plus the local_dm JSON extraction fix. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/components/Dashboard/DashboardApp.tsx | 171 ++++++++++++++++-- .../DashboardApp-event-parsing.test.tsx | 130 ++++++++++++- .../Dashboard/__tests__/StateTab.test.tsx | 31 ++++ src/components/Dashboard/tabs/StateTab.tsx | 16 +- src/types/watcher.ts | 4 + 5 files changed, 338 insertions(+), 14 deletions(-) diff --git a/src/components/Dashboard/DashboardApp.tsx b/src/components/Dashboard/DashboardApp.tsx index 7f5d6db..b65d7d4 100644 --- a/src/components/Dashboard/DashboardApp.tsx +++ b/src/components/Dashboard/DashboardApp.tsx @@ -30,8 +30,35 @@ export interface DashboardState { promptEvents: WatcherEvent[]; loreEvents: WatcherEvent[]; debugState: SessionStateView[] | null; + /** In-progress turn context accumulated from OTEL span closes between + * one `orchestrator.process_action` boundary and the next. Used to + * synthesize a `turn_complete` event when the server never emits a + * semantic one (playtest 2026-04-24 — the semantic path was missing + * from live traffic; only span closes reached the dashboard). */ + pendingTurn: PendingTurnContext; + /** Server-provided turn identifiers we've already accounted for, so a + * real `turn_complete` following a synthesized one doesn't double-count. */ + seenTurnKeys: Set; } +interface PendingTurnContext { + turnId: string | null; + playerId: string | null; + genre: string | null; + world: string | null; + inferenceDurationMs: number | null; + model: string | null; +} + +const emptyPending: PendingTurnContext = { + turnId: null, + playerId: null, + genre: null, + world: null, + inferenceDurationMs: null, + model: null, +}; + const initialState: DashboardState = { activeTab: 0, paused: false, @@ -42,6 +69,8 @@ const initialState: DashboardState = { promptEvents: [], loreEvents: [], debugState: null, + pendingTurn: { ...emptyPending }, + seenTurnKeys: new Set(), }; type Action = @@ -52,6 +81,61 @@ type Action = | { type: "SET_DEBUG_STATE"; data: SessionStateView[] } | { type: "SELECT_TURN"; index: number | null }; +/** + * Turn aggregator — playtest 2026-04-24 regression. + * + * The dashboard originally gated the Turns counter + Timeline / Timing / + * Prompt / Lore tabs on receiving a semantic `turn_complete` event. + * Under production traffic, those events stopped reaching the stream + * (the upstream publish never ran) but OTEL span closes kept flowing — + * including `orchestrator.process_action`, which is the canonical top-level + * span wrapping every turn. We now treat that span close as a turn + * boundary: accumulate contextual attributes from intermediate spans + * (`narrator.canonical_leak_audit` carries `turn_id`, `player_id`, genre, + * world; `turn.agent_llm.inference` carries duration + model), and on + * `orchestrator.process_action` close, synthesize a `turn_complete` + * WatcherEvent from the accumulated context. If the server later emits + * a real `turn_complete` for the same turn, dedupe via `seenTurnKeys`. + */ +function extractTurnKey(fields: Record): string | null { + const turnId = fields["turn_id"]; + if (typeof turnId === "string" && turnId.length > 0) return turnId; + // Numeric turn_id (semantic event) → stringified key. + if (typeof turnId === "number") return String(turnId); + return null; +} + +function synthesizeTurnComplete( + closingEvent: WatcherEvent, + pending: PendingTurnContext, +): WatcherEvent { + const fields: Record = { + turn_id: pending.turnId, + player_id: pending.playerId, + genre: pending.genre, + world: pending.world, + agent_name: "narrator", + agent_duration_ms: + pending.inferenceDurationMs ?? + (typeof closingEvent.fields["duration_ms"] === "number" + ? (closingEvent.fields["duration_ms"] as number) + : null), + total_duration_ms: + typeof closingEvent.fields["duration_ms"] === "number" + ? (closingEvent.fields["duration_ms"] as number) + : pending.inferenceDurationMs, + is_degraded: false, + synthesized: true, // Breadcrumb for debugging: this came from span-close, not a semantic emit. + }; + return { + timestamp: closingEvent.timestamp, + component: "orchestrator", + event_type: "turn_complete", + severity: "info", + fields, + }; +} + function reducer(state: DashboardState, action: Action): DashboardState { switch (action.type) { case "SET_TAB": @@ -63,6 +147,8 @@ function reducer(state: DashboardState, action: Action): DashboardState { ...initialState, activeTab: state.activeTab, debugState: state.debugState, + pendingTurn: { ...emptyPending }, + seenTurnKeys: new Set(), }; case "SELECT_TURN": return { ...state, selectedTurn: action.index }; @@ -80,21 +166,84 @@ function reducer(state: DashboardState, action: Action): DashboardState { componentMap: { ...state.componentMap, [comp]: [...compEvents, ev] }, }; + // --- Semantic events — server-emitted --- if (ev.event_type === "turn_complete") { - next.turns = [...state.turns, ev]; - // Auto-select latest turn if none selected or following latest - if ( - state.selectedTurn === null || - state.selectedTurn === state.turns.length - 1 - ) { - next.selectedTurn = next.turns.length - 1; + const key = extractTurnKey(ev.fields); + if (key !== null && state.seenTurnKeys.has(key)) { + // We already synthesized a turn for this key; keep the richer + // semantic event by replacing the synthesized placeholder. + const replaceIdx = state.turns.findIndex( + (t) => extractTurnKey(t.fields) === key, + ); + if (replaceIdx !== -1) { + const nextTurns = [...state.turns]; + nextTurns[replaceIdx] = ev; + next.turns = nextTurns; + } + } else { + next.turns = [...state.turns, ev]; + if (key !== null) { + next.seenTurnKeys = new Set(state.seenTurnKeys).add(key); + } + if ( + state.selectedTurn === null || + state.selectedTurn === state.turns.length - 1 + ) { + next.selectedTurn = next.turns.length - 1; + } } - } - if (ev.event_type === "prompt_assembled") { + next.pendingTurn = { ...emptyPending }; + } else if (ev.event_type === "prompt_assembled") { next.promptEvents = [...state.promptEvents, ev]; - } - if (ev.event_type === "lore_retrieval") { + } else if (ev.event_type === "lore_retrieval") { next.loreEvents = [...state.loreEvents, ev]; + } else if (ev.event_type === "agent_span_close") { + // --- Span-close turn-aggregator — playtest 2026-04-24 fallback --- + const spanName = String(ev.fields["name"] ?? ""); + if (spanName === "narrator.canonical_leak_audit") { + const turnId = ev.fields["turn_id"]; + if (typeof turnId === "string" && turnId.length > 0) { + // turn_id shape: ":::" + const parts = turnId.split(":"); + next.pendingTurn = { + ...state.pendingTurn, + turnId, + genre: parts[0] ?? state.pendingTurn.genre, + world: parts[1] ?? state.pendingTurn.world, + playerId: parts[2] ?? state.pendingTurn.playerId, + }; + } + } else if (spanName === "turn.agent_llm.inference") { + const dur = ev.fields["duration_ms"]; + const model = ev.fields["model"]; + next.pendingTurn = { + ...state.pendingTurn, + inferenceDurationMs: + typeof dur === "number" ? dur : state.pendingTurn.inferenceDurationMs, + model: + typeof model === "string" ? model : state.pendingTurn.model, + }; + } else if (spanName === "orchestrator.process_action") { + const pending = next.pendingTurn; // includes any updates above in this dispatch + const key = pending.turnId; + if (key !== null && state.seenTurnKeys.has(key)) { + // Already counted via semantic turn_complete; reset and skip. + next.pendingTurn = { ...emptyPending }; + } else { + const synthetic = synthesizeTurnComplete(ev, pending); + next.turns = [...state.turns, synthetic]; + if (key !== null) { + next.seenTurnKeys = new Set(state.seenTurnKeys).add(key); + } + if ( + state.selectedTurn === null || + state.selectedTurn === state.turns.length - 1 + ) { + next.selectedTurn = next.turns.length - 1; + } + next.pendingTurn = { ...emptyPending }; + } + } } return next; } diff --git a/src/components/Dashboard/__tests__/DashboardApp-event-parsing.test.tsx b/src/components/Dashboard/__tests__/DashboardApp-event-parsing.test.tsx index 187bb53..9da9e3d 100644 --- a/src/components/Dashboard/__tests__/DashboardApp-event-parsing.test.tsx +++ b/src/components/Dashboard/__tests__/DashboardApp-event-parsing.test.tsx @@ -107,11 +107,139 @@ describe("DashboardApp event parsing (playtest 2026-04-23)", () => { onEventCapture.fn!(spanClose); }); - // agent_span_close does NOT increment turns — that's turn_complete only. + // turn.agent_llm.inference alone is NOT a turn boundary — it's a + // contributing signal, but the aggregator waits for + // orchestrator.process_action before synthesizing a turn. expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*0/); // But it DOES land in allEvents → the Console tab surfaces it. expect( screen.getByText(/turn\.agent_llm\.inference/), ).toBeInTheDocument(); }); + + // ════════════════════════════════════════════════════════════════════════ + // Playtest 2026-04-24 regression — Turns counter stuck at 0. + // + // Bug: the semantic `turn_complete` event stopped arriving in live + // traffic but OTEL span closes (orchestrator.process_action, + // turn.agent_llm.inference, narrator.canonical_leak_audit) kept + // flowing — so the Console tab filled while the Turns counter / Timeline + // / Timing / Prompt / Lore tabs stayed empty. + // + // Fix: treat `agent_span_close { name: "orchestrator.process_action" }` + // as the canonical turn boundary. Accumulate turn_id / player_id from + // narrator.canonical_leak_audit and duration from turn.agent_llm.inference, + // then synthesize a turn_complete on the orchestrator.process_action close. + // ════════════════════════════════════════════════════════════════════════ + it("increments Turns counter when orchestrator.process_action closes (span-close fallback)", async () => { + const DashboardApp = await loadDashboard(); + render(); + + // Mimic the real 2026-04-24 playtest sequence: leak_audit carries + // turn_id, inference carries duration, then process_action closes. + const leakAudit: WatcherEvent = { + timestamp: "2026-04-24T17:00:00.000Z", + component: "sidequest-server", + event_type: "agent_span_close", + severity: "info", + fields: { + name: "narrator.canonical_leak_audit", + duration_ms: 12, + turn_id: "mutant_wasteland:flickering_reach:Slabgorb:7", + }, + }; + const inference: WatcherEvent = { + timestamp: "2026-04-24T17:00:00.100Z", + component: "sidequest-server", + event_type: "agent_span_close", + severity: "info", + fields: { + name: "turn.agent_llm.inference", + duration_ms: 20192, + model: "claude-opus-4-5", + }, + }; + const processAction: WatcherEvent = { + timestamp: "2026-04-24T17:00:00.200Z", + component: "sidequest-server", + event_type: "agent_span_close", + severity: "info", + fields: { + name: "orchestrator.process_action", + duration_ms: 22000, + action_len: 92, + }, + }; + + expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*0/); + + act(() => { + onEventCapture.fn!(leakAudit); + onEventCapture.fn!(inference); + onEventCapture.fn!(processAction); + }); + + // The aggregator synthesized a turn_complete from the span closes. + expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*1/); + }); + + it("does not double-count when a real turn_complete follows a synthesized one for the same turn_id", async () => { + const DashboardApp = await loadDashboard(); + render(); + + const turnKey = "mutant_wasteland:flickering_reach:Slabgorb:7"; + + const leakAudit: WatcherEvent = { + timestamp: "2026-04-24T17:00:00.000Z", + component: "sidequest-server", + event_type: "agent_span_close", + severity: "info", + fields: { + name: "narrator.canonical_leak_audit", + duration_ms: 12, + turn_id: turnKey, + }, + }; + const processAction: WatcherEvent = { + timestamp: "2026-04-24T17:00:00.100Z", + component: "sidequest-server", + event_type: "agent_span_close", + severity: "info", + fields: { + name: "orchestrator.process_action", + duration_ms: 22000, + action_len: 92, + }, + }; + // Server eventually also emits the semantic turn_complete with the + // same turn_id — the aggregator must replace-in-place, not append. + const semanticTurnComplete: WatcherEvent = { + timestamp: "2026-04-24T17:00:00.300Z", + component: "orchestrator", + event_type: "turn_complete", + severity: "info", + fields: { + turn_id: turnKey, + turn_number: 7, + agent_name: "narrator", + agent_duration_ms: 22000, + player_id: "Slabgorb", + genre: "mutant_wasteland", + world: "flickering_reach", + }, + }; + + act(() => { + onEventCapture.fn!(leakAudit); + onEventCapture.fn!(processAction); + }); + expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*1/); + + act(() => { + onEventCapture.fn!(semanticTurnComplete); + }); + // Still 1 — the real turn_complete replaced the synthesized entry + // instead of appending a duplicate. + expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*1/); + }); }); diff --git a/src/components/Dashboard/__tests__/StateTab.test.tsx b/src/components/Dashboard/__tests__/StateTab.test.tsx index 9f3d7ca..5a4301f 100644 --- a/src/components/Dashboard/__tests__/StateTab.test.tsx +++ b/src/components/Dashboard/__tests__/StateTab.test.tsx @@ -61,6 +61,37 @@ describe("StateTab — PlayerCard rendering (story 30-3)", () => { expect(locations.length).toBeGreaterThanOrEqual(1); }); + // Playtest 2026-04-24 — State tab was showing debugState[0], which in + // practice was the OLDEST save instead of the active one. The server + // now sorts by mtime and exposes `last_activity_ts`; the UI defensively + // picks the max-ts entry so older servers also work. + it("picks the most-recently-touched session by last_activity_ts", () => { + const stale = makeSession({ + session_key: "2026-04-22-old_save", + current_location: "Graveyard", + last_activity_ts: 1000, + }); + const active = makeSession({ + session_key: "2026-04-24-flickering_reach", + current_location: "The Filtration Warren", + last_activity_ts: 9999, + }); + // Deliberately pass stale first to prove sort (not index-0) wins. + render(); + // Active session's unique location appears; stale one's does not. + expect(screen.getAllByText(/The Filtration Warren/).length).toBeGreaterThanOrEqual(1); + expect(screen.queryByText(/Graveyard/)).toBeNull(); + }); + + it("falls back to first entry when last_activity_ts is missing", () => { + // Back-compat: older server that hasn't deployed the mtime field yet. + const a = makeSession({ session_key: "a", current_location: "Alpha" }); + const b = makeSession({ session_key: "b", current_location: "Bravo" }); + render(); + // With both ts missing the sort is a no-op; first entry wins. + expect(screen.getAllByText(/Alpha/).length).toBeGreaterThanOrEqual(1); + }); + it("renders multiple players without crashing", () => { const session = makeSession({ players: [ diff --git a/src/components/Dashboard/tabs/StateTab.tsx b/src/components/Dashboard/tabs/StateTab.tsx index 18d759c..69d34d5 100644 --- a/src/components/Dashboard/tabs/StateTab.tsx +++ b/src/components/Dashboard/tabs/StateTab.tsx @@ -27,8 +27,20 @@ export function StateTab({ debugState, onRefresh }: Props) { ); } - // For now, show the first (or only) session. Multi-session support: tabs/dropdown later if needed. - const session = debugState[0]; + // Pick the most-recently-touched session. + // + // Playtest 2026-04-24: with multiple saves in ~/.sidequest/saves/games/, + // the dashboard used to pick `debugState[0]` from the server's + // alphabetically-sorted list — which landed on the oldest save, not the + // active one. The server now sorts by save-file mtime (newest first) and + // exposes `last_activity_ts` on each view; we still sort defensively here + // so older servers (and any future unsorted responses) still land on the + // correct session. + const session = [...debugState].sort((a, b) => { + const aTs = a.last_activity_ts ?? 0; + const bTs = b.last_activity_ts ?? 0; + return bTs - aTs; + })[0]; return (
diff --git a/src/types/watcher.ts b/src/types/watcher.ts index ec5adbc..e83972f 100644 --- a/src/types/watcher.ts +++ b/src/types/watcher.ts @@ -124,4 +124,8 @@ export interface SessionStateView { has_music_director: boolean; has_audio_mixer: boolean; region_names: [string, string][]; + /** Save-file mtime in ms since epoch. Added 2026-04-24 so the State tab + * can pick the most-recently-touched session when multiple saves exist. + * Optional for back-compat with servers that haven't deployed yet. */ + last_activity_ts?: number; }