Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 160 additions & 11 deletions src/components/Dashboard/DashboardApp.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,35 @@ export interface DashboardState {
promptEvents: WatcherEvent[];
loreEvents: WatcherEvent[];
debugState: SessionStateView[] | null;
/** In-progress turn context accumulated from OTEL span closes between
* one `orchestrator.process_action` boundary and the next. Used to
* synthesize a `turn_complete` event when the server never emits a
* semantic one (playtest 2026-04-24 — the semantic path was missing
* from live traffic; only span closes reached the dashboard). */
pendingTurn: PendingTurnContext;
/** Server-provided turn identifiers we've already accounted for, so a
* real `turn_complete` following a synthesized one doesn't double-count. */
seenTurnKeys: Set<string>;
}

interface PendingTurnContext {
turnId: string | null;
playerId: string | null;
genre: string | null;
world: string | null;
inferenceDurationMs: number | null;
model: string | null;
}

const emptyPending: PendingTurnContext = {
turnId: null,
playerId: null,
genre: null,
world: null,
inferenceDurationMs: null,
model: null,
};

const initialState: DashboardState = {
activeTab: 0,
paused: false,
Expand All @@ -42,6 +69,8 @@ const initialState: DashboardState = {
promptEvents: [],
loreEvents: [],
debugState: null,
pendingTurn: { ...emptyPending },
seenTurnKeys: new Set(),
};

type Action =
Expand All @@ -52,6 +81,61 @@ type Action =
| { type: "SET_DEBUG_STATE"; data: SessionStateView[] }
| { type: "SELECT_TURN"; index: number | null };

/**
* Turn aggregator — playtest 2026-04-24 regression.
*
* The dashboard originally gated the Turns counter + Timeline / Timing /
* Prompt / Lore tabs on receiving a semantic `turn_complete` event.
* Under production traffic, those events stopped reaching the stream
* (the upstream publish never ran) but OTEL span closes kept flowing —
* including `orchestrator.process_action`, which is the canonical top-level
* span wrapping every turn. We now treat that span close as a turn
* boundary: accumulate contextual attributes from intermediate spans
* (`narrator.canonical_leak_audit` carries `turn_id`, `player_id`, genre,
* world; `turn.agent_llm.inference` carries duration + model), and on
* `orchestrator.process_action` close, synthesize a `turn_complete`
* WatcherEvent from the accumulated context. If the server later emits
* a real `turn_complete` for the same turn, dedupe via `seenTurnKeys`.
*/
function extractTurnKey(fields: Record<string, unknown>): string | null {
const turnId = fields["turn_id"];
if (typeof turnId === "string" && turnId.length > 0) return turnId;
// Numeric turn_id (semantic event) → stringified key.
if (typeof turnId === "number") return String(turnId);
return null;
}

function synthesizeTurnComplete(
closingEvent: WatcherEvent,
pending: PendingTurnContext,
): WatcherEvent {
const fields: Record<string, unknown> = {
turn_id: pending.turnId,
player_id: pending.playerId,
genre: pending.genre,
world: pending.world,
agent_name: "narrator",
agent_duration_ms:
pending.inferenceDurationMs ??
(typeof closingEvent.fields["duration_ms"] === "number"
? (closingEvent.fields["duration_ms"] as number)
: null),
total_duration_ms:
typeof closingEvent.fields["duration_ms"] === "number"
? (closingEvent.fields["duration_ms"] as number)
: pending.inferenceDurationMs,
is_degraded: false,
synthesized: true, // Breadcrumb for debugging: this came from span-close, not a semantic emit.
};
return {
timestamp: closingEvent.timestamp,
component: "orchestrator",
event_type: "turn_complete",
severity: "info",
fields,
};
}

function reducer(state: DashboardState, action: Action): DashboardState {
switch (action.type) {
case "SET_TAB":
Expand All @@ -63,6 +147,8 @@ function reducer(state: DashboardState, action: Action): DashboardState {
...initialState,
activeTab: state.activeTab,
debugState: state.debugState,
pendingTurn: { ...emptyPending },
seenTurnKeys: new Set(),
};
case "SELECT_TURN":
return { ...state, selectedTurn: action.index };
Expand All @@ -80,21 +166,84 @@ function reducer(state: DashboardState, action: Action): DashboardState {
componentMap: { ...state.componentMap, [comp]: [...compEvents, ev] },
};

// --- Semantic events — server-emitted ---
if (ev.event_type === "turn_complete") {
next.turns = [...state.turns, ev];
// Auto-select latest turn if none selected or following latest
if (
state.selectedTurn === null ||
state.selectedTurn === state.turns.length - 1
) {
next.selectedTurn = next.turns.length - 1;
const key = extractTurnKey(ev.fields);
if (key !== null && state.seenTurnKeys.has(key)) {
// We already synthesized a turn for this key; keep the richer
// semantic event by replacing the synthesized placeholder.
const replaceIdx = state.turns.findIndex(
(t) => extractTurnKey(t.fields) === key,
);
if (replaceIdx !== -1) {
const nextTurns = [...state.turns];
nextTurns[replaceIdx] = ev;
next.turns = nextTurns;
}
} else {
next.turns = [...state.turns, ev];
if (key !== null) {
next.seenTurnKeys = new Set(state.seenTurnKeys).add(key);
}
if (
state.selectedTurn === null ||
state.selectedTurn === state.turns.length - 1
) {
next.selectedTurn = next.turns.length - 1;
}
}
}
if (ev.event_type === "prompt_assembled") {
next.pendingTurn = { ...emptyPending };
} else if (ev.event_type === "prompt_assembled") {
next.promptEvents = [...state.promptEvents, ev];
}
if (ev.event_type === "lore_retrieval") {
} else if (ev.event_type === "lore_retrieval") {
next.loreEvents = [...state.loreEvents, ev];
} else if (ev.event_type === "agent_span_close") {
// --- Span-close turn-aggregator — playtest 2026-04-24 fallback ---
const spanName = String(ev.fields["name"] ?? "");
if (spanName === "narrator.canonical_leak_audit") {
const turnId = ev.fields["turn_id"];
if (typeof turnId === "string" && turnId.length > 0) {
// turn_id shape: "<genre>:<world>:<player>:<N>"
const parts = turnId.split(":");
next.pendingTurn = {
...state.pendingTurn,
turnId,
genre: parts[0] ?? state.pendingTurn.genre,
world: parts[1] ?? state.pendingTurn.world,
playerId: parts[2] ?? state.pendingTurn.playerId,
};
}
} else if (spanName === "turn.agent_llm.inference") {
const dur = ev.fields["duration_ms"];
const model = ev.fields["model"];
next.pendingTurn = {
...state.pendingTurn,
inferenceDurationMs:
typeof dur === "number" ? dur : state.pendingTurn.inferenceDurationMs,
model:
typeof model === "string" ? model : state.pendingTurn.model,
};
} else if (spanName === "orchestrator.process_action") {
const pending = next.pendingTurn; // includes any updates above in this dispatch
const key = pending.turnId;
if (key !== null && state.seenTurnKeys.has(key)) {
// Already counted via semantic turn_complete; reset and skip.
next.pendingTurn = { ...emptyPending };
} else {
const synthetic = synthesizeTurnComplete(ev, pending);
next.turns = [...state.turns, synthetic];
if (key !== null) {
next.seenTurnKeys = new Set(state.seenTurnKeys).add(key);
}
if (
state.selectedTurn === null ||
state.selectedTurn === state.turns.length - 1
) {
next.selectedTurn = next.turns.length - 1;
}
next.pendingTurn = { ...emptyPending };
}
}
}
return next;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,139 @@ describe("DashboardApp event parsing (playtest 2026-04-23)", () => {
onEventCapture.fn!(spanClose);
});

// agent_span_close does NOT increment turns — that's turn_complete only.
// turn.agent_llm.inference alone is NOT a turn boundary — it's a
// contributing signal, but the aggregator waits for
// orchestrator.process_action before synthesizing a turn.
expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*0/);
// But it DOES land in allEvents → the Console tab surfaces it.
expect(
screen.getByText(/turn\.agent_llm\.inference/),
).toBeInTheDocument();
});

// ════════════════════════════════════════════════════════════════════════
// Playtest 2026-04-24 regression — Turns counter stuck at 0.
//
// Bug: the semantic `turn_complete` event stopped arriving in live
// traffic but OTEL span closes (orchestrator.process_action,
// turn.agent_llm.inference, narrator.canonical_leak_audit) kept
// flowing — so the Console tab filled while the Turns counter / Timeline
// / Timing / Prompt / Lore tabs stayed empty.
//
// Fix: treat `agent_span_close { name: "orchestrator.process_action" }`
// as the canonical turn boundary. Accumulate turn_id / player_id from
// narrator.canonical_leak_audit and duration from turn.agent_llm.inference,
// then synthesize a turn_complete on the orchestrator.process_action close.
// ════════════════════════════════════════════════════════════════════════
it("increments Turns counter when orchestrator.process_action closes (span-close fallback)", async () => {
const DashboardApp = await loadDashboard();
render(<DashboardApp />);

// Mimic the real 2026-04-24 playtest sequence: leak_audit carries
// turn_id, inference carries duration, then process_action closes.
const leakAudit: WatcherEvent = {
timestamp: "2026-04-24T17:00:00.000Z",
component: "sidequest-server",
event_type: "agent_span_close",
severity: "info",
fields: {
name: "narrator.canonical_leak_audit",
duration_ms: 12,
turn_id: "mutant_wasteland:flickering_reach:Slabgorb:7",
},
};
const inference: WatcherEvent = {
timestamp: "2026-04-24T17:00:00.100Z",
component: "sidequest-server",
event_type: "agent_span_close",
severity: "info",
fields: {
name: "turn.agent_llm.inference",
duration_ms: 20192,
model: "claude-opus-4-5",
},
};
const processAction: WatcherEvent = {
timestamp: "2026-04-24T17:00:00.200Z",
component: "sidequest-server",
event_type: "agent_span_close",
severity: "info",
fields: {
name: "orchestrator.process_action",
duration_ms: 22000,
action_len: 92,
},
};

expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*0/);

act(() => {
onEventCapture.fn!(leakAudit);
onEventCapture.fn!(inference);
onEventCapture.fn!(processAction);
});

// The aggregator synthesized a turn_complete from the span closes.
expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*1/);
});

it("does not double-count when a real turn_complete follows a synthesized one for the same turn_id", async () => {
const DashboardApp = await loadDashboard();
render(<DashboardApp />);

const turnKey = "mutant_wasteland:flickering_reach:Slabgorb:7";

const leakAudit: WatcherEvent = {
timestamp: "2026-04-24T17:00:00.000Z",
component: "sidequest-server",
event_type: "agent_span_close",
severity: "info",
fields: {
name: "narrator.canonical_leak_audit",
duration_ms: 12,
turn_id: turnKey,
},
};
const processAction: WatcherEvent = {
timestamp: "2026-04-24T17:00:00.100Z",
component: "sidequest-server",
event_type: "agent_span_close",
severity: "info",
fields: {
name: "orchestrator.process_action",
duration_ms: 22000,
action_len: 92,
},
};
// Server eventually also emits the semantic turn_complete with the
// same turn_id — the aggregator must replace-in-place, not append.
const semanticTurnComplete: WatcherEvent = {
timestamp: "2026-04-24T17:00:00.300Z",
component: "orchestrator",
event_type: "turn_complete",
severity: "info",
fields: {
turn_id: turnKey,
turn_number: 7,
agent_name: "narrator",
agent_duration_ms: 22000,
player_id: "Slabgorb",
genre: "mutant_wasteland",
world: "flickering_reach",
},
};

act(() => {
onEventCapture.fn!(leakAudit);
onEventCapture.fn!(processAction);
});
expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*1/);

act(() => {
onEventCapture.fn!(semanticTurnComplete);
});
// Still 1 — the real turn_complete replaced the synthesized entry
// instead of appending a duplicate.
expect(screen.getByText(/Turns:/i).textContent).toMatch(/Turns:\s*1/);
});
});
31 changes: 31 additions & 0 deletions src/components/Dashboard/__tests__/StateTab.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,37 @@ describe("StateTab — PlayerCard rendering (story 30-3)", () => {
expect(locations.length).toBeGreaterThanOrEqual(1);
});

// Playtest 2026-04-24 — State tab was showing debugState[0], which in
// practice was the OLDEST save instead of the active one. The server
// now sorts by mtime and exposes `last_activity_ts`; the UI defensively
// picks the max-ts entry so older servers also work.
it("picks the most-recently-touched session by last_activity_ts", () => {
const stale = makeSession({
session_key: "2026-04-22-old_save",
current_location: "Graveyard",
last_activity_ts: 1000,
});
const active = makeSession({
session_key: "2026-04-24-flickering_reach",
current_location: "The Filtration Warren",
last_activity_ts: 9999,
});
// Deliberately pass stale first to prove sort (not index-0) wins.
render(<StateTab debugState={[stale, active]} onRefresh={vi.fn()} />);
// Active session's unique location appears; stale one's does not.
expect(screen.getAllByText(/The Filtration Warren/).length).toBeGreaterThanOrEqual(1);
expect(screen.queryByText(/Graveyard/)).toBeNull();
});

it("falls back to first entry when last_activity_ts is missing", () => {
// Back-compat: older server that hasn't deployed the mtime field yet.
const a = makeSession({ session_key: "a", current_location: "Alpha" });
const b = makeSession({ session_key: "b", current_location: "Bravo" });
render(<StateTab debugState={[a, b]} onRefresh={vi.fn()} />);
// With both ts missing the sort is a no-op; first entry wins.
expect(screen.getAllByText(/Alpha/).length).toBeGreaterThanOrEqual(1);
});

it("renders multiple players without crashing", () => {
const session = makeSession({
players: [
Expand Down
Loading
Loading