From 1d9306c6e0c4aa5a73bf97fd11cf38a115498764 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Fri, 20 Mar 2026 16:43:59 +0100 Subject: [PATCH] feat(tui): add compaction probe metrics to TUI dashboard (#2049) Display compaction probe validation statistics in the TUI memory panel: - Probe pass rate, soft/hard failure rates, error count - Last probe verdict (Pass/SoftFail/HardFail/Error) with score - Color-coded verdict rendering (Green/Yellow/Red/Gray) Adds ProbeVerdict enum variant for Error verdicts (metrics-only, not used by probe logic). Updates MetricsSnapshot with last_probe_verdict and last_probe_score fields. Implements conditional rendering in memory panel (hidden when no probes run). All 9 acceptance criteria verified by architect, critic, developer, tester, perf engineer, security engineer, impl-critic, and reviewer. --- CHANGELOG.md | 1 + .../src/agent/context/summarization.rs | 29 +++++- crates/zeph-core/src/metrics.rs | 7 ++ crates/zeph-memory/src/compaction_probe.rs | 2 + crates/zeph-tui/src/metrics.rs | 4 +- crates/zeph-tui/src/widgets/memory.rs | 97 ++++++++++++++++++- ...s__probe_error_verdict_shows_no_score.snap | 16 +++ ...ts__probe_lines_hidden_when_no_probes.snap | 14 +++ ...__probe_lines_visible_when_probes_ran.snap | 16 +++ 9 files changed, 177 insertions(+), 9 deletions(-) create mode 100644 crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_error_verdict_shows_no_score.snap create mode 100644 crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_hidden_when_no_probes.snap create mode 100644 crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_visible_when_probes_ran.snap diff --git a/CHANGELOG.md b/CHANGELOG.md index f03eb7f1..14c4784e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- feat(config): `[memory] structured_summaries = false` config field enables opt-in structured compaction summaries (issue #1607) - feat(tools): dynamic tool schema filtering — sends only relevant tool definitions to the LLM per turn, selected by embedding similarity between user query and tool descriptions; configurable via `[agent.tool_filter]` with `enabled`, `top_k`, `always_on`, and `min_description_words`; disabled by default (#2020) - enh(tools): `/status` reports `tool_filter` state when enabled — shows Filter line with top_k, always_on count, and embedding count; silent when filter is disabled (#2028) +- enh(tui): compaction probe metrics visible in Memory panel — shows rate distribution (`P N% S N% H N% E N%`) and last verdict with color coding (Pass=green, SoftFail=yellow, HardFail=red, Error=gray); lines hidden until first probe runs; `ProbeVerdict::Error` variant added for transport/timeout failures that produce no quality score (#2049) - feat(channels): register Discord slash commands (`/reset`, `/skills`, `/agent`) at startup via fire-and-forget background task; idempotent via `PUT /applications/{id}/commands` (CHAN-05, epic #1978) - feat(channels): extract shared `CONFIRM_TIMEOUT` constant (30s) to `zeph-channels` crate; Telegram, Discord, and Slack `confirm()` all reference it (CHAN-02, epic #1978) diff --git a/crates/zeph-core/src/agent/context/summarization.rs b/crates/zeph-core/src/agent/context/summarization.rs index 51226c9b..fa524e12 100644 --- a/crates/zeph-core/src/agent/context/summarization.rs +++ b/crates/zeph-core/src/agent/context/summarization.rs @@ -1253,7 +1253,11 @@ impl Agent { Ok(result) => result, Err(e) => { tracing::warn!("compaction probe error (non-blocking): {e:#}"); - self.update_metrics(|m| m.compaction_probe_errors += 1); + self.update_metrics(|m| { + m.compaction_probe_errors += 1; + m.last_probe_verdict = Some(zeph_memory::ProbeVerdict::Error); + m.last_probe_score = None; + }); None } }; @@ -1270,7 +1274,11 @@ impl 
Agent { threshold = result.hard_fail_threshold, "compaction probe HARD FAIL — keeping original messages" ); - self.update_metrics(|m| m.compaction_probe_failures += 1); + self.update_metrics(|m| { + m.compaction_probe_failures += 1; + m.last_probe_verdict = Some(zeph_memory::ProbeVerdict::HardFail); + m.last_probe_score = Some(result.score); + }); return Ok(CompactionOutcome::ProbeRejected); } zeph_memory::ProbeVerdict::SoftFail => { @@ -1279,11 +1287,24 @@ impl Agent { threshold = result.threshold, "compaction probe SOFT FAIL — proceeding with warning" ); - self.update_metrics(|m| m.compaction_probe_soft_failures += 1); + self.update_metrics(|m| { + m.compaction_probe_soft_failures += 1; + m.last_probe_verdict = Some(zeph_memory::ProbeVerdict::SoftFail); + m.last_probe_score = Some(result.score); + }); } zeph_memory::ProbeVerdict::Pass => { tracing::info!(score = result.score, "compaction probe passed"); - self.update_metrics(|m| m.compaction_probe_passes += 1); + self.update_metrics(|m| { + m.compaction_probe_passes += 1; + m.last_probe_verdict = Some(zeph_memory::ProbeVerdict::Pass); + m.last_probe_score = Some(result.score); + }); + } + zeph_memory::ProbeVerdict::Error => { + // Unreachable: validate_compaction returns Err on errors, not Ok(Error). + // If this fires, the error-handling path in validate_compaction changed. + debug_assert!(false, "ProbeVerdict::Error reached inside Ok path"); } } } diff --git a/crates/zeph-core/src/metrics.rs b/crates/zeph-core/src/metrics.rs index 0d0c90bf..4aa1deeb 100644 --- a/crates/zeph-core/src/metrics.rs +++ b/crates/zeph-core/src/metrics.rs @@ -5,6 +5,8 @@ use std::collections::VecDeque; use tokio::sync::watch; +pub use zeph_memory::ProbeVerdict; + /// Category of a security event for TUI display. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SecurityEventCategory { @@ -197,6 +199,11 @@ pub struct MetricsSnapshot { pub compaction_probe_failures: u64, /// Compaction probe errors (LLM/timeout — non-blocking, compaction proceeded). pub compaction_probe_errors: u64, + /// Last compaction probe verdict. `None` before the first probe completes. + pub last_probe_verdict: Option<ProbeVerdict>, + /// Last compaction probe score in [0.0, 1.0]. `None` before the first probe + /// completes or after an Error verdict (errors produce no score). + pub last_probe_score: Option<f32>, pub cache_read_tokens: u64, pub cache_creation_tokens: u64, pub cost_spent_cents: f64, diff --git a/crates/zeph-memory/src/compaction_probe.rs b/crates/zeph-memory/src/compaction_probe.rs index 66975611..f112f63e 100644 --- a/crates/zeph-memory/src/compaction_probe.rs +++ b/crates/zeph-memory/src/compaction_probe.rs @@ -38,6 +38,8 @@ pub enum ProbeVerdict { SoftFail, /// Score < `hard_fail_threshold`: summary lost critical facts. Block compaction. HardFail, + /// Transport/timeout failure — no quality score produced. + Error, } /// Full result of a compaction probe run. 
diff --git a/crates/zeph-tui/src/metrics.rs b/crates/zeph-tui/src/metrics.rs index 3004cd31..bb699406 100644 --- a/crates/zeph-tui/src/metrics.rs +++ b/crates/zeph-tui/src/metrics.rs @@ -2,6 +2,6 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 pub use zeph_core::metrics::{ - MetricsCollector, MetricsSnapshot, SecurityEvent, SecurityEventCategory, SkillConfidence, - TaskGraphSnapshot, TaskSnapshotRow, + MetricsCollector, MetricsSnapshot, ProbeVerdict, SecurityEvent, SecurityEventCategory, + SkillConfidence, TaskGraphSnapshot, TaskSnapshotRow, }; diff --git a/crates/zeph-tui/src/widgets/memory.rs b/crates/zeph-tui/src/widgets/memory.rs index 5be3cdce..c99d24b8 100644 --- a/crates/zeph-tui/src/widgets/memory.rs +++ b/crates/zeph-tui/src/widgets/memory.rs @@ -3,10 +3,11 @@ use ratatui::Frame; use ratatui::layout::Rect; -use ratatui::text::Line; +use ratatui::style::{Color, Style}; +use ratatui::text::{Line, Span}; use ratatui::widgets::{Block, Borders, Paragraph}; -use crate::metrics::MetricsSnapshot; +use crate::metrics::{MetricsSnapshot, ProbeVerdict}; use crate::theme::Theme; pub fn render(metrics: &MetricsSnapshot, frame: &mut Frame, area: Rect) { @@ -49,6 +50,41 @@ pub fn render(metrics: &MetricsSnapshot, frame: &mut Frame, area: Rect) { metrics.graph_extraction_count, metrics.graph_extraction_failures, ))); } + let total_probes = metrics.compaction_probe_passes + + metrics.compaction_probe_soft_failures + + metrics.compaction_probe_failures + + metrics.compaction_probe_errors; + + #[allow( + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + clippy::cast_sign_loss + )] + if total_probes > 0 { + let pct = |n: u64| -> u64 { (n as f64 / total_probes as f64 * 100.0).round() as u64 }; + let p = pct(metrics.compaction_probe_passes); + let s = pct(metrics.compaction_probe_soft_failures); + let h = pct(metrics.compaction_probe_failures); + let e = pct(metrics.compaction_probe_errors); + mem_lines.push(Line::from(format!(" Probe: P {p}% S {s}% H {h}% E 
{e}%"))); + + if let Some(verdict) = &metrics.last_probe_verdict { + let (label, color) = match verdict { + ProbeVerdict::Pass => ("Pass", Color::Green), + ProbeVerdict::SoftFail => ("SoftFail", Color::Yellow), + ProbeVerdict::HardFail => ("HardFail", Color::Red), + ProbeVerdict::Error => ("Error", Color::Gray), + }; + let score_str = metrics + .last_probe_score + .map_or_else(String::new, |sc| format!(" ({sc:.2})")); + mem_lines.push(Line::from(vec![ + Span::raw(" Last: "), + Span::styled(format!("{label}{score_str}"), Style::default().fg(color)), + ])); + } + } + if metrics.guidelines_version > 0 { mem_lines.push(Line::from(format!( " Guidelines: v{} ({})", @@ -68,7 +104,7 @@ pub fn render(metrics: &MetricsSnapshot, frame: &mut Frame, area: Rect) { mod tests { use insta::assert_snapshot; - use crate::metrics::MetricsSnapshot; + use crate::metrics::{MetricsSnapshot, ProbeVerdict}; use crate::test_utils::render_to_string; #[test] @@ -102,4 +138,59 @@ mod tests { }); assert_snapshot!(output); } + + #[test] + fn probe_lines_visible_when_probes_ran() { + let metrics = MetricsSnapshot { + sqlite_message_count: 5, + compaction_probe_passes: 87, + compaction_probe_soft_failures: 10, + compaction_probe_failures: 2, + compaction_probe_errors: 1, + last_probe_verdict: Some(ProbeVerdict::Pass), + last_probe_score: Some(0.91), + ..MetricsSnapshot::default() + }; + + let output = render_to_string(50, 12, |frame, area| { + super::render(&metrics, frame, area); + }); + assert_snapshot!(output); + } + + #[test] + fn probe_lines_hidden_when_no_probes() { + let metrics = MetricsSnapshot { + sqlite_message_count: 5, + compaction_probe_passes: 0, + compaction_probe_soft_failures: 0, + compaction_probe_failures: 0, + compaction_probe_errors: 0, + last_probe_verdict: None, + last_probe_score: None, + ..MetricsSnapshot::default() + }; + + let output = render_to_string(50, 10, |frame, area| { + super::render(&metrics, frame, area); + }); + assert_snapshot!(output); + } + + #[test] + fn 
probe_error_verdict_shows_no_score() { + let metrics = MetricsSnapshot { + sqlite_message_count: 5, + compaction_probe_passes: 1, + compaction_probe_errors: 1, + last_probe_verdict: Some(ProbeVerdict::Error), + last_probe_score: None, + ..MetricsSnapshot::default() + }; + + let output = render_to_string(50, 12, |frame, area| { + super::render(&metrics, frame, area); + }); + assert_snapshot!(output); + } } diff --git a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_error_verdict_shows_no_score.snap b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_error_verdict_shows_no_score.snap new file mode 100644 index 00000000..e465cf44 --- /dev/null +++ b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_error_verdict_shows_no_score.snap @@ -0,0 +1,16 @@ +--- +source: crates/zeph-tui/src/widgets/memory.rs +expression: output +--- +┌ Memory ────────────────────────────────────────┐ +│ SQLite: 5 msgs │ +│ Conv ID: --- │ +│ Embeddings: 0 │ +│ Graph: 0 entities, 0 edges, 0 communities │ +│ Graph extractions: 0 ok, 0 failed │ +│ Probe: P 50% S 0% H 0% E 50% │ +│ Last: Error │ +│ │ +│ │ +│ │ +└────────────────────────────────────────────────┘ diff --git a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_hidden_when_no_probes.snap b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_hidden_when_no_probes.snap new file mode 100644 index 00000000..46234470 --- /dev/null +++ b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_hidden_when_no_probes.snap @@ -0,0 +1,14 @@ +--- +source: crates/zeph-tui/src/widgets/memory.rs +expression: output +--- +┌ Memory ────────────────────────────────────────┐ +│ SQLite: 5 msgs │ +│ Conv ID: --- │ +│ Embeddings: 0 │ +│ Graph: 0 entities, 0 edges, 0 communities │ +│ Graph extractions: 0 ok, 0 failed │ +│ │ +│ │ +│ │ 
+└────────────────────────────────────────────────┘ diff --git a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_visible_when_probes_ran.snap b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_visible_when_probes_ran.snap new file mode 100644 index 00000000..c757e9ba --- /dev/null +++ b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__probe_lines_visible_when_probes_ran.snap @@ -0,0 +1,16 @@ +--- +source: crates/zeph-tui/src/widgets/memory.rs +expression: output +--- +┌ Memory ────────────────────────────────────────┐ +│ SQLite: 5 msgs │ +│ Conv ID: --- │ +│ Embeddings: 0 │ +│ Graph: 0 entities, 0 edges, 0 communities │ +│ Graph extractions: 0 ok, 0 failed │ +│ Probe: P 87% S 10% H 2% E 1% │ +│ Last: Pass (0.91) │ +│ │ +│ │ +│ │ +└────────────────────────────────────────────────┘