From f09f825b2c76cf0d6869ef8c222193273005638c Mon Sep 17 00:00:00 2001 From: Justin Maier Date: Mon, 13 Apr 2026 21:53:17 -0600 Subject: [PATCH] feat(metrics): expose prefilter registry state via Prometheus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the 5 metrics from design-prefilter-registry.md plus one extra age gauge into the collect-on-scrape path. Atomics were already tracked on PrefilterEntry from PR #207; this PR adds scrape-side plumbing so Grafana can show registered count, cardinality, substitution rate, last compute duration, refresh errors, and age-since-refresh. - bitdex_prefilter_registered{index} — gauge, count of registered entries - bitdex_prefilter_cardinality{index,name} — gauge, current bitmap size - bitdex_prefilter_substitutions_total{index,name} — counter surfaced as gauge (matches existing cache_hits_total pattern — the value comes from an AtomicU64 counter on the entry, not from a live Prometheus counter) - bitdex_prefilter_last_compute_seconds{index,name} — gauge - bitdex_prefilter_refresh_errors_total{index,name} — gauge - bitdex_prefilter_age_seconds{index,name} — gauge, alert candidate: if an SWR thread or orchestrator isn't refreshing, age will grow unbounded Tests: lib 17/17 + integration 3/3 still green. No behavior change to the substitute() hot path. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/metrics.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/server.rs | 30 ++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/src/metrics.rs b/src/metrics.rs index 4e17758..fe721a7 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -232,6 +232,22 @@ pub struct Metrics { // -- Boot phase breakdown -- pub boot_phase_seconds: IntGaugeVec, + + // -- Prefilter registry (see src/prefilter.rs) -- + /// Number of registered prefilters per index. + pub prefilter_registered: IntGaugeVec, + /// Current bitmap cardinality per registered prefilter. + pub prefilter_cardinality: IntGaugeVec, + /// Cumulative number of queries that substituted this prefilter. + pub prefilter_substitutions_total: IntGaugeVec, + /// Seconds spent on the last compute/refresh for this prefilter. + pub prefilter_last_compute_seconds: IntGaugeVec, + /// Cumulative refresh errors per prefilter. + pub prefilter_refresh_errors_total: IntGaugeVec, + /// Seconds since last successful refresh per prefilter. Useful for + /// paging on stale prefilters that should have been refreshed by the + /// SWR thread or manual `/refresh` calls. + pub prefilter_age_seconds: IntGaugeVec, } impl Metrics { @@ -1163,6 +1179,31 @@ impl Metrics { &["phase"], ).unwrap(); + let prefilter_registered = IntGaugeVec::new( + Opts::new("bitdex_prefilter_registered", "Number of registered prefilters"), + &["index"], + ).unwrap(); + let prefilter_cardinality = IntGaugeVec::new( + Opts::new("bitdex_prefilter_cardinality", "Current bitmap cardinality of a registered prefilter"), + &["index", "name"], + ).unwrap(); + let prefilter_substitutions_total = IntGaugeVec::new( + Opts::new("bitdex_prefilter_substitutions_total", "Queries that matched this prefilter"), + &["index", "name"], + ).unwrap(); + let prefilter_last_compute_seconds = IntGaugeVec::new( + Opts::new("bitdex_prefilter_last_compute_seconds", "Seconds spent on the last compute/refresh"), + &["index", "name"], + ).unwrap(); + let prefilter_refresh_errors_total = IntGaugeVec::new( + Opts::new("bitdex_prefilter_refresh_errors_total", "Cumulative refresh errors"), + &["index", "name"], + ).unwrap(); + let prefilter_age_seconds = IntGaugeVec::new( + Opts::new("bitdex_prefilter_age_seconds", "Seconds since last successful refresh"), + &["index", "name"], + ).unwrap(); + // Register all metrics registry.register(Box::new(alive_documents.clone())).unwrap(); registry.register(Box::new(slot_high_water.clone())).unwrap(); @@ -1325,6 +1366,12 @@ impl Metrics { registry.register(Box::new(wal_ops_written_total.clone())).unwrap(); registry.register(Box::new(wal_last_applied_timestamp_seconds.clone())).unwrap(); registry.register(Box::new(boot_phase_seconds.clone())).unwrap(); + registry.register(Box::new(prefilter_registered.clone())).unwrap(); + registry.register(Box::new(prefilter_cardinality.clone())).unwrap(); + registry.register(Box::new(prefilter_substitutions_total.clone())).unwrap(); + registry.register(Box::new(prefilter_last_compute_seconds.clone())).unwrap(); + registry.register(Box::new(prefilter_refresh_errors_total.clone())).unwrap(); + registry.register(Box::new(prefilter_age_seconds.clone())).unwrap(); Self { registry, @@ -1461,6 +1508,12 @@ impl Metrics { wal_ops_written_total, wal_last_applied_timestamp_seconds, boot_phase_seconds, + prefilter_registered, + prefilter_cardinality, + prefilter_substitutions_total, + prefilter_last_compute_seconds, + prefilter_refresh_errors_total, + prefilter_age_seconds, } } diff --git a/src/server.rs b/src/server.rs index a889c48..ba2f6f0 100644 --- a/src/server.rs +++ b/src/server.rs @@ -5154,6 +5154,36 @@ async fn handle_metrics(State(state): State) -> impl IntoResponse { .with_label_values(&[name]) .set(engine.slot_counter() as i64); + // Prefilter registry gauges (see src/prefilter.rs) + let prefilter_entries = engine.prefilters().entries(); + m.prefilter_registered + .with_label_values(&[name]) + .set(prefilter_entries.len() as i64); + let now_secs = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs() as i64) + .unwrap_or(0); + for entry in &prefilter_entries { + let labels = &[name.as_str(), entry.name.as_str()]; + m.prefilter_cardinality + .with_label_values(labels) + .set(entry.cardinality() as i64); + m.prefilter_substitutions_total + .with_label_values(labels) + .set(entry.substitutions() as i64); + // Last compute duration as integer seconds (IntGauge API). + m.prefilter_last_compute_seconds + .with_label_values(labels) + .set((entry.compute_ms() / 1000) as i64); + m.prefilter_refresh_errors_total + .with_label_values(labels) + .set(entry.refresh_errors() as i64); + let age = now_secs.saturating_sub(entry.last_refreshed()).max(0); + m.prefilter_age_seconds + .with_label_values(labels) + .set(age); + } + // Cache gauges let t0 = std::time::Instant::now(); let uc = engine.unified_cache_stats();