diff --git a/clients/agent-runtime/Cargo.lock b/clients/agent-runtime/Cargo.lock index a1618352..e5683b21 100644 --- a/clients/agent-runtime/Cargo.lock +++ b/clients/agent-runtime/Cargo.lock @@ -815,7 +815,7 @@ dependencies = [ [[package]] name = "cerebro" -version = "0.4.0" +version = "1.0.0" dependencies = [ "anyhow", "async-trait", @@ -1160,7 +1160,7 @@ dependencies = [ [[package]] name = "corvus" -version = "0.4.0" +version = "1.0.0" dependencies = [ "anyhow", "async-trait", @@ -8947,4 +8947,4 @@ checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", -] \ No newline at end of file +] diff --git a/clients/agent-runtime/src/agent/agent.rs b/clients/agent-runtime/src/agent/agent.rs index 79d5c89c..34a21c0a 100644 --- a/clients/agent-runtime/src/agent/agent.rs +++ b/clients/agent-runtime/src/agent/agent.rs @@ -13,7 +13,10 @@ use crate::agent::prompt::{ }; use crate::bootstrap; use crate::config::Config; -use crate::cost::{BudgetCheck, CostTracker, TokenUsage, UsagePeriod}; +use crate::cost::{ + BudgetCheck, BudgetEvaluation, CostService, CostTracker, MissionBudgetScope, TokenUsage, + UsagePeriod, +}; use crate::memory::{Memory, MemoryCategory}; use crate::observability::{redact_observer_payload, Observer, ObserverEvent}; use crate::providers::{ChatMessage, ChatRequest, ChatResponse, ConversationMessage, Provider}; @@ -24,6 +27,7 @@ use crate::security::{ use crate::tools::{Tool, ToolSpec}; use crate::util::truncate_with_ellipsis; use anyhow::Result; +use chrono::Utc; use futures_util::future::join_all; use std::collections::HashMap; use std::fmt; @@ -72,6 +76,13 @@ pub struct AgentTurnResult { pub event_log: Vec, } +#[derive(Debug, Clone)] +struct ActiveMissionBudget { + mission_id: String, + baseline_total_cost_usd: f64, + limit_usd: f64, +} + #[allow(clippy::struct_excessive_bools)] pub struct Agent { provider: Box, @@ -98,13 +109,16 @@ pub struct Agent { cost_tracker: Option>, cost_config: 
crate::config::CostConfig, mission_execution_context: bool, + active_mission_budget: Option, code_mode: bool, code_session_delegated: bool, } #[derive(Debug)] pub enum AgentExecutionError { - IterationBudgetExceeded { max_iterations: usize }, + IterationBudgetExceeded { + max_iterations: usize, + }, CostBudgetExceeded { current_usd: f64, limit_usd: f64, @@ -354,6 +368,7 @@ impl AgentBuilder { cost_tracker: self.cost_tracker, cost_config: self.cost_config.unwrap_or_default(), mission_execution_context: false, + active_mission_budget: None, code_mode: self.code_mode, code_session_delegated: self.code_session_delegated, }) @@ -394,6 +409,166 @@ impl Agent { self.history.clear(); } + pub(crate) fn apply_next_request_budget_override( + &self, + actor: impl Into, + reason: Option, + ) -> Result { + if !self.cost_config.enabled { + anyhow::bail!("Cost tracking is disabled for this session") + } + + let Some(tracker) = &self.cost_tracker else { + anyhow::bail!("Cost tracking is enabled, but the runtime cost tracker is unavailable") + }; + + let cost_service = CostService::new(Arc::clone(tracker)); + let now = Utc::now(); + let summary = cost_service.current_summary(now)?; + let override_record = cost_service.apply_override( + crate::cost::CostOverrideRequest { + actor: actor.into(), + scope: crate::cost::CostOverrideScope::NextRequest, + reason, + expires_at: None, + }, + now, + )?; + self.emit_budget_override_event( + &override_record, + crate::observability::BudgetOverrideAction::Granted, + summary.budget_state, + summary.active_period, + ); + Ok(override_record) + } + + fn budget_surface(&self) -> &'static str { + if self.code_mode { + "code_session" + } else if self.mission_execution_context { + "mission" + } else { + "agent_loop" + } + } + + fn emit_budget_warning_event( + &self, + current_usd: f64, + projected_usd: f64, + limit_usd: f64, + percent_used: f64, + period: UsagePeriod, + turn_context: Option<&TurnContext>, + ) { + if let Some(tracker) = &self.cost_tracker 
{ + self.observer.record_event(&ObserverEvent::BudgetWarning( + crate::observability::BudgetThresholdEvent { + budget_state: crate::cost::BudgetState::Warning, + period, + current_usd, + projected_usd, + limit_usd, + percent_used, + session_id: turn_context + .and_then(|context| context.session_id.clone()) + .unwrap_or_else(|| tracker.session_id().to_string()), + surface: Some(self.budget_surface().to_string()), + }, + )); + } + } + + fn emit_budget_exceeded_event( + &self, + current_usd: f64, + projected_usd: f64, + limit_usd: f64, + percent_used: f64, + period: UsagePeriod, + turn_context: Option<&TurnContext>, + ) { + if let Some(tracker) = &self.cost_tracker { + self.observer.record_event(&ObserverEvent::BudgetExceeded( + crate::observability::BudgetThresholdEvent { + budget_state: crate::cost::BudgetState::Exceeded, + period, + current_usd, + projected_usd, + limit_usd, + percent_used, + session_id: turn_context + .and_then(|context| context.session_id.clone()) + .unwrap_or_else(|| tracker.session_id().to_string()), + surface: Some(self.budget_surface().to_string()), + }, + )); + } + } + + fn emit_budget_override_event( + &self, + override_record: &crate::cost::CostOverrideRecord, + action: crate::observability::BudgetOverrideAction, + previous_state: crate::cost::BudgetState, + period: Option, + ) { + self.observer.record_event(&ObserverEvent::BudgetOverride( + crate::observability::BudgetOverrideEvent { + action, + actor: override_record.actor.clone(), + scope: override_record.scope, + reason: override_record.reason.clone(), + session_id: override_record.session_id.clone(), + previous_state, + period, + override_id: Some(override_record.id.clone()), + surface: Some(self.budget_surface().to_string()), + }, + )); + } + + pub(crate) fn session_cost_summary( + &self, + now: chrono::DateTime, + ) -> Result> { + if !self.cost_config.enabled { + return Ok(None); + } + + let Some(tracker) = &self.cost_tracker else { + return Ok(None); + }; + + let cost_service = 
CostService::new(Arc::clone(tracker)); + cost_service.current_summary(now).map(Some) + } + + pub(crate) fn record_agent_start_event(&self, provider: &str, model: &str) { + self.observer.record_event(&ObserverEvent::AgentStart { + provider: provider.to_string(), + model: model.to_string(), + }); + } + + pub(crate) fn record_agent_end_event(&self, provider: &str, model: &str, duration: Duration) { + let (tokens_used, cost_usd) = self + .cost_tracker + .as_ref() + .and_then(|tracker| tracker.get_summary().ok()) + .map(|summary| (Some(summary.total_tokens), Some(summary.session_cost_usd))) + .unwrap_or((None, None)); + + self.observer.record_event(&ObserverEvent::AgentEnd { + provider: provider.to_string(), + model: model.to_string(), + duration, + tokens_used, + cost_usd, + }); + } + pub fn from_config(config: &Config) -> Result { let bootstrap = bootstrap::BootstrapContext::from_config(config)?; @@ -824,43 +999,109 @@ impl Agent { /// Run pre-flight budget check. Returns Ok(()) if allowed or warning, /// returns Err if budget exceeded. - fn enforce_budget_check(&self, model: &str) -> Result<()> { + fn enforce_budget_check( + &self, + model: &str, + turn_context: &TurnContext, + ) -> Result> { let Some(tracker) = &self.cost_tracker else { - return Ok(()); + return Ok(None); }; + + let cost_service = CostService::new(Arc::clone(tracker)); let estimated_cost = self.estimate_request_cost(model); - match tracker.check_budget(estimated_cost)? { - BudgetCheck::Allowed => Ok(()), - BudgetCheck::Warning { - current_usd, - limit_usd, - period, + match cost_service.evaluate_request( + estimated_cost, + self.current_mission_budget_scope()?, + Utc::now(), + )? 
{ + BudgetEvaluation::Proceed { + check: BudgetCheck::Allowed, + override_applied, + reservation, + } => { + if let Some(override_applied) = override_applied { + self.emit_budget_override_event( + &override_applied, + crate::observability::BudgetOverrideAction::Consumed, + crate::cost::BudgetState::Allowed, + None, + ); + } + Ok(reservation) + } + BudgetEvaluation::Proceed { + check: + BudgetCheck::Warning { + current_usd, + projected_usd, + limit_usd, + percent_used, + period, + .. + }, + override_applied, + reservation, } => { - tracing::warn!( + if let Some(override_applied) = override_applied { + self.emit_budget_override_event( + &override_applied, + crate::observability::BudgetOverrideAction::Consumed, + crate::cost::BudgetState::Warning, + Some(period), + ); + } + self.emit_budget_warning_event( current_usd, + projected_usd, limit_usd, - ?period, - "Cost budget warning: approaching limit" + percent_used, + period, + Some(turn_context), ); - self.observer.record_event(&ObserverEvent::Error { - component: "cost_warning".to_string(), - message: format!( - "Budget warning: ${current_usd:.4} of \ - ${limit_usd:.2} {period:?} limit used" - ), - }); - Ok(()) + Ok(reservation) } - BudgetCheck::Exceeded { - current_usd, - limit_usd, - period, + BudgetEvaluation::Proceed { + check: + BudgetCheck::Exceeded { + current_usd: _, + projected_usd: _, + limit_usd: _, + percent_used: _, + period, + .. + }, + override_applied, + reservation, + } => { + if let Some(override_applied) = override_applied { + self.emit_budget_override_event( + &override_applied, + crate::observability::BudgetOverrideAction::Consumed, + crate::cost::BudgetState::Exceeded, + Some(period), + ); + } + Ok(reservation) + } + BudgetEvaluation::Blocked { + check: + BudgetCheck::Exceeded { + current_usd, + projected_usd, + limit_usd, + percent_used, + period, + .. 
+ }, } => { - tracing::error!( + self.emit_budget_exceeded_event( current_usd, + projected_usd, limit_usd, - ?period, - "Cost budget exceeded — blocking LLM call" + percent_used, + period, + Some(turn_context), ); Err(AgentExecutionError::CostBudgetExceeded { current_usd, @@ -869,15 +1110,60 @@ impl Agent { } .into()) } + BudgetEvaluation::Blocked { check } => Err(anyhow::anyhow!( + "Budget evaluation blocked request unexpectedly: {check:?}" + )), + } + } + + fn current_mission_budget_scope(&self) -> Result> { + let Some(active_budget) = &self.active_mission_budget else { + return Ok(None); + }; + + let Some(tracker) = &self.cost_tracker else { + return Ok(None); + }; + + let current_total_cost_usd = tracker.cumulative_total_cost_usd(); + if current_total_cost_usd < active_budget.baseline_total_cost_usd { + anyhow::bail!( + "mission budget baseline exceeded current cumulative total; runtime cost state regressed" + ); } + + Ok(Some(MissionBudgetScope { + mission_id: active_budget.mission_id.clone(), + current_usd: current_total_cost_usd - active_budget.baseline_total_cost_usd, + limit_usd: active_budget.limit_usd, + })) + } + + fn begin_active_mission_budget(&mut self, mission_id: &str) -> Result<()> { + let Some(tracker) = &self.cost_tracker else { + self.active_mission_budget = None; + return Ok(()); + }; + + self.active_mission_budget = Some(ActiveMissionBudget { + mission_id: mission_id.to_string(), + baseline_total_cost_usd: tracker.cumulative_total_cost_usd(), + limit_usd: f64::from(self.mission_config.max_estimated_cost_cents) / 100.0, + }); + Ok(()) + } + + fn end_active_mission_budget(&mut self) { + self.active_mission_budget = None; } /// Record token usage after a successful LLM call using estimated tokens. 
fn record_estimated_usage( - &self, + &mut self, model: &str, response: &ChatResponse, turn_context: &TurnContext, + reservation: Option<&crate::cost::CostBudgetReservation>, ) { let Some(tracker) = &self.cost_tracker else { return; @@ -920,6 +1206,11 @@ impl Agent { if let Err(error) = tracker.record_usage(usage) { tracing::warn!("Failed to record cost usage: {error}"); + return; + } + + if let Some(reservation) = reservation { + tracker.commit_budget_reservation(&reservation.id); } } @@ -1331,8 +1622,25 @@ impl Agent { format!("mission-{nanos}") } - fn build_mission_coordinator(&self) -> MissionCoordinator { - MissionCoordinator::new(self.mission_config.clone().into()) + fn build_mission_coordinator(&self) -> Result { + let governance: crate::agent::mission::MissionGovernance = + self.mission_config.clone().into(); + + if self.cost_config.enabled { + let tracker = self.cost_tracker.as_ref().ok_or_else(|| { + anyhow::anyhow!( + "mission runtime cost tracker unavailable while cost tracking is enabled" + ) + })?; + + return MissionCoordinator::new_with_runtime_cost_tracker( + governance, + Arc::clone(tracker), + ) + .map_err(Self::mission_error); + } + + Ok(MissionCoordinator::new(governance)) } fn build_mission_plan(&self, objective: &str, resume_from: Option) -> MissionPlan { @@ -1730,10 +2038,20 @@ impl Agent { resume_from, }); - let coordinator = self.build_mission_coordinator(); + self.begin_active_mission_budget(&mission_id)?; + let coordinator = match self.build_mission_coordinator() { + Ok(coordinator) => coordinator, + Err(error) => { + self.end_active_mission_budget(); + return Err(error); + } + }; let plan = self.build_mission_plan(objective, resume_from); - self.run_mission_plan(&coordinator, &mission_id, Instant::now(), plan) - .await + let result = self + .run_mission_plan(&coordinator, &mission_id, Instant::now(), plan) + .await; + self.end_active_mission_budget(); + result } async fn step_with_context( @@ -1743,9 +2061,9 @@ impl Agent { turn_context: 
&TurnContext, ) -> Result { // Pre-flight budget check before LLM call - self.enforce_budget_check(effective_model)?; + let reservation = self.enforce_budget_check(effective_model, turn_context)?; - let response = self + let response = match self .provider .chat( ChatRequest { @@ -1760,10 +2078,26 @@ impl Agent { effective_model, self.temperature, ) - .await?; + .await + { + Ok(response) => response, + Err(error) => { + if let (Some(tracker), Some(reservation)) = + (&self.cost_tracker, reservation.as_ref()) + { + tracker.release_budget_reservation(&reservation.id); + } + return Err(error); + } + }; // Record estimated usage after successful LLM call - self.record_estimated_usage(effective_model, &response, turn_context); + self.record_estimated_usage( + effective_model, + &response, + turn_context, + reservation.as_ref(), + ); let (text, calls) = self.tool_dispatcher.parse_response(&response); if calls.is_empty() { @@ -1878,10 +2212,7 @@ pub async fn run( .unwrap_or("anthropic/claude-sonnet-4-20250514") .to_string(); - agent.observer.record_event(&ObserverEvent::AgentStart { - provider: provider_name.clone(), - model: model_name.clone(), - }); + agent.record_agent_start_event(&provider_name, &model_name); if let Some(msg) = message { let response = agent.run_single(&msg).await?; @@ -1890,20 +2221,7 @@ pub async fn run( agent.run_interactive().await?; } - let (tokens_used, cost_usd) = agent - .cost_tracker - .as_ref() - .and_then(|tracker| tracker.get_summary().ok()) - .map(|summary| (Some(summary.total_tokens), Some(summary.session_cost_usd))) - .unwrap_or((None, None)); - - agent.observer.record_event(&ObserverEvent::AgentEnd { - provider: provider_name, - model: model_name, - duration: start.elapsed(), - tokens_used, - cost_usd, - }); + agent.record_agent_end_event(&provider_name, &model_name, start.elapsed()); Ok(()) } @@ -1914,7 +2232,9 @@ mod tests { use crate::test_support::test_config; use async_trait::async_trait; use parking_lot::Mutex; + use 
parking_lot::RwLock; use std::collections::HashSet; + use std::sync::Arc; use tempfile::TempDir; struct MockProvider { @@ -2001,6 +2321,33 @@ mod tests { results: Mutex>>, } + #[derive(Default)] + struct RecordingObserver { + events: RwLock>, + } + + impl RecordingObserver { + fn snapshot(&self) -> Vec { + self.events.read().clone() + } + } + + impl Observer for RecordingObserver { + fn record_event(&self, event: &ObserverEvent) { + self.events.write().push(event.clone()); + } + + fn record_metric(&self, _metric: &crate::observability::ObserverMetric) {} + + fn name(&self) -> &str { + "recording" + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + } + #[async_trait] impl Memory for ValidationMemory { fn name(&self) -> &str { @@ -2319,6 +2666,7 @@ mod tests { daily_limit_usd: daily_limit, monthly_limit_usd: 1000.0, warn_at_percent: 80, + allow_override: true, ..Default::default() }; let tracker = if cost_enabled { @@ -2350,6 +2698,48 @@ mod tests { (agent, tracker, tmp) } + fn build_agent_with_recording_observer( + provider: Box, + daily_limit: f64, + ) -> ( + Agent, + Arc, + Arc, + TempDir, + ) { + let tmp = TempDir::new().unwrap(); + let cost_config = crate::config::CostConfig { + enabled: true, + daily_limit_usd: daily_limit, + monthly_limit_usd: 1000.0, + warn_at_percent: 80, + allow_override: true, + ..Default::default() + }; + let tracker = + Arc::new(crate::cost::CostTracker::new(cost_config.clone(), tmp.path()).unwrap()); + let memory_cfg = crate::config::MemoryConfig { + backend: "none".into(), + ..crate::config::MemoryConfig::default() + }; + let mem: Arc = Arc::from( + crate::memory::create_memory(&memory_cfg, std::path::Path::new("/tmp"), None).unwrap(), + ); + let observer = Arc::new(RecordingObserver::default()); + let agent = Agent::builder() + .provider(provider) + .tools(vec![Box::new(MockTool)]) + .memory(mem) + .observer(observer.clone() as Arc) + .tool_dispatcher(Box::new(XmlToolDispatcher)) + .workspace_dir(tmp.path().to_path_buf()) + 
.cost_tracker(Some(tracker.clone())) + .cost_config(cost_config) + .build() + .unwrap(); + (agent, tracker, observer, tmp) + } + #[tokio::test] async fn cost_tracker_records_usage_after_llm_call() { let provider = Box::new(MockProvider { @@ -2430,6 +2820,224 @@ mod tests { err_msg.contains("Budget exceeded"), "expected budget exceeded error, got: {err_msg}" ); + assert!( + !err_msg.contains("action budget exhausted"), + "token-spend denial should not be labeled as action-rate exhaustion: {err_msg}" + ); + } + + #[tokio::test] + async fn mission_scope_blocks_metered_call_independently_from_session_budget() { + let provider = Box::new(MockProvider { + responses: Mutex::new(vec![crate::providers::ChatResponse { + text: Some("should not reach".into()), + tool_calls: vec![], + }]), + }); + let (mut agent, tracker, _tmp) = build_agent_with_cost_tracker(provider, true, 100.0); + let tracker = tracker.unwrap(); + agent.mission_config.max_estimated_cost_cents = 100; + agent.begin_active_mission_budget("mission-a").unwrap(); + + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.05; + tracker.record_usage(usage).unwrap(); + + let result = agent.turn("mission checkpoint").await; + agent.end_active_mission_budget(); + + assert!(result.is_err()); + let error = result.unwrap_err().to_string(); + assert!(error.contains("Budget exceeded")); + assert!(error.contains("Mission limit")); + } + + #[tokio::test] + async fn mission_budget_scope_survives_session_reset() { + let provider = Box::new(MockProvider { + responses: Mutex::new(vec![crate::providers::ChatResponse { + text: Some("mission still tracked".into()), + tool_calls: vec![], + }]), + }); + let (mut agent, tracker, _tmp) = build_agent_with_cost_tracker(provider, true, 100.0); + let tracker = tracker.unwrap(); + agent.mission_config.max_estimated_cost_cents = 100; + agent.begin_active_mission_budget("mission-a").unwrap(); + + let mut usage = 
crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 0.75; + tracker.record_usage(usage).unwrap(); + + tracker + .reset( + crate::cost::CostResetRequest { + scope: crate::cost::CostResetScope::Session, + actor: "tester".into(), + reason: Some("session reset".into()), + }, + chrono::Utc::now(), + ) + .unwrap(); + + let scope = agent.current_mission_budget_scope().unwrap().unwrap(); + assert!((scope.current_usd - 0.75).abs() < 0.0001); + assert!((scope.limit_usd - 1.0).abs() < 0.0001); + } + + #[tokio::test] + async fn token_budget_denial_is_reported_separately_from_action_rate_governance() { + let provider = Box::new(MockProvider { + responses: Mutex::new(vec![crate::providers::ChatResponse { + text: Some("should not reach".into()), + tool_calls: vec![], + }]), + }); + let (mut agent, tracker, _tmp) = build_agent_with_cost_tracker(provider, true, 1.0); + let tracker = tracker.unwrap(); + + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.1; + tracker.record_usage(usage).unwrap(); + + let result = agent.turn("hi").await; + assert!(result.is_err()); + + let error = result.unwrap_err().to_string(); + assert!(error.contains("Budget exceeded")); + assert!(!error.contains("action budget exhausted")); + } + + #[tokio::test] + async fn next_request_override_allows_one_blocked_turn() { + let provider = Box::new(MockProvider { + responses: Mutex::new(vec![ + crate::providers::ChatResponse { + text: Some("override succeeded".into()), + tool_calls: vec![], + }, + crate::providers::ChatResponse { + text: Some("should block again".into()), + tool_calls: vec![], + }, + ]), + }); + let (mut agent, tracker, _tmp) = build_agent_with_cost_tracker(provider, true, 1.0); + let tracker = tracker.unwrap(); + let service = crate::cost::CostService::new(tracker.clone()); + let now = chrono::Utc::now(); + + let mut usage = crate::cost::TokenUsage::new( + "anthropic/claude-sonnet-4-20250514", + 1_000, + 500, 
+ 0.0, + 0.0, + ); + usage.cost_usd = 1.1; + usage.timestamp = now; + tracker.record_usage(usage).unwrap(); + + service + .apply_override( + crate::cost::CostOverrideRequest { + actor: "operator".to_string(), + scope: crate::cost::CostOverrideScope::NextRequest, + reason: Some("allow one follow-up".to_string()), + expires_at: Some(now + chrono::Duration::minutes(5)), + }, + now, + ) + .unwrap(); + + let response = agent.turn("hi with override").await.unwrap(); + assert_eq!(response, "override succeeded"); + + let result = agent.turn("hi after override").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn warning_threshold_emits_budget_warning_event() { + let provider = Box::new(MockProvider { + responses: Mutex::new(vec![crate::providers::ChatResponse { + text: Some("warning".into()), + tool_calls: vec![], + }]), + }); + let (mut agent, tracker, observer, _tmp) = + build_agent_with_recording_observer(provider, 1.0); + + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 0.81; + tracker.record_usage(usage).unwrap(); + + let response = agent + .turn_with_context("hi", TurnContext::with_session("webhook-123")) + .await + .unwrap(); + assert_eq!(response.final_text.as_deref(), Some("warning")); + + let events = observer.snapshot(); + assert!(events + .iter() + .any(|event| matches!(event, ObserverEvent::BudgetWarning(_)))); + assert!(events.iter().any(|event| matches!(event, + ObserverEvent::BudgetWarning(event) if event.session_id == "webhook-123" + ))); + } + + #[tokio::test] + async fn blocked_turn_emits_budget_exceeded_event() { + let provider = Box::new(MockProvider { + responses: Mutex::new(vec![crate::providers::ChatResponse { + text: Some("blocked".into()), + tool_calls: vec![], + }]), + }); + let (mut agent, tracker, observer, _tmp) = + build_agent_with_recording_observer(provider, 1.0); + + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 
1.1; + tracker.record_usage(usage).unwrap(); + + let result = agent + .turn_with_context("hi", TurnContext::with_session("webhook-456")) + .await; + assert!(result.is_err()); + + let events = observer.snapshot(); + assert!(events + .iter() + .any(|event| matches!(event, ObserverEvent::BudgetExceeded(_)))); + assert!(events.iter().any(|event| matches!(event, + ObserverEvent::BudgetExceeded(event) if event.session_id == "webhook-456" + ))); + } + + #[test] + fn local_override_emits_budget_override_event() { + let provider = Box::new(MockProvider { + responses: Mutex::new(Vec::new()), + }); + let (agent, tracker, observer, _tmp) = build_agent_with_recording_observer(provider, 1.0); + + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.1; + tracker.record_usage(usage).unwrap(); + + agent + .apply_next_request_budget_override( + "paired-admin-token", + Some("token=super-secret".into()), + ) + .unwrap(); + + assert!(observer + .snapshot() + .into_iter() + .any(|event| matches!(event, ObserverEvent::BudgetOverride(_)))); } #[tokio::test] @@ -2458,4 +3066,4 @@ mod tests { "cost_tracker should be None when cost.enabled=false" ); } -} \ No newline at end of file +} diff --git a/clients/agent-runtime/src/agent/mission.rs b/clients/agent-runtime/src/agent/mission.rs index b8b18b4a..9f503a97 100644 --- a/clients/agent-runtime/src/agent/mission.rs +++ b/clients/agent-runtime/src/agent/mission.rs @@ -1,5 +1,15 @@ use std::sync::{Arc, Mutex}; +type SessionCostReader = Arc Result + Send + Sync>; + +enum MissionCostAccounting { + Local(Arc>), + RuntimeDerived { + baseline_cost_cents: u32, + session_cost_reader: SessionCostReader, + }, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum MissionState { ObjectiveAccepted, @@ -141,26 +151,64 @@ pub struct MissionOutcome { pub struct MissionCoordinator { pub state: Arc>, pub governance: MissionGovernance, - pub accumulated_cost_cents: Arc>, pub accumulated_steps: Arc>, pub elapsed_ms: 
Arc>, pub latest_successful_checkpoint: Arc>>, pub latest_failure: Arc>>, + cost_accounting: MissionCostAccounting, } impl MissionCoordinator { pub fn new(governance: MissionGovernance) -> Self { + Self::new_with_local_cost(governance) + } + + fn new_with_local_cost(governance: MissionGovernance) -> Self { Self { state: Arc::new(Mutex::new(MissionState::ObjectiveAccepted)), governance, - accumulated_cost_cents: Arc::new(Mutex::new(0)), accumulated_steps: Arc::new(Mutex::new(0)), elapsed_ms: Arc::new(Mutex::new(0)), latest_successful_checkpoint: Arc::new(Mutex::new(None)), latest_failure: Arc::new(Mutex::new(None)), + cost_accounting: MissionCostAccounting::Local(Arc::new(Mutex::new(0))), } } + pub fn new_with_runtime_session_cost( + governance: MissionGovernance, + session_cost_reader: SessionCostReader, + ) -> Result { + let baseline_cost_cents = session_cost_reader()?; + + Ok(Self { + state: Arc::new(Mutex::new(MissionState::ObjectiveAccepted)), + governance, + accumulated_steps: Arc::new(Mutex::new(0)), + elapsed_ms: Arc::new(Mutex::new(0)), + latest_successful_checkpoint: Arc::new(Mutex::new(None)), + latest_failure: Arc::new(Mutex::new(None)), + cost_accounting: MissionCostAccounting::RuntimeDerived { + baseline_cost_cents, + session_cost_reader, + }, + }) + } + + pub fn new_with_runtime_cost_tracker( + governance: MissionGovernance, + tracker: Arc, + ) -> Result { + let session_cost_reader: SessionCostReader = Arc::new(move || { + let summary = tracker + .get_summary() + .map_err(|_| MissionTerminationReason::GovernanceConstraintViolated)?; + usd_to_cents(summary.session_cost_usd) + }); + + Self::new_with_runtime_session_cost(governance, session_cost_reader) + } + pub fn plan_for_objective(objective: &str) -> MissionPlan { let checkpoints: Vec = mission_fragments(objective) .into_iter() @@ -324,18 +372,37 @@ impl MissionCoordinator { .ok_or(MissionTerminationReason::GovernanceConstraintViolated)?; drop(elapsed_ms); - let mut accumulated_cost_cents = self - 
.accumulated_cost_cents - .lock() - .map_err(|_| MissionTerminationReason::GovernanceConstraintViolated)?; - *accumulated_cost_cents = accumulated_cost_cents - .checked_add(cost_cents_delta) - .ok_or(MissionTerminationReason::GovernanceConstraintViolated)?; - drop(accumulated_cost_cents); + if let MissionCostAccounting::Local(accumulated_cost_cents) = &self.cost_accounting { + let mut accumulated_cost_cents = accumulated_cost_cents + .lock() + .map_err(|_| MissionTerminationReason::GovernanceConstraintViolated)?; + *accumulated_cost_cents = accumulated_cost_cents + .checked_add(cost_cents_delta) + .ok_or(MissionTerminationReason::GovernanceConstraintViolated)?; + drop(accumulated_cost_cents); + } self.enforce_post_checkpoint() } + pub fn current_accumulated_cost_cents(&self) -> Result { + match &self.cost_accounting { + MissionCostAccounting::Local(accumulated_cost_cents) => accumulated_cost_cents + .lock() + .map(|value| *value) + .map_err(|_| MissionTerminationReason::GovernanceConstraintViolated), + MissionCostAccounting::RuntimeDerived { + baseline_cost_cents, + session_cost_reader, + } => { + let current_cost_cents = session_cost_reader()?; + current_cost_cents + .checked_sub(*baseline_cost_cents) + .ok_or(MissionTerminationReason::GovernanceConstraintViolated) + } + } + } + pub fn enforce_post_checkpoint(&self) -> Result<(), MissionTerminationReason> { self.governance.validate()?; let (elapsed_ms, completed_steps, accumulated_cost_cents) = self.accounting_snapshot()?; @@ -360,10 +427,7 @@ impl MissionCoordinator { .accumulated_steps .lock() .map_err(|_| MissionTerminationReason::GovernanceConstraintViolated)?; - let accumulated_cost_cents = *self - .accumulated_cost_cents - .lock() - .map_err(|_| MissionTerminationReason::GovernanceConstraintViolated)?; + let accumulated_cost_cents = self.current_accumulated_cost_cents()?; Ok((elapsed_ms, completed_steps, accumulated_cost_cents)) } } @@ -427,6 +491,20 @@ fn parse_positive_u32(value: 
Option<&serde_json::Value>) -> Result Result { + if !cost_usd.is_finite() || cost_usd < 0.0 { + return Err(MissionTerminationReason::GovernanceConstraintViolated); + } + + let cents = (cost_usd * 100.0).round(); + if cents > f64::from(u32::MAX) { + return Err(MissionTerminationReason::GovernanceConstraintViolated); + } + + Ok(cents as u32) +} + fn governance_exceeded( elapsed_ms: u64, completed_steps: u32, @@ -460,6 +538,7 @@ fn governance_exceeded( #[cfg(test)] mod tests { use super::*; + use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; use std::thread; @@ -688,4 +767,77 @@ mod tests { assert!(coordinator.should_replan("Temporary timeout while executing checkpoint")); assert!(!coordinator.should_replan("permissions denied by policy")); } + + #[test] + fn runtime_derived_mission_cost_is_independent_from_prior_session_spend() { + let governance = MissionGovernance { + max_runtime_ms: 300_000, + max_steps: 10, + max_estimated_cost_cents: 100, + elapsed_ms: 0, + completed_steps: 0, + accumulated_cost_cents: 0, + }; + let session_spend_cents = Arc::new(AtomicU32::new(0)); + let session_spend_reader = { + let session_spend_cents = Arc::clone(&session_spend_cents); + Arc::new(move || Ok(session_spend_cents.load(Ordering::SeqCst))) + }; + + let first_mission = MissionCoordinator::new_with_runtime_session_cost( + governance.clone(), + session_spend_reader.clone(), + ) + .unwrap(); + + session_spend_cents.store(120, Ordering::SeqCst); + assert_eq!(first_mission.current_accumulated_cost_cents().unwrap(), 120); + assert_eq!( + first_mission.enforce_post_checkpoint().unwrap_err(), + MissionTerminationReason::BudgetExhausted + ); + + let second_mission = + MissionCoordinator::new_with_runtime_session_cost(governance, session_spend_reader) + .unwrap(); + + assert_eq!(second_mission.current_accumulated_cost_cents().unwrap(), 0); + second_mission.enforce_pre_checkpoint().unwrap(); + + session_spend_cents.store(170, Ordering::SeqCst); + 
assert_eq!(second_mission.current_accumulated_cost_cents().unwrap(), 50); + second_mission.enforce_post_checkpoint().unwrap(); + } + + #[test] + fn runtime_derived_cost_regression_does_not_reopen_mission_budget() { + let governance = MissionGovernance { + max_runtime_ms: 300_000, + max_steps: 10, + max_estimated_cost_cents: 100, + elapsed_ms: 0, + completed_steps: 0, + accumulated_cost_cents: 0, + }; + let session_spend_cents = Arc::new(AtomicU32::new(150)); + let session_spend_reader = { + let session_spend_cents = Arc::clone(&session_spend_cents); + Arc::new(move || Ok(session_spend_cents.load(Ordering::SeqCst))) + }; + + let mission = + MissionCoordinator::new_with_runtime_session_cost(governance, session_spend_reader) + .unwrap(); + + session_spend_cents.store(100, Ordering::SeqCst); + + assert_eq!( + mission.current_accumulated_cost_cents().unwrap_err(), + MissionTerminationReason::GovernanceConstraintViolated + ); + assert_eq!( + mission.enforce_post_checkpoint().unwrap_err(), + MissionTerminationReason::GovernanceConstraintViolated + ); + } } diff --git a/clients/agent-runtime/src/bootstrap/mod.rs b/clients/agent-runtime/src/bootstrap/mod.rs index 7e6a1e75..39e17482 100644 --- a/clients/agent-runtime/src/bootstrap/mod.rs +++ b/clients/agent-runtime/src/bootstrap/mod.rs @@ -192,18 +192,25 @@ impl BootstrapContext { config: &Config, memory: Arc, observer: Arc, + cost_tracker_override: Option>, ) -> anyhow::Result { - Self::from_effective_config_with_overrides(config, Some(memory), Some(observer)) + Self::from_effective_config_with_overrides( + config, + Some(memory), + Some(observer), + cost_tracker_override, + ) } fn from_effective_config(config: &Config) -> anyhow::Result { - Self::from_effective_config_with_overrides(config, None, None) + Self::from_effective_config_with_overrides(config, None, None, None) } fn from_effective_config_with_overrides( config: &Config, memory_override: Option>, observer_override: Option>, + cost_tracker_override: Option>, ) -> 
anyhow::Result { let profile = AgentProfile::from_config(config)?; @@ -252,7 +259,11 @@ impl BootstrapContext { .filter(|tool| profile.allows_tool(tool.name())) .collect(); - let cost_tracker = if config.cost.enabled { + let cost_tracker = if !config.cost.enabled { + None + } else if let Some(cost_tracker) = cost_tracker_override { + Some(cost_tracker) + } else { match CostTracker::new(config.cost.clone(), &config.workspace_dir) { Ok(tracker) => Some(Arc::new(tracker)), Err(error) => { @@ -260,8 +271,6 @@ impl BootstrapContext { None } } - } else { - None }; Ok(Self { @@ -602,7 +611,9 @@ mod tests { config.mcp.servers = vec![mock_mcp_server("docs", "search")]; let (memory, observer) = create_memory_and_observer(&config).unwrap(); - let ctx = BootstrapContext::for_gateway(&config, memory, observer).unwrap(); + let tracker = + Arc::new(CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap()); + let ctx = BootstrapContext::for_gateway(&config, memory, observer, Some(tracker)).unwrap(); let names: HashSet<&str> = ctx.tools.iter().map(|tool| tool.name()).collect(); if cfg!(feature = "mcp-runtime") { @@ -611,4 +622,19 @@ mod tests { assert!(!names.iter().any(|name| name.starts_with("mcp."))); } } + + #[test] + fn gateway_bootstrap_does_not_reuse_shared_tracker_when_cost_disabled() { + let tmp = tempfile::TempDir::new().unwrap(); + let mut config = test_config(&tmp); + config.cost.enabled = false; + + let (memory, observer) = create_memory_and_observer(&config).unwrap(); + let tracker = Arc::new( + CostTracker::new(crate::config::CostConfig::default(), &config.workspace_dir).unwrap(), + ); + let ctx = BootstrapContext::for_gateway(&config, memory, observer, Some(tracker)).unwrap(); + + assert!(ctx.cost_tracker.is_none()); + } } diff --git a/clients/agent-runtime/src/config/mod.rs b/clients/agent-runtime/src/config/mod.rs index e4cef489..74b72bc3 100644 --- a/clients/agent-runtime/src/config/mod.rs +++ b/clients/agent-runtime/src/config/mod.rs @@ -80,17 
+80,40 @@ mod tests { allowed_commands: vec!["git".to_string(), "cargo".to_string()], forbidden_paths: vec!["/etc".to_string(), "/var".to_string()], max_actions_per_hour: 10, - max_cost_per_day_cents: 100, require_approval_for_medium_risk: true, block_high_risk_commands: true, always_ask: vec![], auto_approve: vec![], + deprecated_fields: vec![], }; assert_eq!(autonomy.max_actions_per_hour, 10); - assert_eq!(autonomy.max_cost_per_day_cents, 100); assert_eq!(autonomy.forbidden_paths.len(), 2); } + #[test] + fn reexported_autonomy_config_tracks_deprecated_alias_metadata() { + let autonomy: AutonomyConfig = toml::from_str( + r#" +level = "supervised" +workspace_only = true +allowed_commands = ["git"] +forbidden_paths = ["/etc"] +max_cost_per_day_cents = 12 +require_approval_for_medium_risk = true +block_high_risk_commands = true +auto_approve = [] +always_ask = [] +"#, + ) + .unwrap(); + + assert_eq!(autonomy.max_actions_per_hour, 12); + assert_eq!( + autonomy.deprecated_fields(), + &["autonomy.max_cost_per_day_cents".to_string()] + ); + } + #[test] fn reexported_observability_config_with_backends() { let obs = ObservabilityConfig { diff --git a/clients/agent-runtime/src/config/schema.rs b/clients/agent-runtime/src/config/schema.rs index fa1565f6..9683bf2a 100644 --- a/clients/agent-runtime/src/config/schema.rs +++ b/clients/agent-runtime/src/config/schema.rs @@ -579,11 +579,16 @@ impl Default for IdentityConfig { // ── Cost tracking and budget enforcement ─────────────────────────── #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct CostConfig { /// Enable cost tracking (default: false) #[serde(default)] pub enabled: bool, + /// Session spending limit in USD (default: 0.00, disabled) + #[serde(default = "default_session_limit")] + pub session_limit_usd: f64, + /// Daily spending limit in USD (default: 10.00) #[serde(default = "default_daily_limit")] pub daily_limit_usd: f64, @@ -606,13 +611,12 @@ pub struct CostConfig { } 
#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct ModelPricing { /// Input price per 1M tokens - #[serde(default)] pub input: f64, /// Output price per 1M tokens - #[serde(default)] pub output: f64, } @@ -620,6 +624,10 @@ fn default_daily_limit() -> f64 { 10.0 } +fn default_session_limit() -> f64 { + 0.0 +} + fn default_monthly_limit() -> f64 { 100.0 } @@ -632,6 +640,7 @@ impl Default for CostConfig { fn default() -> Self { Self { enabled: false, + session_limit_usd: default_session_limit(), daily_limit_usd: default_daily_limit(), monthly_limit_usd: default_monthly_limit(), warn_at_percent: default_warn_percent(), @@ -1398,14 +1407,13 @@ impl Default for ObservabilityConfig { // ── Autonomy / Security ────────────────────────────────────────── -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize)] pub struct AutonomyConfig { pub level: AutonomyLevel, pub workspace_only: bool, pub allowed_commands: Vec, pub forbidden_paths: Vec, pub max_actions_per_hour: u32, - pub max_cost_per_day_cents: u32, /// Require explicit approval for medium-risk shell commands. #[serde(default = "default_true")] @@ -1422,6 +1430,34 @@ pub struct AutonomyConfig { /// Tools that always require interactive approval, even after "Always". 
#[serde(default = "default_always_ask")] pub always_ask: Vec, + + #[serde(skip, default)] + pub deprecated_fields: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +struct RawAutonomyConfig { + #[serde(default)] + level: Option, + #[serde(default)] + workspace_only: Option, + #[serde(default)] + allowed_commands: Option>, + #[serde(default)] + forbidden_paths: Option>, + #[serde(default)] + max_actions_per_hour: Option, + #[serde(default)] + max_cost_per_day_cents: Option, + #[serde(default)] + require_approval_for_medium_risk: Option, + #[serde(default)] + block_high_risk_commands: Option, + #[serde(default)] + auto_approve: Option>, + #[serde(default)] + always_ask: Option>, } fn default_auto_approve() -> Vec { @@ -1472,12 +1508,75 @@ impl Default for AutonomyConfig { "~/.config".into(), ], max_actions_per_hour: 20, - max_cost_per_day_cents: 500, require_approval_for_medium_risk: true, block_high_risk_commands: true, auto_approve: default_auto_approve(), always_ask: default_always_ask(), + deprecated_fields: Vec::new(), + } + } +} + +impl<'de> Deserialize<'de> for AutonomyConfig { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + let raw = RawAutonomyConfig::deserialize(deserializer)?; + let mut config = Self::default(); + + if let Some(level) = raw.level { + config.level = level; + } + if let Some(workspace_only) = raw.workspace_only { + config.workspace_only = workspace_only; + } + if let Some(allowed_commands) = raw.allowed_commands { + config.allowed_commands = allowed_commands; + } + if let Some(forbidden_paths) = raw.forbidden_paths { + config.forbidden_paths = forbidden_paths; + } + if let Some(max_actions_per_hour) = raw.max_actions_per_hour { + config.max_actions_per_hour = max_actions_per_hour; + } + if let Some(max_cost_per_day_cents) = raw.max_cost_per_day_cents { + config + .deprecated_fields + .push("autonomy.max_cost_per_day_cents".to_string()); + if 
raw.max_actions_per_hour.is_none() { + config.max_actions_per_hour = max_cost_per_day_cents; + } + } + if let Some(require_approval_for_medium_risk) = raw.require_approval_for_medium_risk { + config.require_approval_for_medium_risk = require_approval_for_medium_risk; + } + if let Some(block_high_risk_commands) = raw.block_high_risk_commands { + config.block_high_risk_commands = block_high_risk_commands; + } + if let Some(auto_approve) = raw.auto_approve { + config.auto_approve = auto_approve; } + if let Some(always_ask) = raw.always_ask { + config.always_ask = always_ask; + } + + Ok(config) + } +} + +impl AutonomyConfig { + pub fn deprecated_fields(&self) -> &[String] { + &self.deprecated_fields + } + + pub fn action_rate_deprecation_warning(&self) -> Option { + self.deprecated_fields + .iter() + .any(|field| field == "autonomy.max_cost_per_day_cents") + .then(|| { + "autonomy.max_cost_per_day_cents is deprecated and has been normalized to autonomy.max_actions_per_hour".to_string() + }) } } @@ -2899,6 +2998,7 @@ impl Config { } config.apply_env_overrides(); + config.emit_deprecation_warnings(); config.validate_for_runtime()?; Ok(config) } else { @@ -3106,10 +3206,17 @@ impl Config { } } + fn emit_deprecation_warnings(&self) { + if let Some(message) = self.autonomy.action_rate_deprecation_warning() { + tracing::warn!("{message}"); + } + } + pub fn validate_for_runtime(&self) -> Result<()> { self.validate_agent_profile()?; self.validate_mcp_servers()?; self.validate_memory_config()?; + self.validate_cost_config()?; self.validate_delegate_overrides()?; self.validate_code_session_config()?; self.validate_account_pools()?; @@ -3118,6 +3225,30 @@ impl Config { self.validate_audio_config() } + fn validate_cost_config(&self) -> Result<()> { + if !self.cost.session_limit_usd.is_finite() || self.cost.session_limit_usd < 0.0 { + anyhow::bail!("cost.session_limit_usd must be a finite, non-negative value"); + } + if !self.cost.daily_limit_usd.is_finite() || 
self.cost.daily_limit_usd < 0.0 { + anyhow::bail!("cost.daily_limit_usd must be a finite, non-negative value"); + } + if !self.cost.monthly_limit_usd.is_finite() || self.cost.monthly_limit_usd < 0.0 { + anyhow::bail!("cost.monthly_limit_usd must be a finite, non-negative value"); + } + if self.cost.warn_at_percent > 100 { + anyhow::bail!("cost.warn_at_percent must be between 0 and 100 inclusive"); + } + for (model, pricing) in &self.cost.prices { + if !pricing.input.is_finite() || pricing.input < 0.0 { + anyhow::bail!("cost.prices.{model}.input must be a finite, non-negative value"); + } + if !pricing.output.is_finite() || pricing.output < 0.0 { + anyhow::bail!("cost.prices.{model}.output must be a finite, non-negative value"); + } + } + Ok(()) + } + fn validate_agent_profile(&self) -> Result<()> { if is_supported_agent_profile(&self.agent.profile) { return Ok(()); @@ -3771,11 +3902,45 @@ mod tests { assert!(a.allowed_commands.contains(&"cargo".to_string())); assert!(a.forbidden_paths.contains(&"/etc".to_string())); assert_eq!(a.max_actions_per_hour, 20); - assert_eq!(a.max_cost_per_day_cents, 500); assert!(a.require_approval_for_medium_risk); assert!(a.block_high_risk_commands); } + #[test] + fn autonomy_config_normalizes_deprecated_action_rate_alias() { + let parsed: AutonomyConfig = toml::from_str( + r#" +level = "supervised" +workspace_only = true +allowed_commands = ["git"] +forbidden_paths = ["/etc"] +max_cost_per_day_cents = 42 +require_approval_for_medium_risk = true +block_high_risk_commands = true +auto_approve = [] +always_ask = [] +"#, + ) + .unwrap(); + + assert_eq!(parsed.max_actions_per_hour, 42); + assert_eq!( + parsed.deprecated_fields(), + &["autonomy.max_cost_per_day_cents".to_string()] + ); + } + + #[test] + fn autonomy_config_serializes_canonical_action_rate_field_only() { + let mut config = AutonomyConfig::default(); + config.max_actions_per_hour = 33; + + let toml = toml::to_string(&config).unwrap(); + + 
assert!(toml.contains("max_actions_per_hour = 33")); + assert!(!toml.contains("max_cost_per_day_cents")); + } + #[test] fn runtime_config_default() { let r = RuntimeConfig::default(); @@ -3920,11 +4085,11 @@ default_temperature = 0.7 allowed_commands: vec!["docker".into()], forbidden_paths: vec!["/secret".into()], max_actions_per_hour: 50, - max_cost_per_day_cents: 1000, require_approval_for_medium_risk: false, block_high_risk_commands: true, auto_approve: vec!["file_read".into()], always_ask: vec![], + deprecated_fields: vec![], }, security: SecurityConfig::default(), runtime: RuntimeConfig { @@ -4262,6 +4427,59 @@ tool_dispatcher = "xml" assert!(config.validate_for_runtime().is_ok()); } + #[test] + fn validate_for_runtime_rejects_negative_cost_limits() { + let mut config = Config::default(); + config.cost.session_limit_usd = -1.0; + + let err = config.validate_for_runtime().unwrap_err(); + assert!(err + .to_string() + .contains("cost.session_limit_usd must be a finite, non-negative value")); + } + + #[test] + fn validate_for_runtime_rejects_negative_model_pricing() { + let mut config = Config::default(); + config.cost.prices.insert( + "bad-model".to_string(), + ModelPricing { + input: -0.1, + output: 1.0, + }, + ); + + let err = config.validate_for_runtime().unwrap_err(); + assert!(err + .to_string() + .contains("cost.prices.bad-model.input must be a finite, non-negative value")); + } + + #[test] + fn validate_for_runtime_rejects_warn_percent_above_hundred() { + let mut config = Config::default(); + config.cost.warn_at_percent = 255; + + let err = config.validate_for_runtime().unwrap_err(); + assert!(err + .to_string() + .contains("cost.warn_at_percent must be between 0 and 100 inclusive")); + } + + #[test] + fn model_pricing_requires_both_input_and_output_fields() { + let raw = r#" +[cost] +enabled = true + +[cost.prices."test/model"] +input = 1.0 +"#; + + let err = toml::from_str::(raw).unwrap_err(); + assert!(err.to_string().contains("missing field 
`output`")); + } + #[test] fn config_save_and_load_tmpdir() { let dir = std::env::temp_dir().join("corvus_test_config"); diff --git a/clients/agent-runtime/src/cost/mod.rs b/clients/agent-runtime/src/cost/mod.rs index a609853c..b9b7d207 100755 --- a/clients/agent-runtime/src/cost/mod.rs +++ b/clients/agent-runtime/src/cost/mod.rs @@ -1,7 +1,15 @@ +pub mod service; pub mod tracker; pub mod types; +pub use service::CostService; pub use tracker::CostTracker; // BudgetCheck and TokenUsage are intentionally re-exported as the public cost API // consumed outside the cost module. -pub use types::{BudgetCheck, TokenUsage}; +#[allow(unused_imports)] +pub use types::{ + BudgetCheck, BudgetEvaluation, BudgetScopeStatus, BudgetState, CostAuditEvent, CostAuditKind, + CostBudgetReservation, CostGovernanceSummary, CostHistory, CostHistoryPoint, CostHistoryTotals, + CostOverrideRecord, CostOverrideRequest, CostOverrideScope, CostResetRequest, CostResetResult, + CostResetScope, CostTrackerSnapshot, MissionBudgetScope, TokenUsage, UsagePeriod, +}; diff --git a/clients/agent-runtime/src/cost/service.rs b/clients/agent-runtime/src/cost/service.rs new file mode 100644 index 00000000..1e20e817 --- /dev/null +++ b/clients/agent-runtime/src/cost/service.rs @@ -0,0 +1,174 @@ +use super::tracker::CostTracker; +use super::types::{ + BudgetEvaluation, BudgetState, CostAuditEvent, CostGovernanceSummary, CostHistory, + CostOverrideRecord, CostOverrideRequest, CostResetRequest, CostResetResult, CostSummary, + MissionBudgetScope, UsagePeriod, +}; +use anyhow::{anyhow, Result}; +use chrono::{DateTime, Utc}; +use std::sync::Arc; + +/// Thin runtime-facing orchestration layer over the tracker. 
+#[derive(Clone)] +pub struct CostService { + tracker: Option>, +} + +impl CostService { + pub fn new(tracker: Arc) -> Self { + Self { + tracker: Some(tracker), + } + } + + pub fn disabled() -> Self { + Self { tracker: None } + } + + pub fn current_summary(&self, now: DateTime) -> Result { + let Some(tracker) = &self.tracker else { + return Ok(CostGovernanceSummary { + session_id: "disabled".to_string(), + usage: CostSummary { + session_cost_usd: 0.0, + daily_cost_usd: 0.0, + monthly_cost_usd: 0.0, + total_tokens: 0, + request_count: 0, + by_model: std::collections::HashMap::new(), + }, + budget_state: BudgetState::Allowed, + active_period: None, + scope_statuses: Vec::new(), + active_override: None, + }); + }; + + let snapshot = tracker.snapshot(now)?; + let usage = snapshot.usage; + let scope_statuses = snapshot.scope_statuses; + let active_override = snapshot.active_override; + + let active_scope = scope_statuses.iter().max_by(|left, right| { + budget_state_rank(left.state) + .cmp(&budget_state_rank(right.state)) + .then_with(|| left.percent_used.total_cmp(&right.percent_used)) + }); + + Ok(CostGovernanceSummary { + session_id: snapshot.session_id, + usage, + budget_state: active_scope.map_or(BudgetState::Allowed, |status| status.state), + active_period: active_scope.map(|status| status.period), + scope_statuses, + active_override, + }) + } + + pub fn history_window( + &self, + period: UsagePeriod, + window: usize, + now: DateTime, + ) -> Result { + let Some(tracker) = &self.tracker else { + return Ok(CostHistory { + period, + points: Vec::new(), + totals: super::types::CostHistoryTotals { + cost_usd: 0.0, + tokens: 0, + requests: 0, + }, + }); + }; + + tracker.history_window(period, window, now) + } + + pub fn history_range( + &self, + period: UsagePeriod, + start: DateTime, + end: DateTime, + ) -> Result { + let Some(tracker) = &self.tracker else { + return Ok(CostHistory { + period, + points: Vec::new(), + totals: super::types::CostHistoryTotals { + cost_usd: 
0.0, + tokens: 0, + requests: 0, + }, + }); + }; + + tracker.history_range(period, start, end) + } + + pub fn reset(&self, request: CostResetRequest, now: DateTime) -> Result { + let tracker = self + .tracker + .as_ref() + .ok_or_else(|| anyhow!("Cost tracker is unavailable"))?; + tracker.reset(request, now) + } + + pub fn apply_override( + &self, + request: CostOverrideRequest, + now: DateTime, + ) -> Result { + let tracker = self + .tracker + .as_ref() + .ok_or_else(|| anyhow!("Cost tracker is unavailable"))?; + tracker.apply_override(request, now) + } + + pub fn audit_trail(&self, limit: usize) -> Result> { + let tracker = self + .tracker + .as_ref() + .ok_or_else(|| anyhow!("Cost tracker is unavailable"))?; + tracker.audit_trail(limit) + } + + pub fn evaluate_request( + &self, + estimated_cost_usd: f64, + mission_scope: Option, + now: DateTime, + ) -> Result { + let Some(tracker) = &self.tracker else { + return Ok(BudgetEvaluation::Proceed { + check: super::types::BudgetCheck::Allowed, + override_applied: None, + reservation: None, + }); + }; + + let (check, override_applied, reservation) = + tracker.reserve_budget_for_request(estimated_cost_usd, mission_scope.as_ref(), now)?; + + if matches!(check, super::types::BudgetCheck::Exceeded { .. 
}) && override_applied.is_none() + { + return Ok(BudgetEvaluation::Blocked { check }); + } + + Ok(BudgetEvaluation::Proceed { + check, + override_applied, + reservation, + }) + } +} + +fn budget_state_rank(state: BudgetState) -> u8 { + match state { + BudgetState::Allowed => 0, + BudgetState::Warning => 1, + BudgetState::Exceeded => 2, + } +} diff --git a/clients/agent-runtime/src/cost/tracker.rs b/clients/agent-runtime/src/cost/tracker.rs index 5c99d1ba..d5f43779 100755 --- a/clients/agent-runtime/src/cost/tracker.rs +++ b/clients/agent-runtime/src/cost/tracker.rs @@ -1,8 +1,13 @@ -use super::types::{BudgetCheck, CostRecord, CostSummary, ModelStats, TokenUsage, UsagePeriod}; +use super::types::{ + BudgetCheck, BudgetScopeStatus, CostAuditEvent, CostAuditKind, CostBudgetReservation, + CostHistory, CostHistoryPoint, CostHistoryTotals, CostOverrideRecord, CostOverrideRequest, + CostOverrideScope, CostRecord, CostResetRequest, CostResetResult, CostResetScope, CostSummary, + CostTrackerSnapshot, MissionBudgetScope, ModelStats, TokenUsage, UsagePeriod, +}; use crate::config::schema::CostConfig; use anyhow::{anyhow, Context, Result}; -use chrono::{Datelike, NaiveDate, Utc}; -use parking_lot::{Mutex, MutexGuard}; +use chrono::{DateTime, Datelike, Duration, NaiveDate, TimeZone, Utc}; +use parking_lot::{Mutex, MutexGuard, RwLock}; use std::collections::HashMap; use std::fs::{self, File, OpenOptions}; use std::io::{BufRead, BufReader, Write}; @@ -11,26 +16,50 @@ use std::sync::Arc; /// Cost tracker for API usage monitoring and budget enforcement. pub struct CostTracker { - config: CostConfig, + config: RwLock, storage: Arc>, + audit_storage: Arc>, session_id: String, session_costs: Arc>>, + active_override: Arc>>, + pending_reservations: Arc>>, + cumulative_total_cost_usd: Arc>, } +const MAX_HISTORY_WINDOW_DAYS: usize = 366; +const MAX_HISTORY_WINDOW_MONTHS: usize = 60; +const REDACTED_AUDIT_VALUE: &str = "[REDACTED]"; + impl CostTracker { /// Create a new cost tracker. 
pub fn new(config: CostConfig, workspace_dir: &Path) -> Result { let storage_path = resolve_storage_path(workspace_dir)?; + let audit_path = resolve_audit_path(workspace_dir); let storage = CostStorage::new(&storage_path).with_context(|| { format!("Failed to open cost storage at {}", storage_path.display()) })?; + let cumulative_total_cost_usd = storage + .read_records()? + .into_iter() + .map(|record| record.usage.cost_usd) + .sum(); + let audit_storage = CostAuditStorage::new(&audit_path).with_context(|| { + format!( + "Failed to open cost audit storage at {}", + audit_path.display() + ) + })?; Ok(Self { - config, + config: RwLock::new(config), storage: Arc::new(Mutex::new(storage)), + audit_storage: Arc::new(Mutex::new(audit_storage)), session_id: uuid::Uuid::new_v4().to_string(), session_costs: Arc::new(Mutex::new(Vec::new())), + active_override: Arc::new(Mutex::new(None)), + pending_reservations: Arc::new(Mutex::new(HashMap::new())), + cumulative_total_cost_usd: Arc::new(Mutex::new(cumulative_total_cost_usd)), }) } @@ -47,9 +76,76 @@ impl CostTracker { self.session_costs.lock() } + fn lock_audit_storage(&self) -> MutexGuard<'_, CostAuditStorage> { + self.audit_storage.lock() + } + + fn lock_active_override(&self) -> MutexGuard<'_, Option> { + self.active_override.lock() + } + + fn lock_pending_reservations(&self) -> MutexGuard<'_, HashMap> { + self.pending_reservations.lock() + } + + fn lock_cumulative_total_cost_usd(&self) -> MutexGuard<'_, f64> { + self.cumulative_total_cost_usd.lock() + } + + fn redacted_audit_actor(actor: &str) -> Option { + (!actor.trim().is_empty()).then(|| REDACTED_AUDIT_VALUE.to_string()) + } + + fn redacted_audit_reason(reason: Option<&str>) -> Option { + reason + .map(str::trim) + .filter(|reason| !reason.is_empty()) + .map(|_| REDACTED_AUDIT_VALUE.to_string()) + } + + fn append_audit_event_best_effort(&self, event: CostAuditEvent) { + if let Err(error) = self.append_audit_event(event.clone()) { + tracing::warn!( + kind = 
?event.kind, + override_id = event.override_id.as_deref().unwrap_or("none"), + session_id = event.session_id.as_deref().unwrap_or("none"), + actor = event.actor.as_deref().unwrap_or("none"), + "Failed to persist cost audit event: {error}" + ); + } + } + + pub fn config(&self) -> CostConfig { + self.config.read().clone() + } + + pub fn update_config(&self, next: CostConfig) { + *self.config.write() = next; + } + /// Check if a request is within budget. pub fn check_budget(&self, estimated_cost_usd: f64) -> Result { - if !self.config.enabled { + self.check_budget_with_mission_scope(estimated_cost_usd, None) + } + + pub fn check_budget_with_mission_scope( + &self, + estimated_cost_usd: f64, + mission_scope: Option<&MissionBudgetScope>, + ) -> Result { + self.check_budget_with_pending_scope(estimated_cost_usd, mission_scope, 0.0, None) + } + + fn check_budget_with_pending_scope( + &self, + estimated_cost_usd: f64, + mission_scope: Option<&MissionBudgetScope>, + pending_total_usd: f64, + pending_mission_usd: Option, + ) -> Result { + let config = self.config(); + + if !config.enabled { return Ok(BudgetCheck::Allowed); } @@ -61,54 +157,125 @@ impl CostTracker { let mut storage = self.lock_storage(); let (daily_cost, monthly_cost) = storage.get_aggregated_costs()?; - - // Check daily limit - let projected_daily = daily_cost + estimated_cost_usd; - if projected_daily > self.config.daily_limit_usd { - return Ok(BudgetCheck::Exceeded { - current_usd: daily_cost, - limit_usd: self.config.daily_limit_usd, - period: UsagePeriod::Day, - }); + drop(storage); + + let session_cost = self.current_session_cost_usd(); + let projected_daily = daily_cost + pending_total_usd + estimated_cost_usd; + let projected_monthly = monthly_cost + pending_total_usd + estimated_cost_usd; + let projected_session = session_cost + pending_total_usd + estimated_cost_usd; + + let mut checks = vec![ + build_budget_check( + UsagePeriod::Session, + session_cost, + projected_session, + 
config.session_limit_usd, + config.warn_at_percent, + ), + build_budget_check( + UsagePeriod::Day, + daily_cost, + projected_daily, + config.daily_limit_usd, + config.warn_at_percent, + ), + build_budget_check( + UsagePeriod::Month, + monthly_cost, + projected_monthly, + config.monthly_limit_usd, + config.warn_at_percent, + ), + ]; + + if let Some(mission_scope) = mission_scope { + let pending_mission_usd = pending_mission_usd.unwrap_or(0.0); + checks.push(build_budget_check( + UsagePeriod::Mission, + mission_scope.current_usd, + mission_scope.current_usd + pending_mission_usd + estimated_cost_usd, + mission_scope.limit_usd, + config.warn_at_percent, + )); } - // Check monthly limit - let projected_monthly = monthly_cost + estimated_cost_usd; - if projected_monthly > self.config.monthly_limit_usd { - return Ok(BudgetCheck::Exceeded { - current_usd: monthly_cost, - limit_usd: self.config.monthly_limit_usd, - period: UsagePeriod::Month, - }); - } + Ok(select_budget_check(checks)) + } - // Check warning thresholds - let warn_threshold = f64::from(self.config.warn_at_percent.min(100)) / 100.0; - let daily_warn_threshold = self.config.daily_limit_usd * warn_threshold; - let monthly_warn_threshold = self.config.monthly_limit_usd * warn_threshold; + pub fn snapshot(&self, now: DateTime) -> Result { + self.expire_override_if_needed(now)?; + let config = self.config(); - if projected_daily >= daily_warn_threshold { - return Ok(BudgetCheck::Warning { - current_usd: daily_cost, - limit_usd: self.config.daily_limit_usd, - period: UsagePeriod::Day, - }); - } + let mut storage = self.lock_storage(); + let (daily_cost, monthly_cost) = storage.get_aggregated_costs()?; + let session_costs = self.lock_session_costs(); + let active_override = self.lock_active_override().clone(); - if projected_monthly >= monthly_warn_threshold { - return Ok(BudgetCheck::Warning { - current_usd: monthly_cost, - limit_usd: self.config.monthly_limit_usd, - period: UsagePeriod::Month, - }); - } + let 
session_cost: f64 = session_costs + .iter() + .map(|record| record.usage.cost_usd) + .sum(); + let total_tokens: u64 = session_costs + .iter() + .map(|record| record.usage.total_tokens) + .sum(); + let request_count = session_costs.len(); + let by_model = build_session_model_stats(&session_costs); + + let scope_statuses = if config.enabled { + vec![ + build_scope_status( + UsagePeriod::Session, + session_cost, + config.session_limit_usd, + config.warn_at_percent, + ), + build_scope_status( + UsagePeriod::Day, + daily_cost, + config.daily_limit_usd, + config.warn_at_percent, + ), + build_scope_status( + UsagePeriod::Month, + monthly_cost, + config.monthly_limit_usd, + config.warn_at_percent, + ), + ] + } else { + Vec::new() + }; - Ok(BudgetCheck::Allowed) + drop(session_costs); + drop(storage); + + Ok(CostTrackerSnapshot { + session_id: self.session_id.clone(), + usage: CostSummary { + session_cost_usd: session_cost, + daily_cost_usd: daily_cost, + monthly_cost_usd: monthly_cost, + total_tokens, + request_count, + by_model, + }, + scope_statuses, + active_override, + }) } /// Record a usage event. pub fn record_usage(&self, usage: TokenUsage) -> Result<()> { - if !self.config.enabled { + self.record_usage_for_session(&self.session_id, usage) + } + + pub fn record_usage_for_session( + &self, + session_id: impl Into, + usage: TokenUsage, + ) -> Result<()> { + if !self.config().enabled { return Ok(()); } @@ -118,7 +285,8 @@ impl CostTracker { )); } - let record = CostRecord::new(&self.session_id, usage); + let session_id = session_id.into(); + let record = CostRecord::new(&session_id, usage); // Persist first for durability guarantees. { @@ -126,40 +294,24 @@ impl CostTracker { storage.add_record(record.clone())?; } + *self.lock_cumulative_total_cost_usd() += record.usage.cost_usd; + // Then update in-memory session snapshot. 
- let mut session_costs = self.lock_session_costs(); - session_costs.push(record); + if session_id == self.session_id { + let mut session_costs = self.lock_session_costs(); + session_costs.push(record); + } Ok(()) } + pub fn cumulative_total_cost_usd(&self) -> f64 { + *self.lock_cumulative_total_cost_usd() + } + /// Get the current cost summary. pub fn get_summary(&self) -> Result { - let (daily_cost, monthly_cost) = { - let mut storage = self.lock_storage(); - storage.get_aggregated_costs()? - }; - - let session_costs = self.lock_session_costs(); - let session_cost: f64 = session_costs - .iter() - .map(|record| record.usage.cost_usd) - .sum(); - let total_tokens: u64 = session_costs - .iter() - .map(|record| record.usage.total_tokens) - .sum(); - let request_count = session_costs.len(); - let by_model = build_session_model_stats(&session_costs); - - Ok(CostSummary { - session_cost_usd: session_cost, - daily_cost_usd: daily_cost, - monthly_cost_usd: monthly_cost, - total_tokens, - request_count, - by_model, - }) + Ok(self.snapshot(Utc::now())?.usage) } /// Get the daily cost for a specific date. 
@@ -173,6 +325,304 @@ impl CostTracker { let storage = self.lock_storage(); storage.get_cost_for_month(year, month) } + + pub fn scope_statuses(&self) -> Result> { + Ok(self.snapshot(Utc::now())?.scope_statuses) + } + + pub fn reserve_budget_for_request( + &self, + estimated_cost_usd: f64, + mission_scope: Option<&MissionBudgetScope>, + now: DateTime, + ) -> Result<( + BudgetCheck, + Option, + Option, + )> { + self.expire_override_if_needed(now)?; + + let pending_total_usd; + let pending_mission_usd; + { + let pending = self.lock_pending_reservations(); + pending_total_usd = pending + .values() + .map(|reservation| reservation.estimated_cost_usd) + .sum(); + pending_mission_usd = mission_scope.map(|scope| { + pending + .values() + .filter(|reservation| { + reservation.mission_id.as_deref() == Some(scope.mission_id.as_str()) + }) + .map(|reservation| reservation.estimated_cost_usd) + .sum() + }); + } + + let check = self.check_budget_with_pending_scope( + estimated_cost_usd, + mission_scope, + pending_total_usd, + pending_mission_usd, + )?; + + let mut override_applied = None; + if matches!(check, BudgetCheck::Exceeded { .. }) && self.config().allow_override { + override_applied = self.consume_override_if_active(now)?; + } + + let proceed = !matches!(check, BudgetCheck::Exceeded { .. 
}) || override_applied.is_some(); + let reservation = if proceed && estimated_cost_usd > 0.0 { + let reservation = CostBudgetReservation { + id: uuid::Uuid::new_v4().to_string(), + estimated_cost_usd, + mission_id: mission_scope.map(|scope| scope.mission_id.clone()), + created_at: now, + }; + self.lock_pending_reservations() + .insert(reservation.id.clone(), reservation.clone()); + Some(reservation) + } else { + None + }; + + Ok((check, override_applied, reservation)) + } + + pub fn release_budget_reservation(&self, reservation_id: &str) { + self.lock_pending_reservations().remove(reservation_id); + } + + pub fn commit_budget_reservation(&self, reservation_id: &str) { + self.release_budget_reservation(reservation_id); + } + + fn current_session_cost_usd(&self) -> f64 { + self.lock_session_costs() + .iter() + .map(|record| record.usage.cost_usd) + .sum() + } + + pub fn history_window( + &self, + period: UsagePeriod, + window: usize, + now: DateTime, + ) -> Result { + if window == 0 { + return Err(anyhow!("History window must be greater than zero")); + } + + let records = self.lock_storage().read_records()?; + build_history_from_window(period, window, now, &records) + } + + pub fn history_range( + &self, + period: UsagePeriod, + start: DateTime, + end: DateTime, + ) -> Result { + if start > end { + return Err(anyhow!("History range start must be before end")); + } + + let records = self.lock_storage().read_records()?; + build_history_from_range(period, start, end, &records) + } + + pub fn apply_override( + &self, + request: CostOverrideRequest, + now: DateTime, + ) -> Result { + if !self.config().allow_override { + return Err(anyhow!("Cost overrides are disabled by policy")); + } + + self.expire_override_if_needed(now)?; + + let override_record = CostOverrideRecord { + id: uuid::Uuid::new_v4().to_string(), + actor: request.actor.clone(), + scope: request.scope, + reason: request.reason.clone(), + requested_at: now, + expires_at: request.expires_at, + session_id: 
Some(self.session_id.clone()), + remaining_uses: match request.scope { + CostOverrideScope::NextRequest => 1, + }, + }; + + *self.lock_active_override() = Some(override_record.clone()); + + self.append_audit_event_best_effort(CostAuditEvent { + id: uuid::Uuid::new_v4().to_string(), + kind: CostAuditKind::OverrideGranted, + recorded_at: now, + actor: Self::redacted_audit_actor(&request.actor), + reason: Self::redacted_audit_reason(request.reason.as_deref()), + period: None, + override_scope: Some(request.scope), + reset_scope: None, + override_id: Some(override_record.id.clone()), + session_id: Some(self.session_id.clone()), + expires_at: override_record.expires_at, + removed_cost_usd: None, + removed_requests: None, + }); + + Ok(override_record) + } + + pub fn active_override(&self, now: DateTime) -> Result> { + self.expire_override_if_needed(now)?; + Ok(self.lock_active_override().clone()) + } + + pub fn consume_override_if_active( + &self, + now: DateTime, + ) -> Result> { + self.expire_override_if_needed(now)?; + + let consumed = { + let mut active_override = self.lock_active_override(); + match active_override.as_mut() { + Some(override_record) if override_record.remaining_uses > 0 => { + override_record.remaining_uses -= 1; + let consumed = override_record.clone(); + if override_record.remaining_uses == 0 { + *active_override = None; + } + Some(consumed) + } + _ => None, + } + }; + + if let Some(override_record) = consumed.clone() { + self.append_audit_event_best_effort(CostAuditEvent { + id: uuid::Uuid::new_v4().to_string(), + kind: CostAuditKind::OverrideConsumed, + recorded_at: now, + actor: Self::redacted_audit_actor(&override_record.actor), + reason: Self::redacted_audit_reason(override_record.reason.as_deref()), + period: None, + override_scope: Some(override_record.scope), + reset_scope: None, + override_id: Some(override_record.id.clone()), + session_id: override_record.session_id.clone(), + expires_at: override_record.expires_at, + removed_cost_usd: 
None, + removed_requests: None, + }); + } + + Ok(consumed) + } + + pub fn reset(&self, request: CostResetRequest, now: DateTime) -> Result { + let session_id = self.session_id.clone(); + let mut storage = self.lock_storage(); + let records = storage.read_records()?; + let mut kept = Vec::with_capacity(records.len()); + let mut removed = Vec::new(); + + for record in records { + if matches_reset_scope(&record, request.scope, &session_id, now) { + removed.push(record); + } else { + kept.push(record); + } + } + + storage.replace_records(&kept)?; + drop(storage); + + { + let mut session_costs = self.lock_session_costs(); + session_costs + .retain(|record| !matches_reset_scope(record, request.scope, &session_id, now)); + } + + let removed_cost_usd: f64 = removed.iter().map(|record| record.usage.cost_usd).sum(); + let removed_requests = removed.len(); + + let audit_event = CostAuditEvent { + id: uuid::Uuid::new_v4().to_string(), + kind: CostAuditKind::ResetApplied, + recorded_at: now, + actor: Self::redacted_audit_actor(&request.actor), + reason: Self::redacted_audit_reason(request.reason.as_deref()), + period: None, + override_scope: None, + reset_scope: Some(request.scope), + override_id: None, + session_id: Some(session_id), + expires_at: None, + removed_cost_usd: Some(removed_cost_usd), + removed_requests: Some(removed_requests), + }; + + self.append_audit_event_best_effort(audit_event.clone()); + + Ok(CostResetResult { + scope: request.scope, + removed_cost_usd, + removed_requests, + effective_at: now, + audit_event, + }) + } + + pub fn audit_trail(&self, limit: usize) -> Result> { + self.lock_audit_storage().read_events(limit) + } + + fn append_audit_event(&self, event: CostAuditEvent) -> Result<()> { + self.lock_audit_storage().append(event) + } + + fn expire_override_if_needed(&self, now: DateTime) -> Result<()> { + let expired = { + let mut active_override = self.lock_active_override(); + match active_override.as_ref() { + Some(override_record) + if 
override_record + .expires_at + .is_some_and(|expires_at| expires_at <= now) => + { + active_override.take() + } + _ => None, + } + }; + + if let Some(override_record) = expired { + self.append_audit_event_best_effort(CostAuditEvent { + id: uuid::Uuid::new_v4().to_string(), + kind: CostAuditKind::OverrideExpired, + recorded_at: now, + actor: Self::redacted_audit_actor(&override_record.actor), + reason: Self::redacted_audit_reason(override_record.reason.as_deref()), + period: None, + override_scope: Some(override_record.scope), + reset_scope: None, + override_id: Some(override_record.id.clone()), + session_id: override_record.session_id.clone(), + expires_at: override_record.expires_at, + removed_cost_usd: None, + removed_requests: None, + }); + } + + Ok(()) + } } fn resolve_storage_path(workspace_dir: &Path) -> Result { @@ -204,6 +654,292 @@ fn resolve_storage_path(workspace_dir: &Path) -> Result { Ok(storage_path) } +fn resolve_audit_path(workspace_dir: &Path) -> PathBuf { + workspace_dir.join("state").join("cost-audit.jsonl") +} + +fn build_budget_check( + period: UsagePeriod, + current_usd: f64, + projected_usd: f64, + limit_usd: f64, + warn_at_percent: u8, +) -> BudgetCheck { + if limit_usd <= 0.0 { + return BudgetCheck::Allowed; + } + + let percent_used = (projected_usd / limit_usd) * 100.0; + if projected_usd > limit_usd { + return BudgetCheck::Exceeded { + current_usd, + projected_usd, + limit_usd, + percent_used, + period, + }; + } + + let warn_threshold = f64::from(warn_at_percent.min(100)); + if percent_used >= warn_threshold { + return BudgetCheck::Warning { + current_usd, + projected_usd, + limit_usd, + percent_used, + period, + }; + } + + BudgetCheck::Allowed +} + +fn build_scope_status( + period: UsagePeriod, + current_usd: f64, + limit_usd: f64, + warn_at_percent: u8, +) -> BudgetScopeStatus { + let check = build_budget_check(period, current_usd, current_usd, limit_usd, warn_at_percent); + let percent_used = if limit_usd > 0.0 { + (current_usd / 
limit_usd) * 100.0 + } else { + 0.0 + }; + + BudgetScopeStatus { + period, + state: check.state(), + current_usd, + limit_usd, + percent_used, + } +} + +fn select_budget_check(checks: I) -> BudgetCheck +where + I: IntoIterator, +{ + checks + .into_iter() + .max_by(|left, right| { + budget_check_severity(left) + .cmp(&budget_check_severity(right)) + .then_with(|| { + budget_check_percent_used(left).total_cmp(&budget_check_percent_used(right)) + }) + }) + .unwrap_or(BudgetCheck::Allowed) +} + +fn budget_check_severity(check: &BudgetCheck) -> u8 { + match check { + BudgetCheck::Allowed => 0, + BudgetCheck::Warning { .. } => 1, + BudgetCheck::Exceeded { .. } => 2, + } +} + +fn budget_check_percent_used(check: &BudgetCheck) -> f64 { + match check { + BudgetCheck::Allowed => 0.0, + BudgetCheck::Warning { percent_used, .. } | BudgetCheck::Exceeded { percent_used, .. } => { + *percent_used + } + } +} + +fn build_history_from_window( + period: UsagePeriod, + window: usize, + now: DateTime, + records: &[CostRecord], +) -> Result { + match period { + UsagePeriod::Day => { + if window > MAX_HISTORY_WINDOW_DAYS { + return Err(anyhow!("History window is too large")); + } + let start = now - Duration::days((window.saturating_sub(1)) as i64); + build_history_from_range(period, start, now, records) + } + UsagePeriod::Month => { + if window > MAX_HISTORY_WINDOW_MONTHS { + return Err(anyhow!("History window is too large")); + } + let month_offset = i32::try_from(window.saturating_sub(1)) + .map_err(|_| anyhow!("History window is too large"))?; + let (start_year, start_month) = shift_month(now.year(), now.month(), -month_offset); + let start = Utc + .with_ymd_and_hms(start_year, start_month, 1, 0, 0, 0) + .single() + .ok_or_else(|| anyhow!("Invalid monthly history window start"))?; + build_history_from_range(period, start, now, records) + } + UsagePeriod::Session => Err(anyhow!("Session history windows are not supported yet")), + UsagePeriod::Mission => Err(anyhow!("Mission history 
windows are not supported yet")), + } +} + +fn build_history_from_range( + period: UsagePeriod, + start: DateTime, + end: DateTime, + records: &[CostRecord], +) -> Result { + match period { + UsagePeriod::Day => build_daily_history(start, end, records), + UsagePeriod::Month => build_monthly_history(start, end, records), + UsagePeriod::Session => Err(anyhow!("Session history ranges are not supported yet")), + UsagePeriod::Mission => Err(anyhow!("Mission history ranges are not supported yet")), + } +} + +fn build_daily_history( + start: DateTime, + end: DateTime, + records: &[CostRecord], +) -> Result { + let start_date = start.date_naive(); + let end_date = end.date_naive(); + let bucket_count = (end_date - start_date).num_days(); + let mut points = Vec::new(); + let mut by_bucket: HashMap = HashMap::new(); + + for index in 0..=bucket_count { + let bucket_date = start_date + Duration::days(index); + by_bucket.insert( + bucket_date, + CostHistoryPoint { + bucket: bucket_date.format("%Y-%m-%d").to_string(), + cost_usd: 0.0, + tokens: 0, + requests: 0, + }, + ); + } + + for record in records { + let bucket_date = record.usage.timestamp.date_naive(); + if bucket_date < start_date || bucket_date > end_date { + continue; + } + + if let Some(point) = by_bucket.get_mut(&bucket_date) { + point.cost_usd += record.usage.cost_usd; + point.tokens += record.usage.total_tokens; + point.requests += 1; + } + } + + let mut dates: Vec<_> = by_bucket.into_iter().collect(); + dates.sort_by_key(|(date, _)| *date); + let mut totals = CostHistoryTotals { + cost_usd: 0.0, + tokens: 0, + requests: 0, + }; + + for (_, point) in dates { + totals.cost_usd += point.cost_usd; + totals.tokens += point.tokens; + totals.requests += point.requests; + points.push(point); + } + + Ok(CostHistory { + period: UsagePeriod::Day, + points, + totals, + }) +} + +fn build_monthly_history( + start: DateTime, + end: DateTime, + records: &[CostRecord], +) -> Result { + let mut points = Vec::new(); + let mut 
by_bucket: HashMap<(i32, u32), CostHistoryPoint> = HashMap::new(); + let mut year = start.year(); + let mut month = start.month(); + let end_key = (end.year(), end.month()); + + loop { + by_bucket.insert( + (year, month), + CostHistoryPoint { + bucket: format!("{year:04}-{month:02}"), + cost_usd: 0.0, + tokens: 0, + requests: 0, + }, + ); + + if (year, month) == end_key { + break; + } + + (year, month) = shift_month(year, month, 1); + } + + for record in records { + let bucket_key = ( + record.usage.timestamp.year(), + record.usage.timestamp.month(), + ); + if let Some(point) = by_bucket.get_mut(&bucket_key) { + point.cost_usd += record.usage.cost_usd; + point.tokens += record.usage.total_tokens; + point.requests += 1; + } + } + + let mut buckets: Vec<_> = by_bucket.into_iter().collect(); + buckets.sort_by_key(|((year, month), _)| (*year, *month)); + let mut totals = CostHistoryTotals { + cost_usd: 0.0, + tokens: 0, + requests: 0, + }; + + for (_, point) in buckets { + totals.cost_usd += point.cost_usd; + totals.tokens += point.tokens; + totals.requests += point.requests; + points.push(point); + } + + Ok(CostHistory { + period: UsagePeriod::Month, + points, + totals, + }) +} + +fn shift_month(year: i32, month: u32, delta: i32) -> (i32, u32) { + let absolute = year * 12 + month as i32 - 1 + delta; + let shifted_year = absolute.div_euclid(12); + let shifted_month = absolute.rem_euclid(12) as u32 + 1; + (shifted_year, shifted_month) +} + +fn matches_reset_scope( + record: &CostRecord, + scope: CostResetScope, + session_id: &str, + now: DateTime, +) -> bool { + match scope { + CostResetScope::Session => record.session_id == session_id, + CostResetScope::Day => record.usage.timestamp.date_naive() == now.date_naive(), + CostResetScope::Month => { + record.usage.timestamp.year() == now.year() + && record.usage.timestamp.month() == now.month() + } + } +} + fn build_session_model_stats(session_costs: &[CostRecord]) -> HashMap { let mut by_model: HashMap = HashMap::new(); @@ 
-303,6 +1039,12 @@ impl CostStorage { Ok(()) } + fn read_records(&self) -> Result> { + let mut records = Vec::new(); + self.for_each_record(|record| records.push(record))?; + Ok(records) + } + fn rebuild_aggregates(&mut self, day: NaiveDate, year: i32, month: u32) -> Result<()> { let mut daily_cost = 0.0; let mut monthly_cost = 0.0; @@ -367,6 +1109,36 @@ impl CostStorage { Ok(()) } + fn replace_records(&mut self, records: &[CostRecord]) -> Result<()> { + if let Some(parent) = self.path.parent() { + fs::create_dir_all(parent) + .with_context(|| format!("Failed to create directory {}", parent.display()))?; + } + + let temp_path = self.path.with_extension("jsonl.tmp"); + let mut file = File::create(&temp_path) + .with_context(|| format!("Failed to create temp storage at {}", temp_path.display()))?; + + for record in records { + writeln!(file, "{}", serde_json::to_string(record)?).with_context(|| { + format!("Failed to write cost record to {}", temp_path.display()) + })?; + } + + file.sync_all() + .with_context(|| format!("Failed to sync temp storage at {}", temp_path.display()))?; + fs::rename(&temp_path, &self.path).with_context(|| { + format!( + "Failed to replace cost storage from {} to {}", + temp_path.display(), + self.path.display() + ) + })?; + + let now = Utc::now(); + self.rebuild_aggregates(now.date_naive(), now.year(), now.month()) + } + /// Get aggregated costs for current day and month. 
fn get_aggregated_costs(&mut self) -> Result<(f64, f64)> { self.ensure_period_cache_current()?; @@ -401,9 +1173,88 @@ impl CostStorage { } } +struct CostAuditStorage { + path: PathBuf, +} + +impl CostAuditStorage { + fn new(path: &Path) -> Result { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .with_context(|| format!("Failed to create directory {}", parent.display()))?; + } + + Ok(Self { + path: path.to_path_buf(), + }) + } + + fn append(&mut self, event: CostAuditEvent) -> Result<()> { + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(&self.path) + .with_context(|| format!("Failed to open audit storage at {}", self.path.display()))?; + + writeln!(file, "{}", serde_json::to_string(&event)?) + .with_context(|| format!("Failed to write audit event to {}", self.path.display()))?; + file.sync_all() + .with_context(|| format!("Failed to sync audit storage at {}", self.path.display()))?; + Ok(()) + } + + fn read_events(&self, limit: usize) -> Result> { + if limit == 0 || !self.path.exists() { + return Ok(Vec::new()); + } + + let file = File::open(&self.path).with_context(|| { + format!("Failed to read audit storage from {}", self.path.display()) + })?; + let reader = BufReader::new(file); + let mut events = Vec::new(); + + for (line_number, line) in reader.lines().enumerate() { + let raw_line = line.with_context(|| { + format!( + "Failed to read line {} from audit storage {}", + line_number + 1, + self.path.display() + ) + })?; + + let trimmed = raw_line.trim(); + if trimmed.is_empty() { + continue; + } + + match serde_json::from_str::(trimmed) { + Ok(event) => events.push(event), + Err(error) => tracing::warn!( + "Skipping malformed cost audit record at {}:{}: {error}", + self.path.display(), + line_number + 1 + ), + } + } + + if events.len() > limit { + let split_at = events.len() - limit; + Ok(events.split_off(split_at)) + } else { + Ok(events) + } + } +} + #[cfg(test)] mod tests { use super::*; + use crate::cost::{ 
+ CostAuditKind, CostOverrideRequest, CostOverrideScope, CostResetRequest, CostResetScope, + CostService, + }; + use chrono::TimeZone; use tempfile::TempDir; fn enabled_config() -> CostConfig { @@ -533,4 +1384,307 @@ mod tests { .to_string() .contains("Estimated cost must be a finite, non-negative value")); } + + #[test] + fn warning_threshold_uses_projected_cost_math() { + let tmp = TempDir::new().unwrap(); + let config = CostConfig { + enabled: true, + session_limit_usd: 10.0, + daily_limit_usd: 10.0, + monthly_limit_usd: 100.0, + warn_at_percent: 80, + ..Default::default() + }; + let tracker = CostTracker::new(config, tmp.path()).unwrap(); + + let usage = TokenUsage::new("test/model", 1_000_000, 0, 7.5, 0.0); + tracker.record_usage(usage).unwrap(); + + let check = tracker.check_budget(0.5).unwrap(); + match check { + BudgetCheck::Warning { + period, + current_usd, + limit_usd, + projected_usd, + percent_used, + } => { + assert_eq!(period, UsagePeriod::Day); + assert!((current_usd - 7.5).abs() < 0.0001); + assert!((projected_usd - 8.0).abs() < 0.0001); + assert!((limit_usd - 10.0).abs() < 0.0001); + assert!((percent_used - 80.0).abs() < 0.0001); + } + other => panic!("expected warning, got {other:?}"), + } + } + + #[test] + fn session_scope_is_evaluated_with_day_and_month_limits() { + let tmp = TempDir::new().unwrap(); + let config = CostConfig { + enabled: true, + session_limit_usd: 5.0, + daily_limit_usd: 10.0, + monthly_limit_usd: 100.0, + warn_at_percent: 80, + ..Default::default() + }; + let tracker = CostTracker::new(config, tmp.path()).unwrap(); + + let mut usage = TokenUsage::new("test/model", 1_000, 0, 0.0, 0.0); + usage.cost_usd = 4.9; + tracker.record_usage(usage).unwrap(); + + match tracker.check_budget(0.2).unwrap() { + BudgetCheck::Exceeded { + period, limit_usd, .. 
+ } => { + assert_eq!(period, UsagePeriod::Session); + assert!((limit_usd - 5.0).abs() < 0.0001); + } + other => panic!("expected session-scope exceedance, got {other:?}"), + } + + let scopes = tracker.scope_statuses().unwrap(); + assert!(scopes + .iter() + .any(|scope| scope.period == UsagePeriod::Session)); + assert!(scopes.iter().any(|scope| scope.period == UsagePeriod::Day)); + assert!(scopes + .iter() + .any(|scope| scope.period == UsagePeriod::Month)); + } + + #[test] + fn mission_scope_can_govern_request_when_more_restrictive() { + let tmp = TempDir::new().unwrap(); + let config = CostConfig { + enabled: true, + session_limit_usd: 10.0, + daily_limit_usd: 100.0, + monthly_limit_usd: 1000.0, + warn_at_percent: 80, + ..Default::default() + }; + let tracker = Arc::new(CostTracker::new(config, tmp.path()).unwrap()); + let service = CostService::new(tracker); + + let evaluation = service + .evaluate_request( + 0.1, + Some(crate::cost::MissionBudgetScope { + mission_id: "mission-a".to_string(), + current_usd: 0.95, + limit_usd: 1.0, + }), + chrono::Utc::now(), + ) + .unwrap(); + + match evaluation { + crate::cost::BudgetEvaluation::Blocked { + check: + BudgetCheck::Exceeded { + period, limit_usd, .. 
+ }, + } => { + assert_eq!(period, UsagePeriod::Mission); + assert!((limit_usd - 1.0).abs() < 0.0001); + } + other => panic!("expected mission-scope block, got {other:?}"), + } + } + + #[test] + fn history_window_aggregates_daily_buckets() { + let tmp = TempDir::new().unwrap(); + let tracker = Arc::new(CostTracker::new(enabled_config(), tmp.path()).unwrap()); + let service = CostService::new(tracker.clone()); + let now = chrono::Utc + .with_ymd_and_hms(2026, 4, 6, 12, 0, 0) + .single() + .unwrap(); + + let records = [ + ("day-1", now - chrono::Duration::days(2), 1.25), + ("day-2", now - chrono::Duration::days(1), 2.0), + ( + "day-2", + now - chrono::Duration::days(1) + chrono::Duration::hours(1), + 0.5, + ), + ("day-3", now, 3.25), + ]; + + for (session_id, timestamp, cost_usd) in records { + let mut usage = TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = cost_usd; + usage.timestamp = timestamp; + tracker.record_usage_for_session(session_id, usage).unwrap(); + } + + let history = service.history_window(UsagePeriod::Day, 3, now).unwrap(); + assert_eq!(history.points.len(), 3); + assert_eq!(history.points[0].bucket, "2026-04-04"); + assert!((history.points[0].cost_usd - 1.25).abs() < 0.0001); + assert_eq!(history.points[1].bucket, "2026-04-05"); + assert!((history.points[1].cost_usd - 2.5).abs() < 0.0001); + assert_eq!(history.points[2].bucket, "2026-04-06"); + assert!((history.points[2].cost_usd - 3.25).abs() < 0.0001); + assert!((history.totals.cost_usd - 7.0).abs() < 0.0001); + } + + #[test] + fn history_window_rejects_oversized_ranges() { + let tmp = TempDir::new().unwrap(); + let tracker = Arc::new(CostTracker::new(enabled_config(), tmp.path()).unwrap()); + let service = CostService::new(tracker); + let now = chrono::Utc::now(); + + let day_err = service + .history_window(UsagePeriod::Day, MAX_HISTORY_WINDOW_DAYS + 1, now) + .unwrap_err(); + assert!(day_err.to_string().contains("History window is too large")); + + let month_err = service 
+ .history_window(UsagePeriod::Month, MAX_HISTORY_WINDOW_MONTHS + 1, now) + .unwrap_err(); + assert!(month_err + .to_string() + .contains("History window is too large")); + } + + #[test] + fn evaluate_request_reserves_budget_until_released() { + let tmp = TempDir::new().unwrap(); + let config = CostConfig { + enabled: true, + session_limit_usd: 1.0, + daily_limit_usd: 10.0, + monthly_limit_usd: 100.0, + warn_at_percent: 80, + ..Default::default() + }; + let tracker = Arc::new(CostTracker::new(config, tmp.path()).unwrap()); + let service = CostService::new(tracker.clone()); + let now = chrono::Utc::now(); + + let first = service.evaluate_request(0.75, None, now).unwrap(); + let reservation = match first { + crate::cost::BudgetEvaluation::Proceed { + reservation: Some(reservation), + .. + } => reservation, + other => panic!("expected reservation, got {other:?}"), + }; + + let second = service.evaluate_request(0.3, None, now).unwrap(); + assert!(matches!( + second, + crate::cost::BudgetEvaluation::Blocked { .. } + )); + + tracker.release_budget_reservation(&reservation.id); + + let third = service.evaluate_request(0.3, None, now).unwrap(); + assert!(matches!( + third, + crate::cost::BudgetEvaluation::Proceed { .. 
} + )); + } + + #[test] + fn reset_session_removes_only_current_session_records() { + let tmp = TempDir::new().unwrap(); + let tracker = Arc::new(CostTracker::new(enabled_config(), tmp.path()).unwrap()); + let service = CostService::new(tracker.clone()); + let now = chrono::Utc + .with_ymd_and_hms(2026, 4, 6, 12, 0, 0) + .single() + .unwrap(); + + let mut current_usage = TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + current_usage.cost_usd = 1.5; + current_usage.timestamp = now; + tracker.record_usage(current_usage).unwrap(); + + let mut other_usage = TokenUsage::new("test/model", 2_000, 500, 0.0, 0.0); + other_usage.cost_usd = 2.0; + other_usage.timestamp = now; + tracker + .record_usage_for_session("other-session", other_usage) + .unwrap(); + + let result = service + .reset( + CostResetRequest { + scope: CostResetScope::Session, + actor: "tester".to_string(), + reason: Some("clear current session".to_string()), + }, + now, + ) + .unwrap(); + + assert_eq!(result.scope, CostResetScope::Session); + assert_eq!(result.removed_requests, 1); + assert!((result.removed_cost_usd - 1.5).abs() < 0.0001); + + let summary = tracker.get_summary().unwrap(); + assert_eq!(summary.request_count, 0); + let day_cost = tracker.get_daily_cost(now.date_naive()).unwrap(); + assert!((day_cost - 2.0).abs() < 0.0001); + } + + #[test] + fn next_request_override_expires_before_use() { + let tmp = TempDir::new().unwrap(); + let config = CostConfig { + enabled: true, + session_limit_usd: 1.0, + daily_limit_usd: 1.0, + monthly_limit_usd: 100.0, + warn_at_percent: 80, + allow_override: true, + ..Default::default() + }; + let tracker = Arc::new(CostTracker::new(config, tmp.path()).unwrap()); + let service = CostService::new(tracker.clone()); + let now = chrono::Utc + .with_ymd_and_hms(2026, 4, 6, 12, 0, 0) + .single() + .unwrap(); + + let mut usage = TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.1; + usage.timestamp = now; + tracker.record_usage(usage).unwrap(); 
+ + service + .apply_override( + CostOverrideRequest { + actor: "operator".to_string(), + scope: CostOverrideScope::NextRequest, + reason: Some("one retry".to_string()), + expires_at: Some(now + chrono::Duration::minutes(5)), + }, + now, + ) + .unwrap(); + + let evaluation = service + .evaluate_request(0.1, None, now + chrono::Duration::minutes(6)) + .unwrap(); + assert!(matches!( + evaluation, + crate::cost::BudgetEvaluation::Blocked { .. } + )); + + let audit = service.audit_trail(10).unwrap(); + assert!(audit + .iter() + .any(|event| event.kind == CostAuditKind::OverrideExpired)); + } } diff --git a/clients/agent-runtime/src/cost/types.rs b/clients/agent-runtime/src/cost/types.rs index 0e8d1679..9f671983 100755 --- a/clients/agent-runtime/src/cost/types.rs +++ b/clients/agent-runtime/src/cost/types.rs @@ -1,4 +1,5 @@ use serde::{Deserialize, Serialize}; +use std::collections::HashMap; /// Token usage information from a single API call. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -62,10 +63,12 @@ impl TokenUsage { /// Time period for cost aggregation. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] pub enum UsagePeriod { Session, Day, Month, + Mission, } /// A single cost record for persistent storage. @@ -91,26 +94,92 @@ impl CostRecord { } /// Budget enforcement result. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum BudgetCheck { /// Within budget, request can proceed Allowed, /// Warning threshold exceeded but request can proceed Warning { current_usd: f64, + projected_usd: f64, limit_usd: f64, + percent_used: f64, period: UsagePeriod, }, /// Budget exceeded, request blocked Exceeded { current_usd: f64, + projected_usd: f64, limit_usd: f64, + percent_used: f64, period: UsagePeriod, }, } +impl BudgetCheck { + pub fn state(&self) -> BudgetState { + match self { + Self::Allowed => BudgetState::Allowed, + Self::Warning { .. } => BudgetState::Warning, + Self::Exceeded { .. 
} => BudgetState::Exceeded, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BudgetState { + Allowed, + Warning, + Exceeded, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct BudgetScopeStatus { + pub period: UsagePeriod, + pub state: BudgetState, + pub current_usd: f64, + pub limit_usd: f64, + pub percent_used: f64, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MissionBudgetScope { + pub mission_id: String, + pub current_usd: f64, + pub limit_usd: f64, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct CostBudgetReservation { + pub id: String, + pub estimated_cost_usd: f64, + pub mission_id: Option, + pub created_at: chrono::DateTime, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct CostTrackerSnapshot { + pub session_id: String, + pub usage: CostSummary, + pub scope_statuses: Vec, + pub active_override: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum BudgetEvaluation { + Proceed { + check: BudgetCheck, + override_applied: Option, + reservation: Option, + }, + Blocked { + check: BudgetCheck, + }, +} + /// Cost summary for reporting. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct CostSummary { /// Total cost for the session pub session_cost_usd: f64, @@ -123,11 +192,21 @@ pub struct CostSummary { /// Number of requests pub request_count: usize, /// Breakdown by model - pub by_model: std::collections::HashMap, + pub by_model: HashMap, } -/// Statistics for a specific model. #[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CostGovernanceSummary { + pub session_id: String, + pub usage: CostSummary, + pub budget_state: BudgetState, + pub active_period: Option, + pub scope_statuses: Vec, + pub active_override: Option, +} + +/// Statistics for a specific model. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct ModelStats { /// Model name pub model: String, @@ -139,6 +218,104 @@ pub struct ModelStats { pub request_count: usize, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostHistoryPoint { + pub bucket: String, + pub cost_usd: f64, + pub tokens: u64, + pub requests: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostHistoryTotals { + pub cost_usd: f64, + pub tokens: u64, + pub requests: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostHistory { + pub period: UsagePeriod, + pub points: Vec, + pub totals: CostHistoryTotals, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CostOverrideScope { + NextRequest, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostOverrideRequest { + pub actor: String, + pub scope: CostOverrideScope, + pub reason: Option, + pub expires_at: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostOverrideRecord { + pub id: String, + pub actor: String, + pub scope: CostOverrideScope, + pub reason: Option, + pub requested_at: chrono::DateTime, + pub expires_at: Option>, + pub session_id: Option, + pub remaining_uses: u32, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CostResetScope { + Session, + Day, + Month, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostResetRequest { + pub scope: CostResetScope, + pub actor: String, + pub reason: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostResetResult { + pub scope: CostResetScope, + pub removed_cost_usd: f64, + pub removed_requests: usize, + pub effective_at: chrono::DateTime, + pub audit_event: CostAuditEvent, +} + +#[derive(Debug, Clone, Copy, 
PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CostAuditKind { + OverrideGranted, + OverrideConsumed, + OverrideExpired, + ResetApplied, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CostAuditEvent { + pub id: String, + pub kind: CostAuditKind, + pub recorded_at: chrono::DateTime, + pub actor: Option, + pub reason: Option, + pub period: Option, + pub override_scope: Option, + pub reset_scope: Option, + pub override_id: Option, + pub session_id: Option, + pub expires_at: Option>, + pub removed_cost_usd: Option, + pub removed_requests: Option, +} + impl Default for CostSummary { fn default() -> Self { Self { @@ -147,7 +324,7 @@ impl Default for CostSummary { monthly_cost_usd: 0.0, total_tokens: 0, request_count: 0, - by_model: std::collections::HashMap::new(), + by_model: HashMap::new(), } } } diff --git a/clients/agent-runtime/src/gateway/admin.rs b/clients/agent-runtime/src/gateway/admin.rs index f69d1a0b..13f5343d 100644 --- a/clients/agent-runtime/src/gateway/admin.rs +++ b/clients/agent-runtime/src/gateway/admin.rs @@ -102,11 +102,12 @@ pub struct AdminAutonomyView { pub level: AutonomyLevel, pub workspace_only: bool, pub max_actions_per_hour: u32, - pub max_cost_per_day_cents: u32, pub require_approval_for_medium_risk: bool, pub block_high_risk_commands: bool, pub auto_approve: Vec, pub always_ask: Vec, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub deprecated_fields: Vec, } #[derive(Debug, Clone, serde::Serialize)] @@ -199,6 +200,7 @@ pub struct AdminChannelStatusView { #[derive(Debug, Clone, serde::Serialize)] pub struct AdminCostView { pub enabled: bool, + pub session_limit_usd: f64, pub daily_limit_usd: f64, pub monthly_limit_usd: f64, pub warn_at_percent: u8, @@ -290,6 +292,8 @@ pub struct AdminConfigUpdateRequest { pub browser: Option, #[serde(default)] pub memory: Option, + #[serde(default)] + pub cost: Option, } #[derive(Debug, Clone, serde::Deserialize)] @@ -371,6 
+375,20 @@ pub struct AdminAutonomyPatch { pub always_ask: Option>, } +impl AdminAutonomyPatch { + fn normalized_max_actions_per_hour(&self) -> Option { + self.max_actions_per_hour.or(self.max_cost_per_day_cents) + } + + fn deprecated_fields(&self) -> Vec { + if self.max_cost_per_day_cents.is_some() { + vec!["autonomy.max_cost_per_day_cents".to_string()] + } else { + Vec::new() + } + } +} + #[derive(Debug, Clone, serde::Deserialize)] #[serde(deny_unknown_fields)] pub struct AdminIdentityPatch { @@ -453,6 +471,23 @@ pub struct AdminMemoryPatch { pub cerebro: Option, } +#[derive(Debug, Clone, serde::Deserialize)] +#[serde(deny_unknown_fields)] +pub struct AdminCostPatch { + #[serde(default)] + pub enabled: Option, + #[serde(default)] + pub session_limit_usd: Option, + #[serde(default)] + pub daily_limit_usd: Option, + #[serde(default)] + pub monthly_limit_usd: Option, + #[serde(default)] + pub warn_at_percent: Option, + #[serde(default)] + pub allow_override: Option, +} + #[derive(Debug, Clone, serde::Deserialize)] #[serde(deny_unknown_fields)] pub struct AdminCerebroMemoryPatch { @@ -653,11 +688,11 @@ pub fn admin_config_view(cfg: &Config) -> AdminConfigView { level: cfg.autonomy.level, workspace_only: cfg.autonomy.workspace_only, max_actions_per_hour: cfg.autonomy.max_actions_per_hour, - max_cost_per_day_cents: cfg.autonomy.max_cost_per_day_cents, require_approval_for_medium_risk: cfg.autonomy.require_approval_for_medium_risk, block_high_risk_commands: cfg.autonomy.block_high_risk_commands, auto_approve: cfg.autonomy.auto_approve.clone(), always_ask: cfg.autonomy.always_ask.clone(), + deprecated_fields: cfg.autonomy.deprecated_fields().to_vec(), }, identity: AdminIdentityView { format: cfg.identity.format.clone(), @@ -748,6 +783,7 @@ pub fn admin_config_view(cfg: &Config) -> AdminConfigView { }, cost: AdminCostView { enabled: cfg.cost.enabled, + session_limit_usd: cfg.cost.session_limit_usd, daily_limit_usd: cfg.cost.daily_limit_usd, monthly_limit_usd: 
cfg.cost.monthly_limit_usd, warn_at_percent: cfg.cost.warn_at_percent, @@ -1143,6 +1179,7 @@ fn apply_patch(cfg: &mut Config, patch: &AdminConfigUpdateRequest) -> Result<(), apply_channels_patch(cfg, patch)?; apply_integrations_patch(cfg, patch)?; apply_memory_patch(cfg, patch)?; + apply_cost_patch(cfg, patch.cost.as_ref())?; Ok(()) } @@ -1264,11 +1301,13 @@ fn apply_autonomy_patch(cfg: &mut Config, autonomy: Option<&AdminAutonomyPatch>) if let Some(workspace_only) = autonomy.workspace_only { cfg.autonomy.workspace_only = workspace_only; } - if let Some(max_actions_per_hour) = autonomy.max_actions_per_hour { + if let Some(max_actions_per_hour) = autonomy.normalized_max_actions_per_hour() { cfg.autonomy.max_actions_per_hour = max_actions_per_hour; } - if let Some(max_cost_per_day_cents) = autonomy.max_cost_per_day_cents { - cfg.autonomy.max_cost_per_day_cents = max_cost_per_day_cents; + for deprecated_field in autonomy.deprecated_fields() { + if !cfg.autonomy.deprecated_fields.contains(&deprecated_field) { + cfg.autonomy.deprecated_fields.push(deprecated_field); + } } if let Some(require_approval_for_medium_risk) = autonomy.require_approval_for_medium_risk { cfg.autonomy.require_approval_for_medium_risk = require_approval_for_medium_risk; @@ -1627,6 +1666,53 @@ fn apply_memory_patch( Ok(()) } +fn apply_cost_patch(cfg: &mut Config, cost: Option<&AdminCostPatch>) -> Result<(), AdminResponse> { + let Some(cost) = cost else { + return Ok(()); + }; + + if let Some(enabled) = cost.enabled { + cfg.cost.enabled = enabled; + } + if let Some(session_limit_usd) = cost.session_limit_usd { + if !session_limit_usd.is_finite() || session_limit_usd < 0.0 { + return Err(bad_request( + "cost.session_limit_usd must be a finite value greater than or equal to 0", + )); + } + cfg.cost.session_limit_usd = session_limit_usd; + } + if let Some(daily_limit_usd) = cost.daily_limit_usd { + if !daily_limit_usd.is_finite() || daily_limit_usd <= 0.0 { + return Err(bad_request( + 
"cost.daily_limit_usd must be a finite value greater than 0", + )); + } + cfg.cost.daily_limit_usd = daily_limit_usd; + } + if let Some(monthly_limit_usd) = cost.monthly_limit_usd { + if !monthly_limit_usd.is_finite() || monthly_limit_usd <= 0.0 { + return Err(bad_request( + "cost.monthly_limit_usd must be a finite value greater than 0", + )); + } + cfg.cost.monthly_limit_usd = monthly_limit_usd; + } + if let Some(warn_at_percent) = cost.warn_at_percent { + if warn_at_percent == 0 || warn_at_percent > 100 { + return Err(bad_request( + "cost.warn_at_percent must be in range [1, 100]", + )); + } + cfg.cost.warn_at_percent = warn_at_percent; + } + if let Some(allow_override) = cost.allow_override { + cfg.cost.allow_override = allow_override; + } + + Ok(()) +} + fn apply_memory_backend_patch( cfg: &mut Config, backend: Option<&str>, @@ -1997,6 +2083,9 @@ pub async fn handle_admin_update_config( match next_cfg.save() { Ok(()) => ( { + if let Some(cost_tracker) = state.cost_tracker.as_ref() { + cost_tracker.update_config(next_cfg.cost.clone()); + } let mut shared_cfg = state.config.lock(); *shared_cfg = next_cfg; StatusCode::OK @@ -2400,6 +2489,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -3020,6 +3110,7 @@ mod tests { web_search: None, browser: None, memory: None, + cost: None, } } @@ -3160,6 +3251,7 @@ mod tests { web_search: None, browser: None, memory: None, + cost: None, }; let fields = restart_required_updates(&cfg, &patch); diff --git a/clients/agent-runtime/src/gateway/cost.rs b/clients/agent-runtime/src/gateway/cost.rs new file mode 100644 index 00000000..a0673f5d --- /dev/null +++ b/clients/agent-runtime/src/gateway/cost.rs @@ -0,0 +1,764 @@ +use crate::config::Config; +use crate::cost::{ + BudgetScopeStatus, BudgetState, CostOverrideRecord, CostOverrideRequest, 
CostOverrideScope, + CostResetRequest, CostResetResult, CostResetScope, CostService, UsagePeriod, +}; +use crate::gateway::{self, AppState}; +use crate::observability::{BudgetOverrideAction, BudgetOverrideEvent, ObserverEvent}; +use axum::{ + extract::{Query, State}, + http::{HeaderMap, StatusCode}, + response::{IntoResponse, Json}, +}; +use chrono::Utc; + +#[derive(Debug, Clone, serde::Deserialize, Default)] +pub struct CostHistoryQuery { + #[serde(default)] + pub period: Option, + #[serde(default)] + pub window: Option, +} + +#[derive(Debug, Clone, serde::Deserialize)] +#[serde(deny_unknown_fields)] +pub struct AdminCostResetRequest { + pub scope: CostResetScope, + #[serde(default)] + pub reason: Option, +} + +#[derive(Debug, Clone, serde::Deserialize)] +#[serde(deny_unknown_fields)] +pub struct AdminCostOverrideRequest { + pub scope: CostOverrideScope, + #[serde(default)] + pub reason: Option, +} + +#[derive(Debug, Clone, serde::Serialize)] +struct CostSummaryResponse { + summary: CostSummaryPayload, + config: CostConfigPayload, +} + +#[derive(Debug, Clone, serde::Serialize)] +struct CostSummaryPayload { + session_cost_usd: f64, + daily_cost_usd: f64, + monthly_cost_usd: f64, + total_tokens: u64, + request_count: usize, + percent_used_session: f64, + percent_used_daily: f64, + percent_used_monthly: f64, + budget_state: BudgetState, + #[serde(skip_serializing_if = "Option::is_none")] + period: Option, +} + +#[derive(Debug, Clone, serde::Serialize)] +struct CostConfigPayload { + enabled: bool, + session_limit_usd: f64, + daily_limit_usd: f64, + monthly_limit_usd: f64, + warn_at_percent: u8, + allow_override: bool, +} + +type CostResponse = (StatusCode, Json); + +fn internal_error(message: &'static str, error: &dyn std::fmt::Display) -> CostResponse { + tracing::error!("{message}: {error}"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ "error": message })), + ) +} + +fn bad_request(message: &str) -> CostResponse { + ( + 
StatusCode::BAD_REQUEST, + Json(serde_json::json!({ "error": message })), + ) +} + +fn is_history_query_error(error: &anyhow::Error) -> bool { + let message = error.to_string(); + message.contains("History window must be greater than zero") + || message.contains("History window is too large") + || message.contains("History range start must be before end") + || message.contains("history windows are not supported yet") + || message.contains("history ranges are not supported yet") +} + +fn cost_service_from_state(state: &AppState) -> Result<(Config, CostService), CostResponse> { + let config = state.config.lock().clone(); + let service = match state.cost_tracker.clone() { + Some(tracker) => CostService::new(tracker), + None => CostService::disabled(), + }; + Ok((config, service)) +} + +fn scope_percent(scope_statuses: &[BudgetScopeStatus], period: UsagePeriod) -> f64 { + scope_statuses + .iter() + .find(|status| status.period == period) + .map_or(0.0, |status| status.percent_used) +} + +#[allow(clippy::unused_async)] +pub async fn handle_cost_summary( + State(state): State, + headers: HeaderMap, +) -> impl IntoResponse { + if let Some(rejection) = gateway::utils::admin_origin_guard(&headers) { + return rejection; + } + if let Some(rejection) = gateway::utils::admin_requires_auth(&state, &headers) { + return rejection; + } + + let (config, service) = match cost_service_from_state(&state) { + Ok(value) => value, + Err(response) => return response, + }; + + let summary = match service.current_summary(Utc::now()) { + Ok(summary) => summary, + Err(error) => return internal_error("Failed to load cost summary", &error), + }; + + let payload = CostSummaryResponse { + summary: CostSummaryPayload { + session_cost_usd: summary.usage.session_cost_usd, + daily_cost_usd: summary.usage.daily_cost_usd, + monthly_cost_usd: summary.usage.monthly_cost_usd, + total_tokens: summary.usage.total_tokens, + request_count: summary.usage.request_count, + percent_used_session: 
scope_percent(&summary.scope_statuses, UsagePeriod::Session), + percent_used_daily: scope_percent(&summary.scope_statuses, UsagePeriod::Day), + percent_used_monthly: scope_percent(&summary.scope_statuses, UsagePeriod::Month), + budget_state: summary.budget_state, + period: summary.active_period, + }, + config: CostConfigPayload { + enabled: config.cost.enabled, + session_limit_usd: config.cost.session_limit_usd, + daily_limit_usd: config.cost.daily_limit_usd, + monthly_limit_usd: config.cost.monthly_limit_usd, + warn_at_percent: config.cost.warn_at_percent, + allow_override: config.cost.allow_override, + }, + }; + + match serde_json::to_value(payload) { + Ok(value) => (StatusCode::OK, Json(value)), + Err(error) => internal_error("Failed to serialize cost summary", &error), + } +} + +#[allow(clippy::unused_async)] +pub async fn handle_cost_history( + State(state): State, + headers: HeaderMap, + Query(query): Query, +) -> impl IntoResponse { + if let Some(rejection) = gateway::utils::admin_origin_guard(&headers) { + return rejection; + } + if let Some(rejection) = gateway::utils::admin_requires_auth(&state, &headers) { + return rejection; + } + + let (_, service) = match cost_service_from_state(&state) { + Ok(value) => value, + Err(response) => return response, + }; + + let period = query.period.unwrap_or(UsagePeriod::Day); + let window = query.window.unwrap_or(30); + let history = match service.history_window(period, window, Utc::now()) { + Ok(history) => history, + Err(error) if is_history_query_error(&error) => { + return bad_request("Invalid cost history query") + } + Err(error) => return internal_error("Failed to load cost history", &error), + }; + + match serde_json::to_value(history) { + Ok(value) => (StatusCode::OK, Json(value)), + Err(error) => internal_error("Failed to serialize cost history", &error), + } +} + +#[allow(clippy::unused_async)] +pub async fn handle_admin_cost_reset( + State(state): State, + headers: HeaderMap, + body: Result, 
axum::extract::rejection::JsonRejection>, +) -> impl IntoResponse { + if let Some(rejection) = gateway::utils::admin_origin_guard(&headers) { + return rejection; + } + if let Some(rejection) = gateway::utils::admin_requires_auth(&state, &headers) { + return rejection; + } + + let Json(request) = match body { + Ok(body) => body, + Err(_) => { + return bad_request("Invalid JSON body for cost reset"); + } + }; + + let (_, service) = match cost_service_from_state(&state) { + Ok(value) => value, + Err(response) => return response, + }; + + let result = match service.reset( + CostResetRequest { + scope: request.scope, + actor: "gateway-admin".to_string(), + reason: request.reason, + }, + Utc::now(), + ) { + Ok(result) => result, + Err(error) => return internal_error("Failed to reset tracked costs", &error), + }; + + serialize_reset_result(result) +} + +#[allow(clippy::unused_async)] +pub async fn handle_admin_cost_override( + State(state): State, + headers: HeaderMap, + body: Result, axum::extract::rejection::JsonRejection>, +) -> impl IntoResponse { + if let Some(rejection) = gateway::utils::admin_origin_guard(&headers) { + return rejection; + } + if let Some(rejection) = gateway::utils::admin_requires_auth(&state, &headers) { + return rejection; + } + + let Json(request) = match body { + Ok(body) => body, + Err(_) => return bad_request("Invalid JSON body for cost override"), + }; + + let (_, service) = match cost_service_from_state(&state) { + Ok(value) => value, + Err(response) => return response, + }; + + let now = Utc::now(); + let previous_summary = match service.current_summary(now) { + Ok(summary) => summary, + Err(error) => return internal_error("Failed to load current cost state", &error), + }; + + let result = match service.apply_override( + CostOverrideRequest { + actor: "gateway-admin".to_string(), + scope: request.scope, + reason: request.reason, + expires_at: None, + }, + now, + ) { + Ok(result) => result, + Err(error) if 
error.to_string().contains("disabled by policy") => { + return ( + StatusCode::FORBIDDEN, + Json(serde_json::json!({ "error": error.to_string() })), + ); + } + Err(error) => return internal_error("Failed to apply cost override", &error), + }; + + state + .observer + .record_event(&ObserverEvent::BudgetOverride(BudgetOverrideEvent { + action: BudgetOverrideAction::Granted, + actor: result.actor.clone(), + scope: result.scope, + reason: result.reason.clone(), + session_id: result.session_id.clone(), + previous_state: previous_summary.budget_state, + period: previous_summary.active_period, + override_id: Some(result.id.clone()), + surface: Some("gateway_admin".to_string()), + })); + + serialize_override_result(result) +} + +fn serialize_reset_result(result: CostResetResult) -> CostResponse { + match serde_json::to_value(result) { + Ok(value) => (StatusCode::OK, Json(value)), + Err(error) => internal_error("Failed to serialize cost reset result", &error), + } +} + +fn serialize_override_result(result: CostOverrideRecord) -> CostResponse { + match serde_json::to_value(result) { + Ok(value) => (StatusCode::OK, Json(value)), + Err(error) => internal_error("Failed to serialize cost override result", &error), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::Config; + use crate::gateway::{GatewayRateLimiter, IdempotencyStore}; + use crate::memory::Memory; + use crate::security::pairing::PairingGuard; + use axum::http::{header, HeaderValue}; + use http_body_util::BodyExt; + use parking_lot::Mutex; + use std::sync::Arc; + use std::time::Duration; + + fn temp_config() -> Config { + let root = + std::env::temp_dir().join(format!("corvus-cost-gateway-test-{}", uuid::Uuid::new_v4())); + std::fs::create_dir_all(&root).unwrap(); + let config_path = root.join("config.toml"); + let workspace_path = root.join("workspace"); + std::fs::create_dir_all(&workspace_path).unwrap(); + let mut config = Config::default(); + config.config_path = config_path; + 
config.workspace_dir = workspace_path; + config + } + + fn test_state(config: Config, paired_token: Option<&str>) -> AppState { + let token = paired_token + .map(ToOwned::to_owned) + .into_iter() + .collect::>(); + let cost_tracker = Arc::new( + crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(), + ); + AppState { + config: Arc::new(Mutex::new(config)), + cost_tracker: Some(cost_tracker), + provider: Arc::new(crate::gateway::tests::MockProvider::default()), + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(crate::gateway::tests::MockMemory) as Arc, + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(paired_token.is_some(), &token)), + trust_forwarded_headers: false, + rate_limiter: Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: Arc::new(crate::observability::NoopObserver), + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + } + } + + fn test_state_without_tracker(config: Config, paired_token: Option<&str>) -> AppState { + let token = paired_token + .map(ToOwned::to_owned) + .into_iter() + .collect::>(); + AppState { + config: Arc::new(Mutex::new(config)), + cost_tracker: None, + provider: Arc::new(crate::gateway::tests::MockProvider::default()), + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(crate::gateway::tests::MockMemory) as Arc, + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(paired_token.is_some(), &token)), + trust_forwarded_headers: false, + rate_limiter: Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: 
Arc::new(crate::observability::NoopObserver), + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + } + } + + fn admin_headers(token: &str) -> HeaderMap { + let mut headers = HeaderMap::new(); + headers.insert( + header::AUTHORIZATION, + HeaderValue::from_str(&format!("Bearer {token}")).unwrap(), + ); + headers.insert( + header::ORIGIN, + HeaderValue::from_static("http://127.0.0.1:3000"), + ); + headers + } + + async fn response_json(response: impl IntoResponse) -> (StatusCode, serde_json::Value) { + let response = response.into_response(); + let status = response.status(); + let body = response.into_body().collect().await.unwrap().to_bytes(); + let json: serde_json::Value = serde_json::from_slice(&body).unwrap_or_default(); + (status, json) + } + + fn record_usage(config: &Config, cost_usd: f64) { + let tracker = + crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(); + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = cost_usd; + tracker.record_usage(usage).unwrap(); + } + + #[tokio::test] + async fn cost_summary_returns_usage_and_config_payload() { + let mut config = temp_config(); + config.cost.enabled = true; + config.cost.session_limit_usd = 3.0; + config.cost.daily_limit_usd = 5.0; + config.cost.monthly_limit_usd = 25.0; + let state = test_state(config, Some("zc_valid_token")); + let tracker = state.cost_tracker.clone().unwrap(); + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 2.5; + tracker.record_usage(usage).unwrap(); + + let (status, json) = + response_json(handle_cost_summary(State(state), admin_headers("zc_valid_token")).await) + .await; + + assert_eq!(status, StatusCode::OK); + assert_eq!(json["config"]["enabled"], true); + assert_eq!(json["summary"]["daily_cost_usd"], 2.5); + assert_eq!(json["summary"]["session_cost_usd"], 2.5); + assert_eq!(json["summary"]["budget_state"], "warning"); + 
assert_eq!(json["summary"]["period"], "session"); + assert_eq!( + json["summary"]["percent_used_session"], + serde_json::json!(83.333_333_333_333_34) + ); + assert_eq!(json["summary"]["percent_used_daily"], 50.0); + assert_eq!(json["summary"]["percent_used_monthly"], 10.0); + assert_eq!(json["config"]["session_limit_usd"], 3.0); + assert_eq!(json["config"]["daily_limit_usd"], 5.0); + assert_eq!(json["config"]["monthly_limit_usd"], 25.0); + assert_eq!(json["config"]["allow_override"], false); + } + + #[tokio::test] + async fn cost_history_returns_bucketed_payload() { + let mut config = temp_config(); + config.cost.enabled = true; + + let tracker = + crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(); + let now = chrono::Utc::now(); + + let mut first = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + first.cost_usd = 1.0; + first.timestamp = now - chrono::Duration::days(1); + tracker + .record_usage_for_session("history-a", first) + .unwrap(); + + let mut second = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + second.cost_usd = 2.0; + second.timestamp = now; + tracker + .record_usage_for_session("history-b", second) + .unwrap(); + + let state = test_state(config, Some("zc_valid_token")); + let (status, json) = response_json( + handle_cost_history( + State(state), + admin_headers("zc_valid_token"), + Query(CostHistoryQuery { + period: Some(UsagePeriod::Day), + window: Some(2), + }), + ) + .await, + ) + .await; + + assert_eq!(status, StatusCode::OK); + assert_eq!(json["period"], "day"); + assert_eq!(json["points"].as_array().unwrap().len(), 2); + assert_eq!(json["totals"]["cost_usd"], 3.0); + assert_eq!(json["totals"]["tokens"], 3_000); + assert_eq!(json["totals"]["requests"], 2); + assert_eq!(json["points"][0]["tokens"], 1_500); + assert_eq!(json["points"][0]["requests"], 1); + assert_eq!(json["points"][1]["tokens"], 1_500); + assert_eq!(json["points"][1]["requests"], 1); + } + + #[tokio::test] + 
async fn cost_summary_returns_disabled_payload_without_tracker() { + let mut config = temp_config(); + config.cost.enabled = false; + + let state = test_state_without_tracker(config, Some("zc_valid_token")); + let (status, json) = + response_json(handle_cost_summary(State(state), admin_headers("zc_valid_token")).await) + .await; + + assert_eq!(status, StatusCode::OK); + assert_eq!(json["summary"]["session_cost_usd"], 0.0); + assert_eq!(json["summary"]["daily_cost_usd"], 0.0); + assert_eq!(json["summary"]["monthly_cost_usd"], 0.0); + assert_eq!(json["summary"]["budget_state"], "allowed"); + } + + #[tokio::test] + async fn admin_cost_reset_requires_auth() { + let mut config = temp_config(); + config.cost.enabled = true; + let state = test_state(config, Some("zc_valid_token")); + + let (status, _) = response_json( + handle_admin_cost_reset( + State(state), + HeaderMap::new(), + Ok(Json(AdminCostResetRequest { + scope: CostResetScope::Session, + reason: None, + })), + ) + .await, + ) + .await; + + assert_eq!(status, StatusCode::UNAUTHORIZED); + } + + #[tokio::test] + async fn admin_cost_reset_rejects_invalid_token_and_non_loopback_origin() { + let mut config = temp_config(); + config.cost.enabled = true; + let state = test_state(config, Some("zc_valid_token")); + + let (invalid_status, _) = response_json( + handle_admin_cost_reset( + State(state.clone()), + admin_headers("zc_invalid_token"), + Ok(Json(AdminCostResetRequest { + scope: CostResetScope::Session, + reason: None, + })), + ) + .await, + ) + .await; + + assert_eq!(invalid_status, StatusCode::UNAUTHORIZED); + + let mut forbidden_headers = admin_headers("zc_valid_token"); + forbidden_headers.insert( + header::ORIGIN, + HeaderValue::from_static("https://example.com"), + ); + let (origin_status, json) = response_json( + handle_admin_cost_reset( + State(state), + forbidden_headers, + Ok(Json(AdminCostResetRequest { + scope: CostResetScope::Session, + reason: None, + })), + ) + .await, + ) + .await; + + 
assert_eq!(origin_status, StatusCode::FORBIDDEN); + assert_eq!(json["error"], "Forbidden request origin"); + } + + #[tokio::test] + async fn admin_cost_reset_clears_requested_scope() { + let mut config = temp_config(); + config.cost.enabled = true; + record_usage(&config, 1.25); + + let state = test_state(config.clone(), Some("zc_valid_token")); + let (status, json) = response_json( + handle_admin_cost_reset( + State(state), + admin_headers("zc_valid_token"), + Ok(Json(AdminCostResetRequest { + scope: CostResetScope::Day, + reason: Some("cleanup".to_string()), + })), + ) + .await, + ) + .await; + + assert_eq!(status, StatusCode::OK); + assert_eq!(json["scope"], "day"); + assert_eq!(json["removed_requests"], 1); + + let tracker = + crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(); + let summary = tracker.get_summary().unwrap(); + assert_eq!(summary.daily_cost_usd, 0.0); + } + + #[tokio::test] + async fn admin_cost_override_requires_auth() { + let mut config = temp_config(); + config.cost.enabled = true; + config.cost.allow_override = true; + let state = test_state(config, Some("zc_valid_token")); + + let (status, _) = response_json( + handle_admin_cost_override( + State(state), + HeaderMap::new(), + Ok(Json(AdminCostOverrideRequest { + scope: CostOverrideScope::NextRequest, + reason: None, + })), + ) + .await, + ) + .await; + + assert_eq!(status, StatusCode::UNAUTHORIZED); + } + + #[tokio::test] + async fn admin_cost_override_rejects_invalid_token_and_non_loopback_origin() { + let mut config = temp_config(); + config.cost.enabled = true; + config.cost.allow_override = true; + let state = test_state(config, Some("zc_valid_token")); + + let (invalid_status, _) = response_json( + handle_admin_cost_override( + State(state.clone()), + admin_headers("zc_invalid_token"), + Ok(Json(AdminCostOverrideRequest { + scope: CostOverrideScope::NextRequest, + reason: None, + })), + ) + .await, + ) + .await; + + assert_eq!(invalid_status, 
StatusCode::UNAUTHORIZED); + + let mut forbidden_headers = admin_headers("zc_valid_token"); + forbidden_headers.insert( + header::ORIGIN, + HeaderValue::from_static("https://example.com"), + ); + let (origin_status, json) = response_json( + handle_admin_cost_override( + State(state), + forbidden_headers, + Ok(Json(AdminCostOverrideRequest { + scope: CostOverrideScope::NextRequest, + reason: None, + })), + ) + .await, + ) + .await; + + assert_eq!(origin_status, StatusCode::FORBIDDEN); + assert_eq!(json["error"], "Forbidden request origin"); + } + + #[tokio::test] + async fn admin_cost_override_applies_to_shared_tracker_next_request() { + let mut config = temp_config(); + config.cost.enabled = true; + config.cost.allow_override = true; + config.cost.daily_limit_usd = 1.0; + config.cost.monthly_limit_usd = 10.0; + + let state = test_state(config, Some("zc_valid_token")); + let tracker = state.cost_tracker.clone().unwrap(); + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.1; + tracker.record_usage(usage).unwrap(); + + let (status, json) = response_json( + handle_admin_cost_override( + State(state), + admin_headers("zc_valid_token"), + Ok(Json(AdminCostOverrideRequest { + scope: CostOverrideScope::NextRequest, + reason: Some("incident".to_string()), + })), + ) + .await, + ) + .await; + + assert_eq!(status, StatusCode::OK); + assert_eq!(json["scope"], "next_request"); + + let service = CostService::new(tracker.clone()); + let first = service.evaluate_request(0.1, None, Utc::now()).unwrap(); + assert!(matches!( + first, + crate::cost::BudgetEvaluation::Proceed { + override_applied: Some(_), + .. + } + )); + + let second = service.evaluate_request(0.1, None, Utc::now()).unwrap(); + assert!(matches!( + second, + crate::cost::BudgetEvaluation::Blocked { .. 
} + )); + } + + #[tokio::test] + async fn admin_cost_override_returns_forbidden_when_policy_disallows_it() { + let mut config = temp_config(); + config.cost.enabled = true; + config.cost.allow_override = false; + let state = test_state(config, Some("zc_valid_token")); + + let (status, json) = response_json( + handle_admin_cost_override( + State(state), + admin_headers("zc_valid_token"), + Ok(Json(AdminCostOverrideRequest { + scope: CostOverrideScope::NextRequest, + reason: None, + })), + ) + .await, + ) + .await; + + assert_eq!(status, StatusCode::FORBIDDEN); + assert!(json["error"] + .as_str() + .unwrap_or_default() + .contains("disabled by policy")); + } +} diff --git a/clients/agent-runtime/src/gateway/mod.rs b/clients/agent-runtime/src/gateway/mod.rs index 5f462e83..9be94963 100644 --- a/clients/agent-runtime/src/gateway/mod.rs +++ b/clients/agent-runtime/src/gateway/mod.rs @@ -11,6 +11,7 @@ use crate::agent::dispatcher::{evaluate_tool_risk, DispatchAction}; use crate::bootstrap; use crate::channels::{Channel, SendMessage, WhatsAppChannel}; use crate::config::Config; +use crate::cost::CostTracker; #[cfg(test)] use crate::gateway::utils::{ blocked_http_onboarding_state, http_onboarding_state, HttpOnboardingState, @@ -44,6 +45,7 @@ use tower_http::timeout::TimeoutLayer; use uuid::Uuid; pub mod admin; +pub mod cost; pub mod sessions; pub mod utils; pub mod webhook_dispatch; @@ -77,6 +79,8 @@ struct AdminConfigUpdateRequest { gateway: Option, #[serde(default)] webhook: Option, + #[serde(default)] + cost: Option, } #[derive(Debug, Clone, serde::Deserialize, Default)] @@ -131,6 +135,12 @@ struct AdminAutonomyPatch { max_cost_per_day_cents: Option, } +impl AdminAutonomyPatch { + fn normalized_max_actions_per_hour(&self) -> Option { + self.max_actions_per_hour.or(self.max_cost_per_day_cents) + } +} + #[derive(Debug, Clone, serde::Deserialize, Default)] struct AdminSchedulerPatch { #[serde(default)] @@ -149,6 +159,22 @@ struct AdminWebhookPatch { secret: Option, } 
+#[derive(Debug, Clone, serde::Deserialize, Default)] +struct AdminCostPatch { + #[serde(default)] + enabled: Option, + #[serde(default)] + session_limit_usd: Option, + #[serde(default)] + daily_limit_usd: Option, + #[serde(default)] + monthly_limit_usd: Option, + #[serde(default)] + warn_at_percent: Option, + #[serde(default)] + allow_override: Option, +} + #[derive(Debug, Clone, serde::Deserialize)] #[serde(tag = "mode", rename_all = "snake_case")] enum AdminSecretUpdate { @@ -267,17 +293,21 @@ fn compare_autonomy_fields( fields, ); compare_primitive( - aut.max_actions_per_hour, + aut.normalized_max_actions_per_hour(), cfg.autonomy.max_actions_per_hour, "autonomy.max_actions_per_hour", fields, ); - compare_primitive( - aut.max_cost_per_day_cents, - cfg.autonomy.max_cost_per_day_cents, - "autonomy.max_cost_per_day_cents", - fields, - ); + if aut.max_actions_per_hour.is_none() { + if let Some(max_cost_per_day_cents) = aut.max_cost_per_day_cents { + compare_primitive( + Some(max_cost_per_day_cents), + cfg.autonomy.max_actions_per_hour, + "autonomy.max_cost_per_day_cents", + fields, + ); + } + } } } @@ -756,6 +786,7 @@ fn log_webhook_terminal_outcome(session_id: &str, runtime_path: &str, outcome: & fn webhook_outcome_label(outcome: &webhook_dispatch::WebhookTerminalOutcome) -> &'static str { match outcome { webhook_dispatch::WebhookTerminalOutcome::Completed => "completed", + webhook_dispatch::WebhookTerminalOutcome::BudgetExceeded { .. } => "budget_exceeded", webhook_dispatch::WebhookTerminalOutcome::ApprovalRequired { .. 
} => "approval_required", webhook_dispatch::WebhookTerminalOutcome::Timeout => "timeout", webhook_dispatch::WebhookTerminalOutcome::Fallback => "fallback", @@ -980,6 +1011,7 @@ fn normalize_max_keys(configured: usize, fallback: usize) -> usize { #[derive(Clone)] pub struct AppState { pub config: Arc>, + pub cost_tracker: Option>, pub provider: Arc, pub model: String, pub temperature: f64, @@ -1043,6 +1075,11 @@ fn print_startup_banner( println!(" POST /webhook — {{\"message\": \"your prompt\"}}"); println!(" GET /web/admin/config — redacted admin config"); println!(" PUT /web/admin/config — update admin config"); + println!(" PATCH /web/admin/config — patch admin config"); + println!(" GET /web/cost/summary — runtime cost summary"); + println!(" GET /web/cost/history — runtime cost history"); + println!(" POST /web/admin/cost/reset — reset tracked costs"); + println!(" POST /web/admin/cost/override — grant next-request override"); println!(" GET /web/admin/options — admin options catalog"); println!(" GET /web/admin/channels — channel configuration status"); println!(" GET /web/admin/scheduler — scheduler configuration status"); @@ -1129,6 +1166,10 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> { .unwrap_or_else(|| bootstrap::DEFAULT_MODEL.into()); let temperature = config.default_temperature; let (mem, observer) = bootstrap::create_memory_and_observer(&config)?; + let cost_tracker = Some(Arc::new(CostTracker::new( + config.cost.clone(), + &config.workspace_dir, + )?)); // Extract webhook secret for authentication let webhook_secret_hash: Option> = config.channels_config.webhook.as_ref().and_then(|webhook| { @@ -1206,6 +1247,7 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> { let state = AppState { config: config_state, + cost_tracker, provider, model, temperature, @@ -1232,12 +1274,40 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> { .route("/webhook", 
post(handle_webhook)) .route( "/web/admin/config", - get(handle_admin_get_config).put(handle_admin_update_config_wrapper), + get(handle_admin_get_config) + .put(handle_admin_update_config_wrapper) + .patch(handle_admin_update_config_wrapper), + ) + .route( + "/api/web/admin/config", + get(handle_admin_get_config) + .put(handle_admin_update_config_wrapper) + .patch(handle_admin_update_config_wrapper), ) .route( "/web/admin/provider-pools", get(handle_admin_get_provider_pools).put(handle_admin_update_provider_pools_wrapper), ) + .route("/web/cost/summary", get(handle_cost_summary_wrapper)) + .route("/api/web/cost/summary", get(handle_cost_summary_wrapper)) + .route("/web/cost/history", get(handle_cost_history_wrapper)) + .route("/api/web/cost/history", get(handle_cost_history_wrapper)) + .route( + "/web/admin/cost/reset", + post(handle_admin_cost_reset_wrapper), + ) + .route( + "/api/web/admin/cost/reset", + post(handle_admin_cost_reset_wrapper), + ) + .route( + "/web/admin/cost/override", + post(handle_admin_cost_override_wrapper), + ) + .route( + "/api/web/admin/cost/override", + post(handle_admin_cost_override_wrapper), + ) .route("/web/admin/options", get(handle_admin_options)) .route("/web/admin/channels", get(handle_admin_channels)) .route("/web/admin/scheduler", get(handle_admin_scheduler_status)) @@ -1455,6 +1525,41 @@ async fn handle_admin_update_provider_pools_wrapper( admin::handle_admin_update_provider_pools(State(state), headers, body).await } +#[allow(clippy::unused_async)] +async fn handle_cost_summary_wrapper( + State(state): State, + headers: HeaderMap, +) -> impl IntoResponse { + cost::handle_cost_summary(State(state), headers).await +} + +#[allow(clippy::unused_async)] +async fn handle_cost_history_wrapper( + State(state): State, + headers: HeaderMap, + query: Query, +) -> impl IntoResponse { + cost::handle_cost_history(State(state), headers, query).await +} + +#[allow(clippy::unused_async)] +async fn handle_admin_cost_reset_wrapper( + 
State(state): State, + headers: HeaderMap, + body: Result, axum::extract::rejection::JsonRejection>, +) -> impl IntoResponse { + cost::handle_admin_cost_reset(State(state), headers, body).await +} + +#[allow(clippy::unused_async)] +async fn handle_admin_cost_override_wrapper( + State(state): State, + headers: HeaderMap, + body: Result, axum::extract::rejection::JsonRejection>, +) -> impl IntoResponse { + cost::handle_admin_cost_override(State(state), headers, body).await +} + /// Webhook request body #[derive(serde::Deserialize)] pub struct WebhookBody { @@ -1641,6 +1746,23 @@ fn webhook_response_from_dispatch_result( } ((StatusCode::OK, Json(body)), true) } + webhook_dispatch::WebhookTerminalOutcome::BudgetExceeded { + current_usd, + limit_usd, + period, + } => { + let body = serde_json::json!({ + "error": { + "code": "budget_exceeded", + "governance_domain": "token_spend", + "period": period, + "current_usd": current_usd, + "limit_usd": limit_usd, + }, + "session_id": result.session_id, + }); + ((StatusCode::FORBIDDEN, Json(body)), false) + } webhook_dispatch::WebhookTerminalOutcome::ApprovalRequired { tool, reason } => { let body = serde_json::json!({ "error": { @@ -1749,6 +1871,7 @@ async fn handle_webhook( Arc::clone(&state.provider), Arc::clone(&state.mem), Arc::clone(&state.observer), + state.cost_tracker.clone(), &state.model, webhook_dispatch::WebhookTurnRequest { session_id: session_id.clone(), @@ -1778,6 +1901,22 @@ async fn handle_webhook( log_webhook_runtime_path(&session_id, false, "dispatcher_flag_disabled"); + if config.cost.enabled { + let response = ( + StatusCode::SERVICE_UNAVAILABLE, + Json(serde_json::json!({ + "error": { + "code": "cost_governance_requires_dispatcher", + "message": "Cost governance requires the webhook dispatcher path when cost.enabled=true", + } + })), + ); + release_idempotency_key(&state, reserved_idempotency_key, false); + update_session_activity_if_persisted(&state, &session_id, token_hash.as_deref(), false) + .await; + 
return response; + } + if !is_preview { if let Some((response, persist_idempotency)) = canonical_outcome_early_response(&state, &session_id, &scrubbed_message).await @@ -1870,13 +2009,19 @@ async fn handle_chat_stream( let dispatcher_enabled = webhook_dispatcher_enabled(&config); // ── Process message via existing dispatch ──────────── - let (response_text, is_error) = if dispatcher_enabled { + enum StreamProcessingOutcome { + Success(String), + Error(serde_json::Value), + } + + let stream_outcome = if dispatcher_enabled { log_webhook_runtime_path(&session_id, true, "stream_dispatcher"); let result = webhook_dispatch::execute( &config, Arc::clone(&state.provider), Arc::clone(&state.mem), Arc::clone(&state.observer), + state.cost_tracker.clone(), &state.model, webhook_dispatch::WebhookTurnRequest { session_id: session_id.clone(), @@ -1898,38 +2043,75 @@ async fn handle_chat_stream( .response_text .map(|t| scrub_sensitive_boundary_text(&t)) .unwrap_or_default(); - (text, false) + StreamProcessingOutcome::Success(text) } + webhook_dispatch::WebhookTerminalOutcome::BudgetExceeded { + current_usd, + limit_usd, + period, + } => StreamProcessingOutcome::Error(serde_json::json!({ + "code": "budget_exceeded", + "governance_domain": "token_spend", + "period": period, + "current_usd": current_usd, + "limit_usd": limit_usd, + "message": format!( + "Budget exceeded: ${current_usd:.4} spent against ${limit_usd:.2} {period:?} limit" + ), + })), webhook_dispatch::WebhookTerminalOutcome::Error => { - ("LLM request failed".to_string(), true) + StreamProcessingOutcome::Error(serde_json::json!({ + "code": "processing_error", + "message": "LLM request failed", + })) } webhook_dispatch::WebhookTerminalOutcome::Timeout => { - ("Request timed out".to_string(), true) + StreamProcessingOutcome::Error(serde_json::json!({ + "code": "timeout", + "message": "Request timed out", + })) } webhook_dispatch::WebhookTerminalOutcome::ApprovalRequired { tool, reason } => { - let msg = format!("Approval 
required for tool `{tool}`: {reason}"); - (msg, true) + StreamProcessingOutcome::Error(serde_json::json!({ + "code": "approval_required", + "tool": tool, + "reason": reason, + "message": format!("Approval required for tool `{tool}`: {reason}"), + })) } } } else { log_webhook_runtime_path(&session_id, false, "stream_legacy"); - if state.auto_save { - let key = webhook_memory_key(); - let _ = state - .mem - .store(&key, &scrubbed_message, MemoryCategory::Conversation, None) - .await; - } - match state - .provider - .simple_chat(message, &state.model, state.temperature) - .await - { - Ok(response) => (scrub_sensitive_boundary_text(&response), false), - Err(e) => { - let sanitized = providers::sanitize_api_error(&e.to_string()); - tracing::error!("Stream provider error: {sanitized}"); - ("LLM request failed".to_string(), true) + if config.cost.enabled { + StreamProcessingOutcome::Error(serde_json::json!({ + "code": "cost_governance_requires_dispatcher", + "message": "Cost governance requires the webhook dispatcher path when cost.enabled=true", + })) + } else { + if state.auto_save { + let key = webhook_memory_key(); + let _ = state + .mem + .store(&key, &scrubbed_message, MemoryCategory::Conversation, None) + .await; + } + + match state + .provider + .simple_chat(message, &state.model, state.temperature) + .await + { + Ok(response) => { + StreamProcessingOutcome::Success(scrub_sensitive_boundary_text(&response)) + } + Err(e) => { + let sanitized = providers::sanitize_api_error(&e.to_string()); + tracing::error!("Stream provider error: {sanitized}"); + StreamProcessingOutcome::Error(serde_json::json!({ + "code": "processing_error", + "message": "LLM request failed", + })) + } } } }; @@ -1947,16 +2129,11 @@ async fn handle_chat_stream( let message_id = Uuid::new_v4().to_string(); let sid = session_id.clone(); - let events: Vec> = if is_error { - let error_data = serde_json::json!({ - "code": "processing_error", - "message": response_text, - }); - vec![Ok(Event::default() 
+ let events: Vec> = match stream_outcome { + StreamProcessingOutcome::Error(error_data) => vec![Ok(Event::default() .event("error") - .data(error_data.to_string()))] - } else { - vec![ + .data(error_data.to_string()))], + StreamProcessingOutcome::Success(response_text) => vec![ Ok(Event::default().event("chunk").data(&response_text)), Ok(Event::default().event("done").data( serde_json::json!({ @@ -1965,7 +2142,7 @@ async fn handle_chat_stream( }) .to_string(), )), - ] + ], }; Ok(Sse::new(futures::stream::iter(events))) @@ -2970,6 +3147,7 @@ mod tests { async fn metrics_endpoint_returns_hint_when_prometheus_is_disabled() { let state = AppState { config: Arc::new(Mutex::new(Config::default())), + cost_tracker: None, provider: Arc::new(MockProvider::default()), model: "test-model".into(), temperature: 0.0, @@ -3014,6 +3192,7 @@ mod tests { let observer: Arc = prom; let state = AppState { config: Arc::new(Mutex::new(Config::default())), + cost_tracker: None, provider: Arc::new(MockProvider::default()), model: "test-model".into(), temperature: 0.0, @@ -3672,6 +3851,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -3704,6 +3884,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -3746,6 +3927,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -3901,6 +4083,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: 
crate::config::AudioConfig::default(), }; @@ -3950,6 +4133,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4001,6 +4185,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4042,6 +4227,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4090,6 +4276,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4151,6 +4338,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4174,11 +4362,75 @@ mod tests { assert_eq!(payload["config"]["channels"]["webhook"]["has_secret"], true); assert_eq!(payload["config"]["gateway"]["paired_tokens_count"], 2); assert_eq!(payload["config"]["runtime"]["kind"], "native"); + assert!(payload["config"]["autonomy"] + .get("max_actions_per_hour") + .is_some()); + assert!(payload["config"]["autonomy"] + .get("max_cost_per_day_cents") + .is_none()); assert!(payload.to_string().contains("has_secret")); assert!(!payload.to_string().contains("top-secret")); assert!(!payload.to_string().contains("hash1")); } + #[tokio::test] + async fn admin_config_response_reports_deprecated_action_rate_alias_metadata() { + let mut cfg = temp_config(); + cfg.autonomy = toml::from_str( + 
r#" +level = "supervised" +workspace_only = true +allowed_commands = ["git"] +forbidden_paths = ["/etc"] +max_cost_per_day_cents = 9 +require_approval_for_medium_risk = true +block_high_risk_commands = true +auto_approve = [] +always_ask = [] +"#, + ) + .unwrap(); + + let state = AppState { + config: Arc::new(Mutex::new(cfg)), + provider: Arc::new(MockProvider::default()), + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(MockMemory), + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(true, &["zc_valid_token".into()])), + trust_forwarded_headers: false, + rate_limiter: Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + }; + let mut headers = HeaderMap::new(); + headers.insert( + header::AUTHORIZATION, + HeaderValue::from_static("Bearer zc_valid_token"), + ); + + let response = handle_admin_get_config(State(state), headers) + .await + .into_response(); + assert_eq!(response.status(), StatusCode::OK); + + let body = response.into_body().collect().await.unwrap().to_bytes(); + let payload: serde_json::Value = serde_json::from_slice(&body).unwrap(); + + assert_eq!( + payload["config"]["autonomy"]["deprecated_fields"], + serde_json::json!(["autonomy.max_cost_per_day_cents"]) + ); + } + #[tokio::test] async fn admin_config_rejects_cross_origin_browser_request() { let cfg = temp_config(); @@ -4198,6 +4450,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4234,6 +4487,7 @@ mod tests { whatsapp_app_secret: None, 
channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4267,6 +4521,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4301,6 +4556,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4343,6 +4599,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4403,6 +4660,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4457,6 +4715,137 @@ mod tests { assert_eq!(before, after); } + #[tokio::test] + async fn admin_config_patch_updates_cost_fields_without_touching_autonomy() { + let mut cfg = temp_config(); + cfg.cost.enabled = false; + cfg.cost.daily_limit_usd = 10.0; + cfg.cost.monthly_limit_usd = 100.0; + cfg.cost.warn_at_percent = 80; + cfg.cost.allow_override = false; + cfg.autonomy.max_actions_per_hour = 7; + cfg.save().unwrap(); + + let shared_cfg = Arc::new(Mutex::new(cfg)); + let state = AppState { + config: shared_cfg.clone(), + provider: Arc::new(MockProvider::default()), + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(MockMemory), + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(true, &["zc_valid_token".into()])), + trust_forwarded_headers: false, + rate_limiter: 
Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + }; + let mut headers = HeaderMap::new(); + headers.insert( + header::AUTHORIZATION, + HeaderValue::from_static("Bearer zc_valid_token"), + ); + headers.insert( + header::ORIGIN, + HeaderValue::from_static("http://127.0.0.1:3000"), + ); + headers.insert(header::HOST, HeaderValue::from_static("127.0.0.1:3000")); + + let payload = serde_json::json!({ + "cost": { + "enabled": true, + "daily_limit_usd": 20.0, + "monthly_limit_usd": 250.0, + "warn_at_percent": 75, + "allow_override": true + } + }); + + let response = handle_admin_update_config_wrapper( + State(state), + headers, + Ok(Json( + serde_json::from_value::(payload).unwrap(), + )), + ) + .await + .into_response(); + + assert_eq!(response.status(), StatusCode::OK); + + let current = shared_cfg.lock().clone(); + assert!(current.cost.enabled); + assert_eq!(current.cost.daily_limit_usd, 20.0); + assert_eq!(current.cost.monthly_limit_usd, 250.0); + assert_eq!(current.cost.warn_at_percent, 75); + assert!(current.cost.allow_override); + assert_eq!(current.autonomy.max_actions_per_hour, 7); + } + + #[tokio::test] + async fn admin_config_patch_accepts_deprecated_action_rate_alias() { + let cfg = temp_config(); + cfg.save().unwrap(); + + let shared_cfg = Arc::new(Mutex::new(cfg)); + let state = AppState { + config: shared_cfg.clone(), + provider: Arc::new(MockProvider::default()), + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(MockMemory), + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(true, &["zc_valid_token".into()])), + trust_forwarded_headers: false, + rate_limiter: 
Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + }; + let mut headers = HeaderMap::new(); + headers.insert( + header::AUTHORIZATION, + HeaderValue::from_static("Bearer zc_valid_token"), + ); + headers.insert( + header::ORIGIN, + HeaderValue::from_static("http://127.0.0.1:3000"), + ); + headers.insert(header::HOST, HeaderValue::from_static("127.0.0.1:3000")); + + let payload = serde_json::json!({ + "autonomy": { + "max_cost_per_day_cents": 11 + } + }); + + let response = handle_admin_update_config_wrapper( + State(state), + headers, + Ok(Json( + serde_json::from_value::(payload).unwrap(), + )), + ) + .await + .into_response(); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!(shared_cfg.lock().autonomy.max_actions_per_hour, 11); + } + #[tokio::test] async fn admin_config_update_rejects_restart_required_security_changes() { let cfg = temp_config(); @@ -4479,6 +4868,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4558,6 +4948,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4618,6 +5009,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4663,6 +5055,7 @@ mod tests { whatsapp_app_secret: None, 
channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4709,6 +5102,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4783,6 +5177,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4864,6 +5259,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -4921,6 +5317,7 @@ mod tests { let state = AppState { config: Arc::new(Mutex::new(config)), + cost_tracker: None, provider, model: "test-model".into(), temperature: 0.0, @@ -5021,6 +5418,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5047,6 +5445,71 @@ mod tests { .contains("approval")); } + #[tokio::test] + async fn webhook_dispatcher_returns_machine_readable_budget_exceeded_payload() { + let _dispatcher = GatewayWebhookDispatcherEnvGuard::set("1").await; + + let provider_impl = Arc::new(SequencedChatProvider::new(vec![ChatResponse { + text: Some("should not run".into()), + tool_calls: Vec::new(), + }])); + let provider: Arc = provider_impl.clone(); + + let mut config = temp_config(); + config.gateway.webhook_dispatcher_enabled = true; + config.cost.enabled = true; + config.cost.daily_limit_usd = 1.0; + config.cost.monthly_limit_usd = 10.0; + let tracker = Arc::new( + 
crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(), + ); + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.1; + tracker.record_usage(usage).unwrap(); + + let state = AppState { + config: Arc::new(Mutex::new(config)), + provider, + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(MockMemory), + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(false, &[])), + trust_forwarded_headers: false, + rate_limiter: Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: Some(tracker), + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + }; + + let response = handle_webhook( + State(state), + test_connect_info(), + HeaderMap::new(), + Ok(Json(WebhookBody { + message: "hello".into(), + })), + ) + .await + .into_response(); + + assert_eq!(response.status(), StatusCode::FORBIDDEN); + let body = response.into_body().collect().await.unwrap().to_bytes(); + let payload: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert_eq!(provider_impl.chat_calls.load(Ordering::SeqCst), 0); + assert_eq!(payload["error"]["code"], "budget_exceeded"); + assert_eq!(payload["error"]["governance_domain"], "token_spend"); + assert_eq!(payload["error"]["period"], "day"); + assert_eq!(payload["error"]["current_usd"], 1.1); + assert_eq!(payload["error"]["limit_usd"], 1.0); + } + #[tokio::test] async fn webhook_dispatcher_returns_500_with_session_id_on_runtime_error() { let _dispatcher = GatewayWebhookDispatcherEnvGuard::set("1").await; @@ -5072,6 +5535,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, 
transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5120,6 +5584,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5140,6 +5605,108 @@ mod tests { assert_eq!(provider_impl.simple_calls.load(Ordering::SeqCst), 1); } + #[tokio::test] + async fn webhook_rejects_legacy_path_when_cost_governance_is_enabled() { + let _dispatcher = GatewayWebhookDispatcherEnvGuard::set("0").await; + + let provider_impl = Arc::new(DispatchAwareProvider::default()); + let provider: Arc = provider_impl.clone(); + let mut config = temp_config(); + config.cost.enabled = true; + + let state = AppState { + config: Arc::new(Mutex::new(config)), + provider, + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(MockMemory), + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(false, &[])), + trust_forwarded_headers: false, + rate_limiter: Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + }; + + let response = handle_webhook( + State(state), + test_connect_info(), + HeaderMap::new(), + Ok(Json(WebhookBody { + message: "hello blocked legacy".into(), + })), + ) + .await + .into_response(); + + assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE); + assert_eq!(provider_impl.chat_calls.load(Ordering::SeqCst), 0); + assert_eq!(provider_impl.simple_calls.load(Ordering::SeqCst), 0); + } + + #[tokio::test] + async fn stream_legacy_path_returns_cost_governance_requires_dispatcher_error() { + use axum::extract::ConnectInfo; + use 
tower::ServiceExt; + + let _dispatcher = GatewayWebhookDispatcherEnvGuard::set("0").await; + + let provider_impl = Arc::new(DispatchAwareProvider::default()); + let provider: Arc = provider_impl.clone(); + let mut config = temp_config(); + config.cost.enabled = true; + let cost_tracker = + Arc::new(CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap()); + + let state = AppState { + config: Arc::new(Mutex::new(config)), + provider, + model: "test-model".into(), + temperature: 0.0, + mem: Arc::new(MockMemory), + auto_save: false, + webhook_secret_hash: None, + pairing: Arc::new(PairingGuard::new(false, &[])), + trust_forwarded_headers: false, + rate_limiter: Arc::new(GatewayRateLimiter::new(100, 100, 100)), + idempotency_store: Arc::new(IdempotencyStore::new(Duration::from_secs(300), 1000)), + whatsapp: None, + whatsapp_app_secret: None, + channel_runtime_handle: None, + observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: Some(cost_tracker), + transcriber: None, + audio_config: crate::config::AudioConfig::default(), + }; + + let req = http::Request::builder() + .method("POST") + .uri("/web/chat/stream") + .header("content-type", "application/json") + .extension(ConnectInfo(SocketAddr::from(([127, 0, 0, 1], 9999)))) + .body(axum::body::Body::from(r#"{"message":"hello"}"#)) + .unwrap(); + + let resp = build_stream_router(state).oneshot(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let collected = resp.into_body().collect().await.unwrap(); + let body_str = std::str::from_utf8(&collected.to_bytes()) + .unwrap() + .to_owned(); + + assert!(body_str.contains("event: error")); + assert!(body_str.contains("\"code\":\"cost_governance_requires_dispatcher\"")); + assert_eq!(provider_impl.chat_calls.load(Ordering::SeqCst), 0); + assert_eq!(provider_impl.simple_calls.load(Ordering::SeqCst), 0); + } + #[tokio::test] async fn generic_webhook_regression_remains_text_only() { let _dispatcher = 
GatewayWebhookDispatcherEnvGuard::set("0").await; @@ -5163,6 +5730,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5205,6 +5773,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5259,6 +5828,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5322,6 +5892,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5394,6 +5965,7 @@ mod tests { whatsapp_app_secret: Some(Arc::from("wa-secret")), channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5439,6 +6011,7 @@ mod tests { whatsapp_app_secret: Some(Arc::from("wa-secret")), channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5510,6 +6083,7 @@ mod tests { whatsapp_app_secret: Some(Arc::from("wa-secret")), channel_runtime_handle: Some(crate::channels::ChannelRuntimeHandle::new(tx)), observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5585,6 +6159,7 @@ mod tests { whatsapp_app_secret: Some(Arc::from("wa-secret")), channel_runtime_handle: 
Some(crate::channels::ChannelRuntimeHandle::new(tx)), observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5630,6 +6205,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5692,6 +6268,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5767,6 +6344,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5809,6 +6387,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5854,6 +6433,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -5900,6 +6480,7 @@ mod tests { whatsapp_app_secret: None, channel_runtime_handle: None, observer: Arc::new(crate::observability::NoopObserver), + cost_tracker: None, transcriber: None, audio_config: crate::config::AudioConfig::default(), }; @@ -6469,6 +7050,7 @@ mod tests { } AppState { config: Arc::new(Mutex::new(Config::default())), + cost_tracker: None, provider: Arc::new(MockProvider::default()), model: "test".into(), temperature: 0.0, @@ -6496,6 +7078,12 @@ mod tests { .with_state(state) } + fn build_stream_router(state: AppState) -> Router { + Router::new() 
+ .route("/web/chat/stream", post(handle_chat_stream)) + .with_state(state) + } + /// Minimal valid WAV: RIFF magic + WAVE marker + fmt chunk, 0 data bytes. fn minimal_wav() -> Vec { let mut v: Vec = Vec::with_capacity(44); @@ -6901,6 +7489,7 @@ mod tests { let state = AppState { config: Arc::new(Mutex::new(Config::default())), + cost_tracker: None, provider: Arc::new(MockProvider::default()), model: "test".into(), temperature: 0.0, diff --git a/clients/agent-runtime/src/gateway/webhook_dispatch.rs b/clients/agent-runtime/src/gateway/webhook_dispatch.rs index 8ee4c057..4be394e0 100644 --- a/clients/agent-runtime/src/gateway/webhook_dispatch.rs +++ b/clients/agent-runtime/src/gateway/webhook_dispatch.rs @@ -3,6 +3,7 @@ use crate::agent::dispatcher::DispatchAction; use crate::agent::{Agent, AgentTurnEvent, AgentTurnOutcome, AgentTurnResult, TurnContext}; use crate::bootstrap; use crate::config::Config; +use crate::cost::UsagePeriod; use crate::memory::Memory; use crate::observability::Observer; use crate::pre_execution::BlockingOutcome; @@ -27,16 +28,24 @@ pub struct WebhookTurnRequest { pub include_sse_frames: bool, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub enum WebhookTerminalOutcome { Completed, - ApprovalRequired { tool: String, reason: String }, + BudgetExceeded { + current_usd: f64, + limit_usd: f64, + period: UsagePeriod, + }, + ApprovalRequired { + tool: String, + reason: String, + }, Timeout, Fallback, Error, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub struct WebhookTurnResult { pub session_id: String, pub model: String, @@ -48,7 +57,15 @@ pub struct WebhookTurnResult { pub(crate) enum CanonicalWebhookResult { Agent(AgentTurnResult), Blocking(BlockingOutcome), - ApprovalRequired { tool: String, reason: String }, + BudgetExceeded { + current_usd: f64, + limit_usd: f64, + period: UsagePeriod, + }, + ApprovalRequired { + tool: String, + reason: String, + }, Error, } @@ -184,6 +201,25 
@@ pub(crate) fn map_canonical_result( Some(reason.as_str()), ), }, + CanonicalWebhookResult::BudgetExceeded { + current_usd, + limit_usd, + period, + } => WebhookTurnResult { + session_id: request.session_id.clone(), + model: model.to_string(), + outcome: WebhookTerminalOutcome::BudgetExceeded { + current_usd, + limit_usd, + period, + }, + response_text: None, + event_frames: event_frames_for_blocking_result( + request, + "budget_exceeded", + Some("budget_exceeded"), + ), + }, CanonicalWebhookResult::Blocking(BlockingOutcome::ApprovalRequired { tool }) => { let reason = approval_reason_for_tool(&tool); WebhookTurnResult { @@ -332,6 +368,7 @@ pub(crate) async fn execute( provider: Arc, memory: Arc, observer: Arc, + cost_tracker: Option>, model: &str, request: WebhookTurnRequest, ) -> WebhookTurnResult { @@ -356,10 +393,11 @@ pub(crate) async fn execute( } } - let bootstrap = match bootstrap::BootstrapContext::for_gateway(config, memory, observer) { - Ok(bootstrap) => bootstrap, - Err(_) => return map_canonical_result(&request, model, CanonicalWebhookResult::Error), - }; + let bootstrap = + match bootstrap::BootstrapContext::for_gateway(config, memory, observer, cost_tracker) { + Ok(bootstrap) => bootstrap, + Err(_) => return map_canonical_result(&request, model, CanonicalWebhookResult::Error), + }; let provider: Box = Box::new(SharedProvider { inner: provider }); let mut agent = match Agent::from_bootstrap_with_provider(config, bootstrap, provider) { @@ -385,7 +423,26 @@ pub(crate) async fn execute( map_canonical_result(&request, model, CanonicalWebhookResult::Agent(result)) } } - Err(_) => map_canonical_result(&request, model, CanonicalWebhookResult::Error), + Err(error) => { + if let Some(crate::agent::AgentExecutionError::CostBudgetExceeded { + current_usd, + limit_usd, + period, + }) = error.downcast_ref::() + { + map_canonical_result( + &request, + model, + CanonicalWebhookResult::BudgetExceeded { + current_usd: *current_usd, + limit_usd: *limit_usd, + period: 
*period, + }, + ) + } else { + map_canonical_result(&request, model, CanonicalWebhookResult::Error) + } + } } } @@ -529,6 +586,49 @@ mod tests { assert_eq!(result.response_text, None); } + #[tokio::test] + async fn execute_maps_cost_budget_exceeded_into_machine_readable_outcome() { + let (_temp, mut config) = test_config(); + config.cost.enabled = true; + config.cost.daily_limit_usd = 1.0; + config.cost.monthly_limit_usd = 10.0; + + let tracker = Arc::new( + crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(), + ); + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.1; + tracker.record_usage(usage).unwrap(); + + let provider_impl = Arc::new(ScriptedProvider::new(vec![ChatResponse { + text: Some("should not be called".into()), + tool_calls: Vec::new(), + }])); + let provider: Arc = provider_impl.clone(); + + let result = execute( + &config, + provider, + Arc::new(TestMemory), + Arc::new(NoopObserver) as Arc, + Some(tracker), + "test-model", + WebhookTurnRequest { + session_id: "session-budget".into(), + session_source: WebhookSessionSource::Explicit, + message: "hello".into(), + include_sse_frames: false, + }, + ) + .await; + + assert_eq!(provider_impl.calls.load(Ordering::SeqCst), 0); + assert!(matches!( + result.outcome, + WebhookTerminalOutcome::BudgetExceeded { .. 
} + )); + } + #[test] fn approval_denial_is_detected_from_tool_results_history() { let history = vec![ConversationMessage::ToolResults(vec![ToolResultMessage { @@ -747,6 +847,7 @@ mod tests { provider, Arc::new(TestMemory), Arc::new(NoopObserver) as Arc, + None, "test-model", WebhookTurnRequest { session_id: "session-shell".into(), diff --git a/clients/agent-runtime/src/main.rs b/clients/agent-runtime/src/main.rs index bb53f2da..05a1d6e7 100644 --- a/clients/agent-runtime/src/main.rs +++ b/clients/agent-runtime/src/main.rs @@ -37,7 +37,8 @@ use anyhow::{anyhow, bail, Result}; use clap::{Parser, Subcommand}; use dialoguer::{Input, Password}; use serde::{Deserialize, Serialize}; -use std::time::Duration; +use std::sync::Arc; +use std::time::{Duration, Instant}; use tracing::{info, warn}; use tracing_subscriber::{fmt, EnvFilter}; @@ -140,6 +141,10 @@ enum Commands { /// Attach a peripheral (board:path, e.g. nucleo-f401re:/dev/ttyACM0) #[arg(long)] peripheral: Vec, + + /// Allow exactly one over-budget request for this CLI session + #[arg(long)] + override_budget: bool, }, /// Run a code-specialist session (inspect, plan, edit, verify, report) @@ -159,6 +164,10 @@ enum Commands { /// Temperature (0.0 - 2.0) #[arg(short, long, default_value = "0.7")] temperature: f64, + + /// Allow exactly one over-budget request for this CLI session + #[arg(long)] + override_budget: bool, }, /// Start the gateway server (webhooks, websockets) @@ -257,6 +266,94 @@ enum Commands { #[command(subcommand)] update_command: UpdateCommands, }, + + /// Inspect and manage runtime cost state + Cost { + #[command(subcommand)] + cost_command: CostCommands, + }, +} + +#[derive(Subcommand, Debug)] +enum CostCommands { + /// Show the current cost summary + Summary, + /// Show aggregated cost history + History { + /// Aggregation period + #[arg(long, value_enum, default_value_t = CostHistoryPeriod::Day)] + period: CostHistoryPeriod, + + /// Number of buckets to include + #[arg(long, default_value_t = 
30)] + window: usize, + }, + /// Reset tracked costs for a specific scope + Reset { + /// Reset scope + #[arg(long, value_enum, default_value_t = CostResetScopeArg::Day)] + scope: CostResetScopeArg, + + /// Optional reason recorded in cost audit history + #[arg(long)] + reason: Option, + }, +} + +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)] +enum CostHistoryPeriod { + Session, + Day, + Month, +} + +impl From for cost::UsagePeriod { + fn from(value: CostHistoryPeriod) -> Self { + match value { + CostHistoryPeriod::Session => Self::Session, + CostHistoryPeriod::Day => Self::Day, + CostHistoryPeriod::Month => Self::Month, + } + } +} + +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)] +enum CostResetScopeArg { + Session, + Day, + Month, +} + +impl From for cost::CostResetScope { + fn from(value: CostResetScopeArg) -> Self { + match value { + CostResetScopeArg::Session => Self::Session, + CostResetScopeArg::Day => Self::Day, + CostResetScopeArg::Month => Self::Month, + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum CliSessionSurface { + Agent, + Code, +} + +impl CliSessionSurface { + fn label(self) -> &'static str { + match self { + Self::Agent => "agent", + Self::Code => "code", + } + } + + fn override_actor(self) -> &'static str { + match self { + Self::Agent => "cli-agent", + Self::Code => "cli-code", + } + } } #[derive(Subcommand, Debug)] @@ -728,6 +825,7 @@ async fn handle_cli_command(command: Commands, config: Config) -> Result<()> { model, temperature, peripheral, + override_budget, } => { Box::pin(handle_agent_command( config, @@ -736,6 +834,7 @@ async fn handle_cli_command(command: Commands, config: Config) -> Result<()> { model, temperature, peripheral, + override_budget, )) .await } @@ -745,6 +844,7 @@ async fn handle_cli_command(command: Commands, config: Config) -> Result<()> { provider, model, temperature, + override_budget, } => { Box::pin(handle_code_command( config, @@ -752,6 +852,7 @@ async fn 
handle_cli_command(command: Commands, config: Config) -> Result<()> { provider, model, temperature, + override_budget, )) .await } @@ -808,6 +909,204 @@ async fn handle_cli_command(command: Commands, config: Config) -> Result<()> { } Commands::Update { update_command } => handle_update_command(config, update_command).await, + + Commands::Cost { cost_command } => handle_cost_command(config, cost_command), + } +} + +fn handle_cost_command(config: Config, command: CostCommands) -> Result<()> { + let service = cost_service_for_config(&config)?; + + match command { + CostCommands::Summary => { + let summary = service.current_summary(chrono::Utc::now())?; + println!("{}", render_cost_summary(&summary, &config.cost)); + Ok(()) + } + CostCommands::History { period, window } => { + let history = service.history_window(period.into(), window, chrono::Utc::now())?; + println!("{}", render_cost_history(&history)); + Ok(()) + } + CostCommands::Reset { scope, reason } => { + let result = perform_cost_reset(&config, scope.into(), reason)?; + println!("{}", render_cost_reset(&result)); + Ok(()) + } + } +} + +fn cost_service_for_config(config: &Config) -> Result { + let tracker = cost::CostTracker::new(config.cost.clone(), &config.workspace_dir)?; + Ok(cost::CostService::new(Arc::new(tracker))) +} + +fn perform_cost_reset( + config: &Config, + scope: cost::CostResetScope, + reason: Option, +) -> Result { + let service = cost_service_for_config(config)?; + service.reset( + cost::CostResetRequest { + scope, + actor: "cli".to_string(), + reason, + }, + chrono::Utc::now(), + ) +} + +fn render_cost_summary( + summary: &cost::CostGovernanceSummary, + config: &crate::config::CostConfig, +) -> String { + let session_percent = scope_percent(&summary.scope_statuses, cost::UsagePeriod::Session); + let daily_percent = scope_percent(&summary.scope_statuses, cost::UsagePeriod::Day); + let monthly_percent = scope_percent(&summary.scope_statuses, cost::UsagePeriod::Month); + let active_period = 
summary.active_period.map(period_label).unwrap_or("none"); + let active_scope = summary + .scope_statuses + .iter() + .max_by(|left, right| { + budget_state_rank(left.state) + .cmp(&budget_state_rank(right.state)) + .then_with(|| left.percent_used.total_cmp(&right.percent_used)) + }) + .map(|status| period_label(status.period)) + .unwrap_or("none"); + + [ + format!("session_id={}", summary.session_id), + format!("budget_state={}", budget_state_label(summary.budget_state)), + format!("active_period={active_period}"), + format!("active_scope={active_scope}"), + format!("session_cost_usd={:.4}", summary.usage.session_cost_usd), + format!("daily_cost_usd={:.4}", summary.usage.daily_cost_usd), + format!("monthly_cost_usd={:.4}", summary.usage.monthly_cost_usd), + format!("request_count={}", summary.usage.request_count), + format!("total_tokens={}", summary.usage.total_tokens), + format!("percent_used_session={session_percent:.2}"), + format!("percent_used_daily={daily_percent:.2}"), + format!("percent_used_monthly={monthly_percent:.2}"), + format!("cost_enabled={}", config.enabled), + format!("session_limit_usd={:.4}", config.session_limit_usd), + format!("daily_limit_usd={:.4}", config.daily_limit_usd), + format!("monthly_limit_usd={:.4}", config.monthly_limit_usd), + format!("warn_at_percent={}", config.warn_at_percent), + format!("allow_override={}", config.allow_override), + ] + .join("\n") +} + +fn render_cost_history(history: &cost::CostHistory) -> String { + let mut lines = vec![ + format!("period={}", period_label(history.period)), + format!("points={}", history.points.len()), + format!("total_cost_usd={:.4}", history.totals.cost_usd), + format!("total_tokens={}", history.totals.tokens), + format!("total_requests={}", history.totals.requests), + ]; + + for point in &history.points { + lines.push(format!( + "bucket={} cost_usd={:.4} tokens={} requests={}", + point.bucket, point.cost_usd, point.tokens, point.requests + )); + } + + lines.join("\n") +} + +fn 
render_cost_reset(result: &cost::CostResetResult) -> String { + [ + format!("scope={}", reset_scope_label(result.scope)), + format!("removed_cost_usd={:.4}", result.removed_cost_usd), + format!("removed_requests={}", result.removed_requests), + format!("effective_at={}", result.effective_at.to_rfc3339()), + ] + .join("\n") +} + +fn apply_cli_budget_override( + agent: &crate::agent::Agent, + surface: CliSessionSurface, +) -> Result<()> { + let override_record = + agent.apply_next_request_budget_override(surface.override_actor(), None)?; + println!( + "budget_override=applied\nsurface={}\nscope=next_request\noverride_id={}", + surface.label(), + override_record.id + ); + Ok(()) +} + +fn print_cli_session_summary( + summary: Option, + surface: CliSessionSurface, +) { + if let Some(summary) = summary { + println!("{}", render_cli_session_summary(&summary, surface)); + } +} + +fn render_cli_session_summary( + summary: &cost::CostGovernanceSummary, + surface: CliSessionSurface, +) -> String { + let active_period = summary.active_period.map(period_label).unwrap_or("none"); + + [ + "session_summary=true".to_string(), + format!("surface={}", surface.label()), + format!("session_id={}", summary.session_id), + format!("budget_state={}", budget_state_label(summary.budget_state)), + format!("active_period={active_period}"), + format!("session_cost_usd={:.4}", summary.usage.session_cost_usd), + format!("request_count={}", summary.usage.request_count), + format!("total_tokens={}", summary.usage.total_tokens), + ] + .join("\n") +} + +fn scope_percent(scope_statuses: &[cost::BudgetScopeStatus], period: cost::UsagePeriod) -> f64 { + scope_statuses + .iter() + .find(|status| status.period == period) + .map_or(0.0, |status| status.percent_used) +} + +fn budget_state_label(state: cost::BudgetState) -> &'static str { + match state { + cost::BudgetState::Allowed => "allowed", + cost::BudgetState::Warning => "warning", + cost::BudgetState::Exceeded => "exceeded", + } +} + +fn 
budget_state_rank(state: cost::BudgetState) -> u8 { + match state { + cost::BudgetState::Allowed => 0, + cost::BudgetState::Warning => 1, + cost::BudgetState::Exceeded => 2, + } +} + +fn period_label(period: cost::UsagePeriod) -> &'static str { + match period { + cost::UsagePeriod::Session => "session", + cost::UsagePeriod::Day => "day", + cost::UsagePeriod::Month => "month", + cost::UsagePeriod::Mission => "mission", + } +} + +fn reset_scope_label(scope: cost::CostResetScope) -> &'static str { + match scope { + cost::CostResetScope::Session => "session", + cost::CostResetScope::Day => "day", + cost::CostResetScope::Month => "month", } } @@ -957,6 +1256,7 @@ async fn handle_agent_command( model: Option, temperature: f64, peripheral: Vec, + override_budget: bool, ) -> Result<()> { maybe_print_update_notice_bounded(&config).await; @@ -1000,7 +1300,60 @@ async fn handle_agent_command( return Ok(()); } - agent::run(config, message, provider, model, temperature, peripheral).await + let mut effective_config = config; + if let Some(p) = provider { + effective_config.default_provider = Some(p); + } + if let Some(m) = model { + effective_config.default_model = Some(m); + } + effective_config.default_temperature = temperature; + + if !peripheral.is_empty() { + anyhow::bail!( + "peripheral overrides are not currently supported; found {} override(s): {:?}", + peripheral.len(), + peripheral + ); + } + + let provider_name = effective_config + .default_provider + .as_deref() + .unwrap_or("openrouter") + .to_string(); + let model_name = effective_config + .default_model + .as_deref() + .unwrap_or("anthropic/claude-sonnet-4-20250514") + .to_string(); + let mut agent = crate::agent::Agent::from_config(&effective_config)?; + let session_start = Instant::now(); + + if override_budget { + apply_cli_budget_override(&agent, CliSessionSurface::Agent)?; + } + + agent.record_agent_start_event(&provider_name, &model_name); + + let run_result = if let Some(msg) = message { + let response = 
agent.run_single(&msg).await; + if let Ok(response) = &response { + println!("{response}"); + } + response.map(|_| ()) + } else { + agent.run_interactive().await + }; + + let summary_result = agent.session_cost_summary(chrono::Utc::now()); + agent.record_agent_end_event(&provider_name, &model_name, session_start.elapsed()); + match summary_result { + Ok(summary) => print_cli_session_summary(summary, CliSessionSurface::Agent), + Err(error) => tracing::warn!("Failed to load agent session cost summary: {error}"), + } + + run_result } async fn handle_code_command( @@ -1009,17 +1362,47 @@ async fn handle_code_command( provider: Option, model: Option, temperature: f64, + override_budget: bool, ) -> Result<()> { let config = apply_code_session_config(config, provider, model, temperature); info!("Starting code-specialist session (profile=code)"); + let provider_name = config + .default_provider + .as_deref() + .unwrap_or("openrouter") + .to_string(); + let model_name = config + .default_model + .as_deref() + .unwrap_or("anthropic/claude-sonnet-4-20250514") + .to_string(); let mut agent = crate::agent::Agent::code_from_config(&config)?; - if let Some(msg) = message { - let response = agent.run_single(&msg).await?; - println!("{response}"); + let session_start = Instant::now(); + + if override_budget { + apply_cli_budget_override(&agent, CliSessionSurface::Code)?; + } + + agent.record_agent_start_event(&provider_name, &model_name); + + let run_result = if let Some(msg) = message { + let response = agent.run_single(&msg).await; + if let Ok(response) = &response { + println!("{response}"); + } + response.map(|_| ()) } else { - agent.run_interactive().await?; + agent.run_interactive().await + }; + + let summary_result = agent.session_cost_summary(chrono::Utc::now()); + agent.record_agent_end_event(&provider_name, &model_name, session_start.elapsed()); + match summary_result { + Ok(summary) => print_cli_session_summary(summary, CliSessionSurface::Code), + Err(error) => 
tracing::warn!("Failed to load code session cost summary: {error}"), } - Ok(()) + + run_result } fn apply_code_session_config( @@ -1148,10 +1531,9 @@ async fn handle_status_command(config: Config) -> Result<()> { " Max actions/hour: {}", config.autonomy.max_actions_per_hour ); - println!( - " Max cost/day: ${:.2}", - f64::from(config.autonomy.max_cost_per_day_cents) / 100.0 - ); + if let Some(message) = config.autonomy.action_rate_deprecation_warning() { + println!(" Deprecation: {message}"); + } println!(); println!("Channels:"); println!(" CLI: ✅ always"); @@ -1715,10 +2097,94 @@ fn handle_status(auth_service: &auth::AuthService) -> Result<()> { #[cfg(test)] mod tests { use super::*; + use async_trait::async_trait; use clap::CommandFactory; use clap::Parser; + use std::sync::Arc; use tempfile::TempDir; + struct MainTestProvider; + + #[async_trait] + impl crate::providers::Provider for MainTestProvider { + async fn chat_with_system( + &self, + _system_prompt: Option<&str>, + _message: &str, + _model: &str, + _temperature: f64, + ) -> Result { + Ok("ok".to_string()) + } + + async fn chat( + &self, + _request: crate::providers::ChatRequest<'_>, + _model: &str, + _temperature: f64, + ) -> Result { + Ok(crate::providers::ChatResponse { + text: Some("ok".to_string()), + tool_calls: vec![], + }) + } + } + + struct MainTestTool; + + #[async_trait] + impl crate::tools::Tool for MainTestTool { + fn name(&self) -> &str { + "noop" + } + + fn description(&self) -> &str { + "noop" + } + + fn parameters_schema(&self) -> serde_json::Value { + serde_json::json!({ + "type": "object", + "properties": {}, + }) + } + + async fn execute(&self, _args: serde_json::Value) -> Result { + Ok(crate::tools::ToolResult { + success: true, + output: "ok".to_string(), + error: None, + structured: None, + }) + } + } + + fn build_test_agent( + cost_config: crate::config::CostConfig, + tracker: Option>, + workspace_dir: &std::path::Path, + ) -> crate::agent::Agent { + let memory_cfg = 
crate::config::MemoryConfig { + backend: "none".into(), + ..crate::config::MemoryConfig::default() + }; + let memory = + Arc::from(crate::memory::create_memory(&memory_cfg, workspace_dir, None).unwrap()); + let observer = Arc::new(crate::observability::NoopObserver {}); + + crate::agent::Agent::builder() + .provider(Box::new(MainTestProvider)) + .tools(vec![Box::new(MainTestTool)]) + .memory(memory) + .observer(observer) + .tool_dispatcher(Box::new(crate::agent::dispatcher::XmlToolDispatcher)) + .workspace_dir(workspace_dir.to_path_buf()) + .cost_tracker(tracker) + .cost_config(cost_config) + .build() + .unwrap() + } + #[test] fn cli_definition_has_no_flag_conflicts() { Cli::command().debug_assert(); @@ -2155,4 +2621,257 @@ mod tests { let agent_cli = Cli::try_parse_from(["corvus", "agent", "--message", "hello"]).unwrap(); assert!(matches!(agent_cli.command, Commands::Agent { .. })); } + + #[test] + fn agent_and_code_commands_parse_override_budget_flag() { + let code_cli = + Cli::try_parse_from(["corvus", "code", "--message", "hello", "--override-budget"]) + .unwrap(); + assert!(matches!( + code_cli.command, + Commands::Code { + override_budget: true, + .. + } + )); + + let agent_cli = + Cli::try_parse_from(["corvus", "agent", "--message", "hello", "--override-budget"]) + .unwrap(); + assert!(matches!( + agent_cli.command, + Commands::Agent { + override_budget: true, + .. 
+ } + )); + } + + #[test] + fn cost_command_contract_parses_summary_history_and_reset() { + let summary = Cli::try_parse_from(["corvus", "cost", "summary"]).unwrap(); + assert!(matches!( + summary.command, + Commands::Cost { + cost_command: CostCommands::Summary + } + )); + + let history = Cli::try_parse_from([ + "corvus", "cost", "history", "--period", "month", "--window", "12", + ]) + .unwrap(); + assert!(matches!( + history.command, + Commands::Cost { + cost_command: CostCommands::History { + period: CostHistoryPeriod::Month, + window: 12, + } + } + )); + + let reset = Cli::try_parse_from([ + "corvus", "cost", "reset", "--scope", "day", "--reason", "cleanup", + ]) + .unwrap(); + assert!(matches!( + reset.command, + Commands::Cost { + cost_command: CostCommands::Reset { + scope: CostResetScopeArg::Day, + reason: Some(_), + } + } + )); + } + + #[test] + fn render_cost_summary_reports_budget_state_and_usage() { + let tmp = TempDir::new().unwrap(); + let mut config = crate::test_support::test_config(&tmp); + config.cost.enabled = true; + config.cost.session_limit_usd = 4.0; + + let tracker = Arc::new( + crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(), + ); + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 3.3; + tracker.record_usage(usage).unwrap(); + + let service = crate::cost::CostService::new(tracker); + let summary = service.current_summary(chrono::Utc::now()).unwrap(); + let rendered = render_cost_summary(&summary, &config.cost); + + assert!(rendered.contains("budget_state=warning")); + assert!(rendered.contains("active_period=session")); + assert!(rendered.contains("percent_used_session=")); + assert!(rendered.contains("session_limit_usd=")); + assert!(rendered.contains("daily_cost_usd=")); + assert!(rendered.contains("monthly_limit_usd=")); + } + + #[test] + fn cli_override_application_registers_next_request_override() { + let tmp = TempDir::new().unwrap(); + let 
cost_config = crate::config::CostConfig { + enabled: true, + allow_override: true, + ..crate::config::CostConfig::default() + }; + let tracker = + Arc::new(crate::cost::CostTracker::new(cost_config.clone(), tmp.path()).unwrap()); + let agent = build_test_agent(cost_config, Some(tracker.clone()), tmp.path()); + + apply_cli_budget_override(&agent, CliSessionSurface::Agent).unwrap(); + + let active_override = tracker + .active_override(chrono::Utc::now()) + .unwrap() + .unwrap(); + assert_eq!( + active_override.scope, + crate::cost::CostOverrideScope::NextRequest + ); + assert_eq!(active_override.actor, "cli-agent"); + assert_eq!(active_override.remaining_uses, 1); + } + + #[test] + fn cli_override_application_writes_audit_and_allows_next_blocked_request_once() { + let tmp = TempDir::new().unwrap(); + let cost_config = crate::config::CostConfig { + enabled: true, + allow_override: true, + daily_limit_usd: 1.0, + monthly_limit_usd: 10.0, + ..crate::config::CostConfig::default() + }; + let tracker = + Arc::new(crate::cost::CostTracker::new(cost_config.clone(), tmp.path()).unwrap()); + let agent = build_test_agent(cost_config, Some(tracker.clone()), tmp.path()); + let service = crate::cost::CostService::new(tracker.clone()); + + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.1; + tracker.record_usage(usage).unwrap(); + + apply_cli_budget_override(&agent, CliSessionSurface::Agent).unwrap(); + + let first = service + .evaluate_request(0.1, None, chrono::Utc::now()) + .unwrap(); + assert!(matches!( + first, + crate::cost::BudgetEvaluation::Proceed { + override_applied: Some(_), + .. + } + )); + + let second = service + .evaluate_request(0.1, None, chrono::Utc::now()) + .unwrap(); + assert!(matches!( + second, + crate::cost::BudgetEvaluation::Blocked { .. 
} + )); + + let audit = service.audit_trail(10).unwrap(); + assert!(audit.iter().any(|event| { + event.kind == crate::cost::CostAuditKind::OverrideGranted + && event.actor.as_deref() == Some("[REDACTED]") + && event.override_scope == Some(crate::cost::CostOverrideScope::NextRequest) + })); + assert!(audit.iter().any(|event| { + event.kind == crate::cost::CostAuditKind::OverrideConsumed + && event.actor.as_deref() == Some("[REDACTED]") + })); + } + + #[test] + fn cli_override_application_fails_when_cost_tracking_disabled() { + let tmp = TempDir::new().unwrap(); + let cost_config = crate::config::CostConfig { + enabled: false, + allow_override: true, + ..crate::config::CostConfig::default() + }; + let agent = build_test_agent(cost_config, None, tmp.path()); + + let error = apply_cli_budget_override(&agent, CliSessionSurface::Code).unwrap_err(); + assert_eq!( + error.to_string(), + "Cost tracking is disabled for this session" + ); + } + + #[test] + fn cli_override_application_fails_when_override_policy_disabled() { + let tmp = TempDir::new().unwrap(); + let cost_config = crate::config::CostConfig { + enabled: true, + allow_override: false, + ..crate::config::CostConfig::default() + }; + let tracker = + Arc::new(crate::cost::CostTracker::new(cost_config.clone(), tmp.path()).unwrap()); + let agent = build_test_agent(cost_config, Some(tracker), tmp.path()); + + let error = apply_cli_budget_override(&agent, CliSessionSurface::Code).unwrap_err(); + assert_eq!(error.to_string(), "Cost overrides are disabled by policy"); + } + + #[test] + fn render_cli_session_summary_reports_exit_state() { + let summary = crate::cost::CostGovernanceSummary { + session_id: "session-123".to_string(), + usage: crate::cost::types::CostSummary { + session_cost_usd: 1.75, + daily_cost_usd: 1.75, + monthly_cost_usd: 1.75, + total_tokens: 2048, + request_count: 3, + by_model: std::collections::HashMap::new(), + }, + budget_state: crate::cost::BudgetState::Warning, + active_period: 
Some(crate::cost::UsagePeriod::Day), + scope_statuses: vec![], + active_override: None, + }; + + let rendered = render_cli_session_summary(&summary, CliSessionSurface::Code); + + assert!(rendered.contains("session_summary=true")); + assert!(rendered.contains("surface=code")); + assert!(rendered.contains("session_id=session-123")); + assert!(rendered.contains("budget_state=warning")); + assert!(rendered.contains("active_period=day")); + assert!(rendered.contains("session_cost_usd=1.7500")); + assert!(rendered.contains("request_count=3")); + assert!(rendered.contains("total_tokens=2048")); + } + + #[test] + fn perform_cost_reset_clears_requested_scope() { + let tmp = TempDir::new().unwrap(); + let mut config = crate::test_support::test_config(&tmp); + config.cost.enabled = true; + + let tracker = + crate::cost::CostTracker::new(config.cost.clone(), &config.workspace_dir).unwrap(); + let mut usage = crate::cost::TokenUsage::new("test/model", 1_000, 500, 0.0, 0.0); + usage.cost_usd = 1.25; + tracker.record_usage(usage).unwrap(); + + let result = perform_cost_reset( + &config, + crate::cost::CostResetScope::Day, + Some("test".to_string()), + ) + .unwrap(); + assert_eq!(result.scope, crate::cost::CostResetScope::Day); + assert_eq!(result.removed_requests, 1); + } } diff --git a/clients/agent-runtime/src/observability/log.rs b/clients/agent-runtime/src/observability/log.rs index 1ffd0f39..3dc87233 100755 --- a/clients/agent-runtime/src/observability/log.rs +++ b/clients/agent-runtime/src/observability/log.rs @@ -1,4 +1,8 @@ -use super::traits::{redact_observer_payload, Observer, ObserverEvent, ObserverMetric}; +use super::traits::{ + budget_state_label, cost_override_scope_label, redact_observer_payload, + redact_optional_observer_payload, usage_period_label, BudgetOverrideEvent, Observer, + ObserverEvent, ObserverMetric, +}; use std::any::Any; use tracing::info; @@ -11,6 +15,27 @@ impl LogObserver { } } +fn format_budget_override_log_payload(event: &BudgetOverrideEvent) 
-> String { + format!( + "action={} scope={} actor={} reason={} previous_state={} period={} session_id={} override_id={} surface={}", + event.action.as_str(), + cost_override_scope_label(event.scope), + event.redacted_actor(), + event + .redacted_reason() + .unwrap_or_else(|| "none".to_string()), + budget_state_label(event.previous_state), + event + .period + .map(usage_period_label) + .unwrap_or("none"), + event.session_id.as_deref().unwrap_or("none"), + event.override_id.as_deref().unwrap_or("none"), + redact_optional_observer_payload(event.surface.as_deref()) + .unwrap_or_else(|| "none".to_string()), + ) +} + impl Observer for LogObserver { fn record_event(&self, event: &ObserverEvent) { match event { @@ -74,6 +99,33 @@ impl Observer for LogObserver { ObserverEvent::Error { component, message } => { info!(component = %component, error = %message, "error"); } + ObserverEvent::BudgetWarning(event) => { + info!( + period = usage_period_label(event.period), + current_usd = event.current_usd, + projected_usd = event.projected_usd, + limit_usd = event.limit_usd, + percent_used = event.percent_used, + session_id = %event.session_id, + surface = ?event.surface, + "budget.warning" + ); + } + ObserverEvent::BudgetExceeded(event) => { + info!( + period = usage_period_label(event.period), + current_usd = event.current_usd, + projected_usd = event.projected_usd, + limit_usd = event.limit_usd, + percent_used = event.percent_used, + session_id = %event.session_id, + surface = ?event.surface, + "budget.exceeded" + ); + } + ObserverEvent::BudgetOverride(event) => { + info!(payload = %format_budget_override_log_payload(event), "budget.override"); + } ObserverEvent::LlmRequest { provider, model, @@ -238,6 +290,8 @@ impl Observer for LogObserver { #[cfg(test)] mod tests { use super::*; + use crate::cost::{BudgetState, CostOverrideScope, UsagePeriod}; + use crate::observability::BudgetOverrideAction; use std::time::Duration; #[test] @@ -509,4 +563,24 @@ mod tests { }); 
obs.record_metric(&ObserverMetric::RequestLatency(Duration::MAX)); } + + #[test] + fn budget_override_log_payload_redacts_sensitive_fields() { + let event = BudgetOverrideEvent { + action: BudgetOverrideAction::Granted, + actor: "paired-admin-token".into(), + scope: CostOverrideScope::NextRequest, + reason: Some("token=super-secret".into()), + session_id: Some("sess-123".into()), + previous_state: BudgetState::Exceeded, + period: Some(UsagePeriod::Day), + override_id: Some("ovr-123".into()), + surface: Some("gateway_admin".into()), + }; + + let payload = format_budget_override_log_payload(&event); + assert!(payload.contains("***REDACTED***")); + assert!(!payload.contains("paired-admin-token")); + assert!(!payload.contains("super-secret")); + } } diff --git a/clients/agent-runtime/src/observability/mod.rs b/clients/agent-runtime/src/observability/mod.rs index 14229955..ea4a5540 100755 --- a/clients/agent-runtime/src/observability/mod.rs +++ b/clients/agent-runtime/src/observability/mod.rs @@ -13,8 +13,11 @@ pub use self::multi::MultiObserver; pub use noop::NoopObserver; pub use otel::OtelObserver; pub use prometheus::PrometheusObserver; +#[allow(unused_imports)] pub use traits::{ - redact_observer_payload, AudioIngressEvent, AudioIngressOutcome, AudioIngressReason, + budget_state_label, cost_override_scope_label, redact_observer_payload, + redact_optional_observer_payload, usage_period_label, AudioIngressEvent, AudioIngressOutcome, + AudioIngressReason, BudgetOverrideAction, BudgetOverrideEvent, BudgetThresholdEvent, ImageIngressEvent, ImageIngressOutcome, ImageIngressReason, Observer, ObserverEvent, ObserverMetric, }; diff --git a/clients/agent-runtime/src/observability/otel.rs b/clients/agent-runtime/src/observability/otel.rs index a76d9492..1f6aeaa0 100755 --- a/clients/agent-runtime/src/observability/otel.rs +++ b/clients/agent-runtime/src/observability/otel.rs @@ -1,4 +1,8 @@ -use super::traits::{Observer, ObserverEvent, ObserverMetric}; +use super::traits::{ + 
budget_state_label, cost_override_scope_label, redact_optional_observer_payload, + usage_period_label, BudgetOverrideEvent, BudgetThresholdEvent, Observer, ObserverEvent, + ObserverMetric, +}; use opentelemetry::metrics::{Counter, Gauge, Histogram}; use opentelemetry::trace::{Span, SpanKind, Status, Tracer}; use opentelemetry::{global, KeyValue}; @@ -23,14 +27,89 @@ pub struct OtelObserver { channel_messages: Counter, heartbeat_ticks: Counter, errors: Counter, + budget_warnings: Counter, + budget_exceeded: Counter, + budget_overrides: Counter, request_latency: Histogram, tokens_used: Counter, + cost_usd_last: Gauge, active_sessions: Gauge, queue_depth: Gauge, image_ingress: Counter, audio_ingress: Counter, } +fn budget_threshold_trace_fields(event: &BudgetThresholdEvent) -> Vec<(String, String)> { + let mut fields = vec![ + ( + "budget_state".to_string(), + budget_state_label(event.budget_state).to_string(), + ), + ( + "period".to_string(), + usage_period_label(event.period).to_string(), + ), + ( + "current_usd".to_string(), + format!("{:.6}", event.current_usd), + ), + ( + "projected_usd".to_string(), + format!("{:.6}", event.projected_usd), + ), + ("limit_usd".to_string(), format!("{:.6}", event.limit_usd)), + ( + "percent_used".to_string(), + format!("{:.2}", event.percent_used), + ), + ("session_id".to_string(), event.session_id.clone()), + ]; + if let Some(surface) = event.surface.as_ref() { + fields.push(("surface".to_string(), surface.clone())); + } + fields +} + +fn budget_override_trace_fields(event: &BudgetOverrideEvent) -> Vec<(String, String)> { + let mut fields = vec![ + ("action".to_string(), event.action.as_str().to_string()), + ( + "scope".to_string(), + cost_override_scope_label(event.scope).to_string(), + ), + ("actor".to_string(), event.redacted_actor()), + ( + "previous_state".to_string(), + budget_state_label(event.previous_state).to_string(), + ), + ]; + + if let Some(reason) = event.redacted_reason() { + fields.push(("reason".to_string(), 
reason)); + } + if let Some(period) = event.period { + fields.push(("period".to_string(), usage_period_label(period).to_string())); + } + if let Some(session_id) = event.session_id.as_ref() { + fields.push(("session_id".to_string(), session_id.clone())); + } + if let Some(override_id) = event.override_id.as_ref() { + fields.push(("override_id".to_string(), override_id.clone())); + } + if let Some(surface) = redact_optional_observer_payload(event.surface.as_deref()) { + fields.push(("surface".to_string(), surface)); + } + + fields +} + +fn key_values_from_fields(fields: Vec<(String, String)>) -> Vec { + fields + .into_iter() + .map(|(key, value)| KeyValue::new(key, value)) + .collect() +} + impl OtelObserver { /// Create a new OTel observer exporting to the given OTLP endpoint. /// @@ -137,11 +216,32 @@ impl OtelObserver { .with_unit("s") .build(); + let budget_warnings = meter + .u64_counter("corvus.budget.warnings") + .with_description("Budget warning lifecycle events") + .build(); + + let budget_exceeded = meter + .u64_counter("corvus.budget.exceeded") + .with_description("Budget hard block lifecycle events") + .build(); + + let budget_overrides = meter + .u64_counter("corvus.budget.overrides") + .with_description("Budget override lifecycle events") + .build(); + let tokens_used = meter .u64_counter("corvus.tokens.used") .with_description("Total tokens consumed (monotonic)") .build(); + let cost_usd_last = meter + .f64_gauge("corvus.cost.usd") + .with_description("Latest observed request cost in USD") + .with_unit("usd") + .build(); + let active_sessions = meter .u64_gauge("corvus.sessions.active") .with_description("Current number of active sessions") @@ -174,8 +274,12 @@ impl OtelObserver { channel_messages, heartbeat_ticks, errors, + budget_warnings, + budget_exceeded, + budget_overrides, request_latency, tokens_used, + cost_usd_last, active_sessions, queue_depth, image_ingress, @@ -206,6 +310,76 @@ impl Observer for OtelObserver { | 
ObserverEvent::MissionGuardrailViolation { .. } | ObserverEvent::MissionCompleted { .. } | ObserverEvent::MissionTerminated { .. } => {} + ObserverEvent::BudgetWarning(event) => { + let attrs = [ + KeyValue::new("period", usage_period_label(event.period)), + KeyValue::new( + "surface", + event + .surface + .clone() + .unwrap_or_else(|| "unknown".to_string()), + ), + ]; + self.budget_warnings.add(1, &attrs); + + let mut span = tracer.build( + opentelemetry::trace::SpanBuilder::from_name("budget.warning") + .with_kind(SpanKind::Internal) + .with_attributes(key_values_from_fields(budget_threshold_trace_fields( + event, + ))), + ); + span.set_status(Status::Ok); + span.end(); + } + ObserverEvent::BudgetExceeded(event) => { + let attrs = [ + KeyValue::new("period", usage_period_label(event.period)), + KeyValue::new( + "surface", + event + .surface + .clone() + .unwrap_or_else(|| "unknown".to_string()), + ), + ]; + self.budget_exceeded.add(1, &attrs); + + let mut span = tracer.build( + opentelemetry::trace::SpanBuilder::from_name("budget.exceeded") + .with_kind(SpanKind::Internal) + .with_attributes(key_values_from_fields(budget_threshold_trace_fields( + event, + ))), + ); + span.set_status(Status::error("budget exceeded")); + span.end(); + } + ObserverEvent::BudgetOverride(event) => { + let attrs = [ + KeyValue::new("action", event.action.as_str()), + KeyValue::new("scope", cost_override_scope_label(event.scope)), + KeyValue::new( + "surface", + event + .surface + .clone() + .unwrap_or_else(|| "unknown".to_string()), + ), + ]; + self.budget_overrides.add(1, &attrs); + + let mut span = tracer.build( + opentelemetry::trace::SpanBuilder::from_name("budget.override") + .with_kind(SpanKind::Internal) + .with_attributes(key_values_from_fields(budget_override_trace_fields( + event, + ))), + ); + span.set_status(Status::Ok); + span.end(); + } ObserverEvent::AudioIngress(evt) => { let reason_str = evt .reason @@ -302,6 +476,13 @@ impl Observer for OtelObserver { } if let Some(c) 
= cost_usd { span.set_attribute(KeyValue::new("cost_usd", *c)); + self.cost_usd_last.record( + *c, + &[ + KeyValue::new("provider", provider.clone()), + KeyValue::new("model", model.clone()), + ], + ); } span.end(); @@ -449,6 +630,8 @@ impl Observer for OtelObserver { #[cfg(test)] mod tests { use super::*; + use crate::cost::{BudgetState, CostOverrideScope, UsagePeriod}; + use crate::observability::BudgetOverrideAction; use std::time::Duration; // Note: OtelObserver::new() requires an OTLP endpoint. @@ -607,4 +790,30 @@ mod tests { "observer creation must succeed even with unreachable endpoint" ); } + + #[test] + fn budget_override_trace_fields_redact_sensitive_values() { + let event = BudgetOverrideEvent { + action: BudgetOverrideAction::Granted, + actor: "paired-admin-token".into(), + scope: CostOverrideScope::NextRequest, + reason: Some("token=super-secret".into()), + session_id: Some("sess-123".into()), + previous_state: BudgetState::Exceeded, + period: Some(UsagePeriod::Day), + override_id: Some("ovr-123".into()), + surface: Some("gateway_admin".into()), + }; + + let fields = budget_override_trace_fields(&event); + assert!(fields + .iter() + .any(|(key, value)| key == "actor" && value == "***REDACTED***")); + assert!(fields + .iter() + .any(|(key, value)| key == "reason" && value == "***REDACTED***")); + assert!(fields + .iter() + .all(|(_, value)| !value.contains("super-secret"))); + } } diff --git a/clients/agent-runtime/src/observability/prometheus.rs b/clients/agent-runtime/src/observability/prometheus.rs index 4208295d..2d5eaed4 100755 --- a/clients/agent-runtime/src/observability/prometheus.rs +++ b/clients/agent-runtime/src/observability/prometheus.rs @@ -1,6 +1,7 @@ use super::traits::{Observer, ObserverEvent, ObserverMetric}; use prometheus::{ - Encoder, GaugeVec, Histogram, HistogramOpts, HistogramVec, IntCounterVec, Registry, TextEncoder, + Encoder, Gauge, GaugeVec, Histogram, HistogramOpts, HistogramVec, IntCounterVec, Registry, + TextEncoder, }; 
/// Prometheus-backed observer — exposes metrics for scraping via `/metrics`. @@ -13,6 +14,9 @@ pub struct PrometheusObserver { channel_messages: IntCounterVec, heartbeat_ticks: prometheus::IntCounter, errors: IntCounterVec, + budget_warnings: IntCounterVec, + budget_exceeded: IntCounterVec, + budget_overrides: IntCounterVec, // Histograms agent_duration: HistogramVec, @@ -21,6 +25,7 @@ pub struct PrometheusObserver { // Gauges tokens_used: prometheus::IntGauge, + cost_usd_last: Gauge, active_sessions: GaugeVec, queue_depth: GaugeVec, @@ -63,6 +68,33 @@ impl PrometheusObserver { ) .expect("valid metric"); + let budget_warnings = IntCounterVec::new( + prometheus::Opts::new( + "corvus_budget_warnings_total", + "Budget warning lifecycle events", + ), + &["period", "surface"], + ) + .expect("valid metric"); + + let budget_exceeded = IntCounterVec::new( + prometheus::Opts::new( + "corvus_budget_exceeded_total", + "Budget hard block lifecycle events", + ), + &["period", "surface"], + ) + .expect("valid metric"); + + let budget_overrides = IntCounterVec::new( + prometheus::Opts::new( + "corvus_budget_overrides_total", + "Budget override lifecycle events", + ), + &["action", "scope", "surface"], + ) + .expect("valid metric"); + let agent_duration = HistogramVec::new( HistogramOpts::new( "corvus_agent_duration_seconds", @@ -96,6 +128,12 @@ impl PrometheusObserver { prometheus::IntGauge::new("corvus_tokens_used_last", "Tokens used in the last request") .expect("valid metric"); + let cost_usd_last = Gauge::new( + "corvus_cost_usd_last", + "Cost of the last completed request in USD", + ) + .expect("valid metric"); + let active_sessions = GaugeVec::new( prometheus::Opts::new("corvus_active_sessions", "Number of active sessions"), &[], @@ -134,10 +172,14 @@ impl PrometheusObserver { registry.register(Box::new(channel_messages.clone())).ok(); registry.register(Box::new(heartbeat_ticks.clone())).ok(); registry.register(Box::new(errors.clone())).ok(); + 
registry.register(Box::new(budget_warnings.clone())).ok(); + registry.register(Box::new(budget_exceeded.clone())).ok(); + registry.register(Box::new(budget_overrides.clone())).ok(); registry.register(Box::new(agent_duration.clone())).ok(); registry.register(Box::new(tool_duration.clone())).ok(); registry.register(Box::new(request_latency.clone())).ok(); registry.register(Box::new(tokens_used.clone())).ok(); + registry.register(Box::new(cost_usd_last.clone())).ok(); registry.register(Box::new(active_sessions.clone())).ok(); registry.register(Box::new(queue_depth.clone())).ok(); @@ -148,10 +190,14 @@ impl PrometheusObserver { channel_messages, heartbeat_ticks, errors, + budget_warnings, + budget_exceeded, + budget_overrides, agent_duration, tool_duration, request_latency, tokens_used, + cost_usd_last, active_sessions, queue_depth, image_ingress, @@ -182,7 +228,7 @@ impl Observer for PrometheusObserver { model, duration, tokens_used, - cost_usd: _, + cost_usd, } => { // Agent duration is recorded via the histogram with provider/model labels self.agent_duration @@ -191,6 +237,7 @@ impl Observer for PrometheusObserver { if let Some(t) = tokens_used { self.tokens_used.set(i64::try_from(*t).unwrap_or(i64::MAX)); } + self.cost_usd_last.set(cost_usd.unwrap_or(0.0)); } ObserverEvent::ToolCallStart { tool: _ } | ObserverEvent::TurnComplete @@ -202,6 +249,31 @@ impl Observer for PrometheusObserver { | ObserverEvent::MissionGuardrailViolation { .. } | ObserverEvent::MissionCompleted { .. } | ObserverEvent::MissionTerminated { .. 
} => {} + ObserverEvent::BudgetWarning(event) => { + self.budget_warnings + .with_label_values(&[ + super::traits::usage_period_label(event.period), + event.surface.as_deref().unwrap_or("unknown"), + ]) + .inc(); + } + ObserverEvent::BudgetExceeded(event) => { + self.budget_exceeded + .with_label_values(&[ + super::traits::usage_period_label(event.period), + event.surface.as_deref().unwrap_or("unknown"), + ]) + .inc(); + } + ObserverEvent::BudgetOverride(event) => { + self.budget_overrides + .with_label_values(&[ + event.action.as_str(), + super::traits::cost_override_scope_label(event.scope), + event.surface.as_deref().unwrap_or("unknown"), + ]) + .inc(); + } ObserverEvent::AudioIngress(evt) => { let outcome = format!("{:?}", evt.outcome); let reason = evt @@ -287,6 +359,8 @@ impl Observer for PrometheusObserver { #[cfg(test)] mod tests { use super::*; + use crate::cost::{BudgetState, CostOverrideScope, UsagePeriod}; + use crate::observability::{BudgetOverrideAction, BudgetOverrideEvent}; use std::time::Duration; #[test] @@ -446,4 +520,25 @@ mod tests { let output = obs.encode(); assert!(output.contains("corvus_tokens_used_last 200")); } + + #[test] + fn budget_metrics_do_not_expose_sensitive_override_fields() { + let obs = PrometheusObserver::new(); + obs.record_event(&ObserverEvent::BudgetOverride(BudgetOverrideEvent { + action: BudgetOverrideAction::Granted, + actor: "paired-admin-token".into(), + scope: CostOverrideScope::NextRequest, + reason: Some("token=super-secret".into()), + session_id: Some("sess-123".into()), + previous_state: BudgetState::Exceeded, + period: Some(UsagePeriod::Day), + override_id: Some("ovr-123".into()), + surface: Some("gateway_admin".into()), + })); + + let output = obs.encode(); + assert!(output.contains("corvus_budget_overrides_total")); + assert!(!output.contains("paired-admin-token")); + assert!(!output.contains("super-secret")); + } } diff --git a/clients/agent-runtime/src/observability/traits.rs 
b/clients/agent-runtime/src/observability/traits.rs index 18fa41ec..dd27cf1f 100755 --- a/clients/agent-runtime/src/observability/traits.rs +++ b/clients/agent-runtime/src/observability/traits.rs @@ -1,3 +1,4 @@ +use crate::cost::{BudgetState, CostOverrideScope, UsagePeriod}; use std::time::Duration; const SENSITIVE_PAYLOAD_MARKERS: [&str; 5] = ["password", "token", "secret", "api_key", "auth"]; @@ -140,6 +141,83 @@ pub fn redact_observer_payload(value: &str) -> String { trimmed.to_string() } +pub fn redact_optional_observer_payload(value: Option<&str>) -> Option { + value.map(redact_observer_payload) +} + +pub fn usage_period_label(period: UsagePeriod) -> &'static str { + match period { + UsagePeriod::Session => "session", + UsagePeriod::Day => "day", + UsagePeriod::Month => "month", + UsagePeriod::Mission => "mission", + } +} + +pub fn budget_state_label(state: BudgetState) -> &'static str { + match state { + BudgetState::Allowed => "allowed", + BudgetState::Warning => "warning", + BudgetState::Exceeded => "exceeded", + } +} + +pub fn cost_override_scope_label(scope: CostOverrideScope) -> &'static str { + match scope { + CostOverrideScope::NextRequest => "next_request", + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BudgetThresholdEvent { + pub budget_state: BudgetState, + pub period: UsagePeriod, + pub current_usd: f64, + pub projected_usd: f64, + pub limit_usd: f64, + pub percent_used: f64, + pub session_id: String, + pub surface: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BudgetOverrideAction { + Granted, + Consumed, +} + +impl BudgetOverrideAction { + pub fn as_str(self) -> &'static str { + match self { + Self::Granted => "granted", + Self::Consumed => "consumed", + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BudgetOverrideEvent { + pub action: BudgetOverrideAction, + pub actor: String, + pub scope: CostOverrideScope, + pub reason: Option, + pub session_id: Option, + pub previous_state: BudgetState, + pub 
period: Option, + pub override_id: Option, + pub surface: Option, +} + +impl BudgetOverrideEvent { + pub fn redacted_actor(&self) -> String { + redact_observer_payload(&self.actor) + } + + pub fn redacted_reason(&self) -> Option { + redact_optional_observer_payload(self.reason.as_deref()) + } +} + /// Events the observer can record #[derive(Debug, Clone)] pub enum ObserverEvent { @@ -203,6 +281,9 @@ pub enum ObserverEvent { component: String, message: String, }, + BudgetWarning(BudgetThresholdEvent), + BudgetExceeded(BudgetThresholdEvent), + BudgetOverride(BudgetOverrideEvent), /// Image ingress lifecycle event (metadata only). ImageIngress(ImageIngressEvent), /// Audio ingress lifecycle event (metadata only). @@ -286,6 +367,7 @@ pub trait Observer: Send + Sync + 'static { #[cfg(test)] mod tests { use super::*; + use crate::cost::{BudgetState, CostOverrideScope, UsagePeriod}; use parking_lot::Mutex; use std::time::Duration; @@ -406,6 +488,63 @@ mod tests { ); } + #[test] + fn budget_override_event_redacts_sensitive_actor_and_reason() { + let event = BudgetOverrideEvent { + action: BudgetOverrideAction::Granted, + actor: "paired-admin-token".into(), + scope: CostOverrideScope::NextRequest, + reason: Some("token=super-secret".into()), + session_id: Some("sess-123".into()), + previous_state: BudgetState::Exceeded, + period: Some(UsagePeriod::Day), + override_id: Some("ovr-123".into()), + surface: Some("gateway_admin".into()), + }; + + assert_eq!(event.redacted_actor(), "***REDACTED***"); + assert_eq!(event.redacted_reason().as_deref(), Some("***REDACTED***")); + } + + #[test] + fn observer_event_budget_variants_exist() { + let warning = ObserverEvent::BudgetWarning(BudgetThresholdEvent { + budget_state: BudgetState::Warning, + period: UsagePeriod::Day, + current_usd: 8.2, + projected_usd: 8.2, + limit_usd: 10.0, + percent_used: 82.0, + session_id: "sess-123".into(), + surface: Some("agent_loop".into()), + }); + let exceeded = 
ObserverEvent::BudgetExceeded(BudgetThresholdEvent { + budget_state: BudgetState::Exceeded, + period: UsagePeriod::Day, + current_usd: 10.2, + projected_usd: 10.3, + limit_usd: 10.0, + percent_used: 103.0, + session_id: "sess-123".into(), + surface: Some("agent_loop".into()), + }); + let override_event = ObserverEvent::BudgetOverride(BudgetOverrideEvent { + action: BudgetOverrideAction::Consumed, + actor: "cli-agent".into(), + scope: CostOverrideScope::NextRequest, + reason: Some("incident mitigation".into()), + session_id: Some("sess-123".into()), + previous_state: BudgetState::Exceeded, + period: Some(UsagePeriod::Day), + override_id: Some("ovr-123".into()), + surface: Some("cli".into()), + }); + + assert!(matches!(warning, ObserverEvent::BudgetWarning(_))); + assert!(matches!(exceeded, ObserverEvent::BudgetExceeded(_))); + assert!(matches!(override_event, ObserverEvent::BudgetOverride(_))); + } + // ── Image ingress telemetry (Task 4.4) ─────────────────── #[test] diff --git a/clients/agent-runtime/src/security/policy.rs b/clients/agent-runtime/src/security/policy.rs index 710b02f2..f7e74388 100644 --- a/clients/agent-runtime/src/security/policy.rs +++ b/clients/agent-runtime/src/security/policy.rs @@ -123,7 +123,6 @@ pub struct SecurityPolicy { pub allowed_commands: Vec, pub forbidden_paths: Vec, pub max_actions_per_hour: u32, - pub max_cost_per_day_cents: u32, pub require_approval_for_medium_risk: bool, pub block_high_risk_commands: bool, pub tracker: ActionTracker, @@ -172,7 +171,6 @@ impl Default for SecurityPolicy { "~/.config".into(), ], max_actions_per_hour: 20, - max_cost_per_day_cents: 500, require_approval_for_medium_risk: true, block_high_risk_commands: true, tracker: ActionTracker::new(), @@ -687,7 +685,6 @@ impl SecurityPolicy { allowed_commands: autonomy_config.allowed_commands.clone(), forbidden_paths: autonomy_config.forbidden_paths.clone(), max_actions_per_hour: autonomy_config.max_actions_per_hour, - max_cost_per_day_cents: 
autonomy_config.max_cost_per_day_cents, require_approval_for_medium_risk: autonomy_config.require_approval_for_medium_risk, block_high_risk_commands: autonomy_config.block_high_risk_commands, tracker: ActionTracker::new(), @@ -1071,7 +1068,6 @@ mod tests { allowed_commands: vec!["docker".into()], forbidden_paths: vec!["/secret".into()], max_actions_per_hour: 100, - max_cost_per_day_cents: 1000, require_approval_for_medium_risk: false, block_high_risk_commands: false, ..crate::config::AutonomyConfig::default() @@ -1084,7 +1080,6 @@ mod tests { assert_eq!(policy.allowed_commands, vec!["docker"]); assert_eq!(policy.forbidden_paths, vec!["/secret"]); assert_eq!(policy.max_actions_per_hour, 100); - assert_eq!(policy.max_cost_per_day_cents, 1000); assert!(!policy.require_approval_for_medium_risk); assert!(!policy.block_high_risk_commands); assert_eq!(policy.workspace_dir, PathBuf::from("/tmp/test-workspace")); @@ -1100,7 +1095,6 @@ mod tests { assert!(!p.allowed_commands.is_empty()); assert!(!p.forbidden_paths.is_empty()); assert!(p.max_actions_per_hour > 0); - assert!(p.max_cost_per_day_cents > 0); assert!(p.require_approval_for_medium_risk); assert!(p.block_high_risk_commands); } @@ -1380,6 +1374,22 @@ mod tests { assert!(!p.record_action()); } + #[test] + fn action_rate_denials_are_labeled_separately_from_token_spend() { + let policy = SecurityPolicy { + max_actions_per_hour: 0, + ..default_policy() + }; + + let err = policy + .enforce_tool_operation(ToolOperation::Act, "file_write") + .unwrap_err(); + + assert!(err.contains("action budget exhausted")); + assert!(!err.to_ascii_lowercase().contains("cost")); + assert!(!err.to_ascii_lowercase().contains("token")); + } + #[test] fn rate_limit_high_allows_many() { let p = SecurityPolicy { @@ -1437,7 +1447,6 @@ mod tests { allowed_commands: vec![], forbidden_paths: vec![], max_actions_per_hour: 10, - max_cost_per_day_cents: 100, require_approval_for_medium_risk: true, block_high_risk_commands: true, 
..crate::config::AutonomyConfig::default() diff --git a/clients/agent-runtime/src/tools/delegate.rs b/clients/agent-runtime/src/tools/delegate.rs index 3769dc91..b5bfaef6 100755 --- a/clients/agent-runtime/src/tools/delegate.rs +++ b/clients/agent-runtime/src/tools/delegate.rs @@ -171,9 +171,10 @@ impl DelegateTool { ), Ok(Err(e)) => { let status = match e.downcast_ref::() { - Some(AgentExecutionError::IterationBudgetExceeded { .. }) => { - CodeSessionStatus::BudgetExceeded - } + Some( + AgentExecutionError::IterationBudgetExceeded { .. } + | AgentExecutionError::CostBudgetExceeded { .. }, + ) => CodeSessionStatus::BudgetExceeded, None => CodeSessionStatus::Error, }; let error_text = e.to_string(); diff --git a/clients/agent-runtime/tests/admin_config_api_integration.rs b/clients/agent-runtime/tests/admin_config_api_integration.rs index be262fe4..34717637 100644 --- a/clients/agent-runtime/tests/admin_config_api_integration.rs +++ b/clients/agent-runtime/tests/admin_config_api_integration.rs @@ -71,6 +71,7 @@ fn headers() -> HeaderMap { fn state_with_config(config: Config) -> AppState { AppState { config: Arc::new(Mutex::new(config)), + cost_tracker: None, provider: Arc::new(IntegrationProvider), model: "model".into(), temperature: 0.7, @@ -167,6 +168,7 @@ async fn put_admin_config_updates_and_persists() { web_search: None, browser: None, memory: None, + cost: None, }; let response = @@ -207,6 +209,7 @@ async fn put_admin_config_rolls_back_on_save_failure() { web_search: None, browser: None, memory: None, + cost: None, }; let before = state.config.lock().default_provider.clone(); diff --git a/clients/web/apps/dashboard/src/components/config/CostOverview.spec.ts b/clients/web/apps/dashboard/src/components/config/CostOverview.spec.ts index 4601e71d..e1dc6b49 100644 --- a/clients/web/apps/dashboard/src/components/config/CostOverview.spec.ts +++ b/clients/web/apps/dashboard/src/components/config/CostOverview.spec.ts @@ -1,9 +1,30 @@ import { flushPromises, mount } from 
"@vue/test-utils"; -import { afterEach, describe, expect, it, vi } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { computed, ref } from "vue"; import { createI18n } from "vue-i18n"; -import CostOverview from "@/components/config/CostOverview.vue"; import { i18nConfig } from "@/i18n"; +import type { + AdminCostHistoryView, + AdminCostSummaryView, + AdminCostView, +} from "@/types/admin-config"; + +const hoisted = vi.hoisted(() => ({ + state: {} as Record, +})); + +vi.mock("@/composables/useCostGovernance", () => ({ + useCostGovernance: () => hoisted.state, +})); + +vi.mock("@corvus/ui", () => ({ + Button: { + template: "", + }, +})); + +import CostOverview from "@/components/config/CostOverview.vue"; function mountComponent() { return mount(CostOverview, { @@ -17,48 +38,200 @@ function mountComponent() { }); } +function setBaseConfig(overrides: Partial = {}) { + (hoisted.state.config as ReturnType>).value = { + enabled: true, + session_limit_usd: 25, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + ...overrides, + }; +} + describe("CostOverview", () => { - afterEach(() => { - vi.unstubAllGlobals(); - }); + beforeEach(() => { + const config = ref(null); + const summary = ref(null); + const history = ref(null); + const loading = ref(false); + const error = ref(null); + const usageUnavailable = ref(false); + const usageError = ref(null); + const actionMessage = ref(null); + const actionError = ref(null); + const actionPending = ref(false); + const reload = vi.fn().mockResolvedValue(undefined); + const grantOverride = vi.fn().mockImplementation(async () => { + actionMessage.value = "Override granted: next_request"; + }); + const resetSession = vi.fn().mockImplementation(async () => { + actionMessage.value = "Session totals reset: 6"; + }); - it("renders cost data on successful fetch", async () => { - const mockConfig = { - config: { - cost: { - enabled: true, - 
daily_limit_usd: 50, - monthly_limit_usd: 1000, - warn_at_percent: 80, - allow_override: false, - }, - }, + hoisted.state = { + config, + summary, + history, + loading, + error, + usageUnavailable, + usageError, + actionMessage, + actionError, + actionPending, + reload, + grantOverride, + resetSession, + hasOperationalData: computed(() => summary.value !== null || history.value !== null), + activeBudgetState: computed(() => summary.value?.budget_state ?? "allowed"), }; - const fetchSpy = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve(mockConfig), - }); - vi.stubGlobal("fetch", fetchSpy); + setBaseConfig(); + (hoisted.state.summary as ReturnType>).value = null; + (hoisted.state.history as ReturnType>).value = null; + (hoisted.state.loading as ReturnType>).value = false; + (hoisted.state.error as ReturnType>).value = null; + (hoisted.state.usageUnavailable as ReturnType>).value = false; + (hoisted.state.usageError as ReturnType>).value = null; + (hoisted.state.actionMessage as ReturnType>).value = null; + (hoisted.state.actionError as ReturnType>).value = null; + (hoisted.state.actionPending as ReturnType>).value = false; + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("falls back to config-only mode when live usage APIs are unavailable", async () => { + (hoisted.state.usageUnavailable as ReturnType>).value = true; + (hoisted.state.usageError as ReturnType>).value = + "Live usage is unavailable. 
Showing saved policy only."; const wrapper = mountComponent(); await flushPromises(); - expect(fetchSpy).toHaveBeenCalledWith( - expect.stringContaining("/web/admin/config"), - expect.objectContaining({ - headers: { Authorization: "Bearer test-token" }, - }) - ); - + expect(hoisted.state.reload).toHaveBeenCalled(); expect(wrapper.find('[data-testid="cost-overview"]').exists()).toBe(true); + expect(wrapper.find('[data-testid="cost-config-fallback"]').exists()).toBe(true); expect(wrapper.text()).toContain("$50.00"); expect(wrapper.text()).toContain("$1,000.00"); expect(wrapper.text()).toContain("80%"); }); + it("renders warning state with live summary and history", async () => { + (hoisted.state.summary as ReturnType>).value = { + session_cost_usd: 12.4, + daily_cost_usd: 41, + monthly_cost_usd: 320, + total_tokens: 120044, + request_count: 63, + percent_used_session: 49.6, + percent_used_daily: 82, + percent_used_monthly: 32, + budget_state: "warning", + period: "day", + }; + (hoisted.state.history as ReturnType>).value = { + period: "day", + points: [ + { bucket: "2026-04-04", cost_usd: 9.25, tokens: 18000, requests: 8 }, + { bucket: "2026-04-05", cost_usd: 11.7, tokens: 22000, requests: 10 }, + ], + totals: { cost_usd: 20.95, tokens: 40000, requests: 18 }, + }; + + const wrapper = mountComponent(); + await flushPromises(); + + expect(wrapper.find('[data-testid="cost-live-summary"]').exists()).toBe(true); + expect(wrapper.find('[data-testid="cost-state-warning"]').exists()).toBe(true); + expect(wrapper.find('[data-testid="cost-history"]').exists()).toBe(true); + expect(wrapper.text()).toContain("$25.00"); + expect(wrapper.text()).toContain("$12.40"); + expect(wrapper.text()).toContain("$41.00"); + expect(wrapper.text()).toContain("50%"); + expect(wrapper.text()).toContain("2026-04-04"); + expect(wrapper.text()).toContain("2026-04-05"); + }); + + it("renders exceeded state when budget is blocked", async () => { + (hoisted.state.summary as ReturnType>).value = { + 
session_cost_usd: 30.5, + daily_cost_usd: 55, + monthly_cost_usd: 1005, + total_tokens: 190000, + request_count: 90, + percent_used_session: 100, + percent_used_daily: 110, + percent_used_monthly: 100.5, + budget_state: "exceeded", + period: "month", + }; + + const wrapper = mountComponent(); + await flushPromises(); + + expect(wrapper.find('[data-testid="cost-state-exceeded"]').exists()).toBe(true); + expect(wrapper.text()).toContain("$1,005.00"); + }); + + it("shows action affordances when operator actions are available", async () => { + (hoisted.state.summary as ReturnType>).value = { + session_cost_usd: 12.4, + daily_cost_usd: 41, + monthly_cost_usd: 320, + total_tokens: 120044, + request_count: 63, + percent_used_daily: 82, + percent_used_monthly: 32, + budget_state: "warning", + period: "day", + }; + (hoisted.state.history as ReturnType>).value = { + period: "day", + points: [{ bucket: "2026-04-05", cost_usd: 11.7, tokens: 22000, requests: 10 }], + totals: { cost_usd: 11.7, tokens: 22000, requests: 10 }, + }; + + const wrapper = mountComponent(); + await flushPromises(); + + await wrapper.get('[data-testid="cost-action-override"]').trigger("click"); + await wrapper.get('[data-testid="cost-action-reset-session"]').trigger("click"); + await flushPromises(); + + expect(hoisted.state.grantOverride).toHaveBeenCalledTimes(1); + expect(hoisted.state.resetSession).toHaveBeenCalledTimes(1); + }); + + it("keeps reset visible when overrides are disabled", async () => { + setBaseConfig({ allow_override: false }); + (hoisted.state.summary as ReturnType>).value = { + session_cost_usd: 4.2, + daily_cost_usd: 10, + monthly_cost_usd: 40, + total_tokens: 1200, + request_count: 4, + percent_used_session: 16.8, + percent_used_daily: 20, + percent_used_monthly: 4, + budget_state: "allowed", + period: "session", + }; + + const wrapper = mountComponent(); + await flushPromises(); + + expect(wrapper.find('[data-testid="cost-actions"]').exists()).toBe(true); + 
expect(wrapper.find('[data-testid="cost-action-override"]').exists()).toBe(false); + expect(wrapper.find('[data-testid="cost-action-reset-session"]').exists()).toBe(true); + }); + it("shows error on fetch failure", async () => { - vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("Network error"))); + (hoisted.state.config as ReturnType>).value = null; + (hoisted.state.error as ReturnType>).value = "Network error"; const wrapper = mountComponent(); await flushPromises(); @@ -68,13 +241,9 @@ describe("CostOverview", () => { }); it("shows error when cost data is missing from response", async () => { - vi.stubGlobal( - "fetch", - vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ config: {} }), - }) - ); + (hoisted.state.config as ReturnType>).value = null; + (hoisted.state.error as ReturnType>).value = + "Cost data not available"; const wrapper = mountComponent(); await flushPromises(); diff --git a/clients/web/apps/dashboard/src/components/config/CostOverview.vue b/clients/web/apps/dashboard/src/components/config/CostOverview.vue index 8184da2c..47a9aa93 100644 --- a/clients/web/apps/dashboard/src/components/config/CostOverview.vue +++ b/clients/web/apps/dashboard/src/components/config/CostOverview.vue @@ -1,8 +1,9 @@ diff --git a/clients/web/apps/dashboard/src/composables/useAdmin.ts b/clients/web/apps/dashboard/src/composables/useAdmin.ts index c304725a..aa236d9e 100644 --- a/clients/web/apps/dashboard/src/composables/useAdmin.ts +++ b/clients/web/apps/dashboard/src/composables/useAdmin.ts @@ -1,4 +1,14 @@ import { ref } from "vue"; +import type { + AdminConfigResponse, + AdminCostHistoryParams, + AdminCostHistoryView, + AdminCostOverrideRecordView, + AdminCostResetResultView, + AdminCostSummaryResponse, + AdminCostSummaryView, + AdminCostView, +} from "@/types/admin-config"; import type { AdminMemoryEntry, AdminMemoryListResponse, @@ -33,6 +43,9 @@ export function useAdmin( const sessionDetail = ref(null); const memoryEntries = 
ref([]); const memoryStats = ref(null); + const costConfig = ref(null); + const costSummary = ref(null); + const costHistory = ref(null); // NOTE: Single shared loading ref — concurrent calls will overwrite each other's state. // Acceptable for this dashboard's sequential usage pattern. const loading = ref(false); @@ -194,6 +207,145 @@ export function useAdmin( } } + async function fetchCostConfig(): Promise { + loading.value = true; + error.value = null; + try { + const url = buildUrl("/web/admin/config"); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30_000); + try { + const res = await fetch(url, { headers: authHeaders(), signal: controller.signal }); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } + const data = (await res.json()) as AdminConfigResponse; + costConfig.value = data.config?.cost ?? null; + return costConfig.value; + } finally { + clearTimeout(timeoutId); + } + } catch (e: unknown) { + error.value = e instanceof Error ? e.message : String(e); + costConfig.value = null; + throw e; + } finally { + loading.value = false; + } + } + + async function fetchCostSummary(): Promise { + loading.value = true; + error.value = null; + try { + const url = buildUrl("/web/cost/summary"); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30_000); + try { + const res = await fetch(url, { headers: authHeaders(), signal: controller.signal }); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } + const data = (await res.json()) as AdminCostSummaryResponse; + costSummary.value = data.summary; + costConfig.value = data.config; + return data; + } finally { + clearTimeout(timeoutId); + } + } catch (e: unknown) { + error.value = e instanceof Error ? 
e.message : String(e); + costSummary.value = null; + throw e; + } finally { + loading.value = false; + } + } + + async function fetchCostHistory( + params: AdminCostHistoryParams = {} + ): Promise { + loading.value = true; + error.value = null; + try { + const url = buildUrl("/web/cost/history", { + period: params.period, + window: params.window, + }); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30_000); + try { + const res = await fetch(url, { headers: authHeaders(), signal: controller.signal }); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } + const data = (await res.json()) as AdminCostHistoryView; + costHistory.value = data; + return data; + } finally { + clearTimeout(timeoutId); + } + } catch (e: unknown) { + error.value = e instanceof Error ? e.message : String(e); + costHistory.value = null; + throw e; + } finally { + loading.value = false; + } + } + + async function resetCost(scope: "session" | "day" | "month"): Promise { + loading.value = true; + error.value = null; + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30_000); + try { + const url = buildUrl("/web/admin/cost/reset"); + const res = await fetch(url, { + method: "POST", + headers: authHeaders(), + body: JSON.stringify({ scope }), + signal: controller.signal, + }); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } + return (await res.json()) as AdminCostResetResultView; + } catch (e: unknown) { + error.value = e instanceof Error ? 
e.message : String(e); + throw e; + } finally { + clearTimeout(timeoutId); + loading.value = false; + } + } + + async function grantCostOverride(): Promise { + loading.value = true; + error.value = null; + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30_000); + try { + const url = buildUrl("/web/admin/cost/override"); + const res = await fetch(url, { + method: "POST", + headers: authHeaders(), + body: JSON.stringify({ scope: "next_request" }), + signal: controller.signal, + }); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } + return (await res.json()) as AdminCostOverrideRecordView; + } catch (e: unknown) { + error.value = e instanceof Error ? e.message : String(e); + throw e; + } finally { + clearTimeout(timeoutId); + loading.value = false; + } + } + async function deleteMemoryEntry(key: string): Promise { loading.value = true; error.value = null; @@ -233,6 +385,9 @@ export function useAdmin( sessionDetail, memoryEntries, memoryStats, + costConfig, + costSummary, + costHistory, loading, error, totalSessions, @@ -241,6 +396,11 @@ export function useAdmin( fetchSessionDetail, fetchMemoryEntries, fetchMemoryStats, + fetchCostConfig, + fetchCostSummary, + fetchCostHistory, + resetCost, + grantCostOverride, deleteMemoryEntry, isSessionApiAvailable, }; diff --git a/clients/web/apps/dashboard/src/composables/useCostGovernance.spec.ts b/clients/web/apps/dashboard/src/composables/useCostGovernance.spec.ts new file mode 100644 index 00000000..bbe3d68c --- /dev/null +++ b/clients/web/apps/dashboard/src/composables/useCostGovernance.spec.ts @@ -0,0 +1,392 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { effectScope, ref } from "vue"; + +import { useCostGovernance } from "@/composables/useCostGovernance"; + +const fetchMock = vi.fn<(input: RequestInfo | URL, init?: RequestInit) => Promise>(); + +function jsonResponse(body: unknown, status = 200) { + return new 
Response(JSON.stringify(body), { + status, + headers: { "Content-Type": "application/json" }, + }); +} + +function createComposable(gateway = ref("http://localhost:3000"), token = ref("test-token")) { + const scope = effectScope(); + const governance = scope.run(() => + useCostGovernance( + () => gateway.value, + () => token.value, + (key) => key + ) + ); + + if (!governance) { + throw new Error("Failed to create cost governance composable"); + } + + return { + governance, + gateway, + token, + stop: () => scope.stop(), + }; +} + +describe("useCostGovernance", () => { + beforeEach(() => { + fetchMock.mockReset(); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + }); + + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it("loads config and falls back cleanly when usage endpoints are unavailable", async () => { + fetchMock + .mockResolvedValueOnce( + jsonResponse({ + config: { + cost: { + enabled: true, + session_limit_usd: 15, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + }, + }, + }) + ) + .mockResolvedValueOnce(new Response(null, { status: 404 })) + .mockResolvedValueOnce(new Response(null, { status: 404 })); + + const { governance, stop } = createComposable(); + + await governance.reload(); + + expect(governance.config.value?.daily_limit_usd).toBe(50); + expect(governance.summary.value).toBeNull(); + expect(governance.history.value).toBeNull(); + expect(governance.usageUnavailable.value).toBe(true); + + stop(); + }); + + it("loads live summary and history data", async () => { + fetchMock + .mockResolvedValueOnce( + jsonResponse({ + config: { + cost: { + enabled: true, + session_limit_usd: 15, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + }, + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + summary: { + session_cost_usd: 12.4, + daily_cost_usd: 41, + monthly_cost_usd: 320, + total_tokens: 120044, + request_count: 63, + 
percent_used_session: 82.7, + percent_used_daily: 82, + percent_used_monthly: 32, + budget_state: "warning", + period: "day", + }, + config: { + enabled: true, + session_limit_usd: 15, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + period: "day", + points: [{ bucket: "2026-04-05", cost_usd: 11.7, tokens: 22000, requests: 10 }], + totals: { cost_usd: 11.7, tokens: 22000, requests: 10 }, + }) + ); + + const { governance, stop } = createComposable(); + + await governance.reload(); + + expect(governance.summary.value?.budget_state).toBe("warning"); + expect(governance.summary.value?.percent_used_session).toBe(82.7); + expect(governance.config.value?.session_limit_usd).toBe(15); + expect(governance.history.value?.points).toHaveLength(1); + expect(governance.hasOperationalData.value).toBe(true); + + stop(); + }); + + it("calls override and reset endpoints through admin APIs", async () => { + fetchMock + .mockResolvedValueOnce( + jsonResponse({ + id: "override-1", + actor: "gateway-admin", + scope: "next_request", + requested_at: "2026-04-06T01:00:00Z", + remaining_uses: 1, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + config: { + cost: { + enabled: true, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + }, + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + summary: { + session_cost_usd: 0, + daily_cost_usd: 28.6, + monthly_cost_usd: 307.6, + total_tokens: 98044, + request_count: 57, + percent_used_session: 0, + percent_used_daily: 57.2, + percent_used_monthly: 30.8, + budget_state: "allowed", + period: null, + }, + config: { + enabled: true, + session_limit_usd: 15, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + period: "day", + points: [], + totals: { cost_usd: 0, tokens: 0, requests: 0 }, + 
}) + ) + .mockResolvedValueOnce( + jsonResponse({ + scope: "session", + removed_cost_usd: 12.4, + removed_requests: 6, + effective_at: "2026-04-06T01:01:00Z", + audit_event: { + id: "audit-1", + kind: "reset_applied", + recorded_at: "2026-04-06T01:01:00Z", + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + config: { + cost: { + enabled: true, + session_limit_usd: 15, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + }, + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + summary: { + session_cost_usd: 0, + daily_cost_usd: 28.6, + monthly_cost_usd: 307.6, + total_tokens: 98044, + request_count: 57, + percent_used_session: 0, + percent_used_daily: 57.2, + percent_used_monthly: 30.8, + budget_state: "allowed", + period: null, + }, + config: { + enabled: true, + session_limit_usd: 15, + daily_limit_usd: 50, + monthly_limit_usd: 1000, + warn_at_percent: 80, + allow_override: true, + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + period: "day", + points: [], + totals: { cost_usd: 0, tokens: 0, requests: 0 }, + }) + ); + + const { governance, stop } = createComposable(); + + await governance.grantOverride(); + await governance.resetSession(); + + const overrideCall = fetchMock.mock.calls.find(([url]) => + String(url).includes("/web/admin/cost/override") + ); + const resetCall = fetchMock.mock.calls.find(([url]) => + String(url).includes("/web/admin/cost/reset") + ); + + expect(overrideCall?.[1]).toMatchObject({ + method: "POST", + headers: expect.objectContaining({ Authorization: "Bearer test-token" }), + body: JSON.stringify({ scope: "next_request" }), + }); + expect(resetCall?.[1]).toMatchObject({ + method: "POST", + headers: expect.objectContaining({ Authorization: "Bearer test-token" }), + body: JSON.stringify({ scope: "session" }), + }); + + stop(); + }); + + it("surfaces transport/auth errors instead of generic unavailable fallback", async () => { + fetchMock.mockResolvedValueOnce(new 
Response(null, { status: 401 })); + + const { governance, stop } = createComposable(); + await governance.reload(); + + expect(governance.error.value).toBe("HTTP 401"); + expect(governance.config.value).toBeNull(); + + fetchMock.mockReset(); + fetchMock.mockResolvedValueOnce(new Response(null, { status: 500 })); + + await governance.reload(); + + expect(governance.error.value).toBe("HTTP 500"); + stop(); + }); + + it("keeps only the latest overlapping reload results", async () => { + let resolveFirstConfig: ((value: Response) => void) | undefined; + let resolveSecondConfig: ((value: Response) => void) | undefined; + + fetchMock.mockImplementationOnce( + () => + new Promise((resolve) => { + resolveFirstConfig = resolve; + }) + ); + + const { governance, gateway, token, stop } = createComposable(); + const firstReload = governance.reload(); + + gateway.value = "http://localhost:4000"; + token.value = "new-token"; + + fetchMock + .mockImplementationOnce( + () => + new Promise((resolve) => { + resolveSecondConfig = resolve; + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + summary: { + session_cost_usd: 9, + daily_cost_usd: 9, + monthly_cost_usd: 9, + total_tokens: 900, + request_count: 9, + percent_used_session: 90, + percent_used_daily: 90, + percent_used_monthly: 9, + budget_state: "warning", + period: "session", + }, + config: { + enabled: true, + session_limit_usd: 10, + daily_limit_usd: 10, + monthly_limit_usd: 100, + warn_at_percent: 80, + allow_override: true, + }, + }) + ) + .mockResolvedValueOnce( + jsonResponse({ + period: "day", + points: [{ bucket: "2026-04-06", cost_usd: 9, tokens: 900, requests: 9 }], + totals: { cost_usd: 9, tokens: 900, requests: 9 }, + }) + ); + + const secondReload = governance.reload(); + + resolveSecondConfig?.( + jsonResponse({ + config: { + cost: { + enabled: true, + session_limit_usd: 10, + daily_limit_usd: 10, + monthly_limit_usd: 100, + warn_at_percent: 80, + allow_override: true, + }, + }, + }) + ); + await secondReload; 
+ + resolveFirstConfig?.( + jsonResponse({ + config: { + cost: { + enabled: true, + session_limit_usd: 1, + daily_limit_usd: 1, + monthly_limit_usd: 1, + warn_at_percent: 50, + allow_override: false, + }, + }, + }) + ); + await firstReload; + + expect(governance.config.value?.session_limit_usd).toBe(10); + expect(governance.summary.value?.session_cost_usd).toBe(9); + expect(governance.history.value?.totals.requests).toBe(9); + stop(); + }); +}); diff --git a/clients/web/apps/dashboard/src/composables/useCostGovernance.ts b/clients/web/apps/dashboard/src/composables/useCostGovernance.ts new file mode 100644 index 00000000..cd79e113 --- /dev/null +++ b/clients/web/apps/dashboard/src/composables/useCostGovernance.ts @@ -0,0 +1,209 @@ +import { trimTrailingSlashes, validateGatewayUrl } from "@corvus/shared"; +import { computed, ref } from "vue"; +import { useAdmin } from "@/composables/useAdmin"; +import type { + AdminBudgetState, + AdminCostHistoryView, + AdminCostOverrideRecordView, + AdminCostResetResultView, + AdminCostSummaryView, + AdminCostView, + AdminDeprecatedFieldView, +} from "@/types/admin-config"; + +type Translator = (key: string) => string; + +export function useCostGovernance( + gatewayUrl: () => string, + bearerToken: () => string, + t: Translator +) { + const config = ref(null); + const summary = ref(null); + const history = ref(null); + const loading = ref(true); + const error = ref(null); + const usageUnavailable = ref(false); + const usageError = ref(null); + const actionMessage = ref(null); + const actionError = ref(null); + const actionPending = ref(false); + const deprecations = ref([]); + let reloadRequestId = 0; + + const admin = useAdmin(buildGatewayUrl, authHeaders); + + function buildGatewayUrl(path: string): string { + const base = validateGatewayUrl(gatewayUrl()); + if (!base) { + throw new Error(t("errors.invalidGatewayUrl")); + } + + const baseStr = trimTrailingSlashes(base.toString()); + return new URL(path.replace(/^\//, ""), 
`${baseStr}/`).toString(); + } + + function authHeaders(): Record { + const headers: Record = { + "Content-Type": "application/json", + }; + + const token = bearerToken().trim(); + if (token) { + headers.Authorization = `Bearer ${token}`; + } + + return headers; + } + + function updateDeprecations(): void { + // Pending backend support: populate deprecations from admin config payloads + // once fetchCostConfig() exposes cost-specific deprecation metadata. + deprecations.value = []; + } + + async function reload(): Promise { + const requestId = ++reloadRequestId; + const capturedGatewayUrl = gatewayUrl(); + const capturedBearerToken = bearerToken(); + const capturedAdmin = useAdmin( + (path: string) => { + const base = validateGatewayUrl(capturedGatewayUrl); + if (!base) { + throw new Error(t("errors.invalidGatewayUrl")); + } + + const baseStr = trimTrailingSlashes(base.toString()); + return new URL(path.replace(/^\//, ""), `${baseStr}/`).toString(); + }, + () => { + const headers: Record = { + "Content-Type": "application/json", + }; + const token = capturedBearerToken.trim(); + if (token) { + headers.Authorization = `Bearer ${token}`; + } + return headers; + } + ); + + loading.value = true; + error.value = null; + actionMessage.value = null; + actionError.value = null; + usageError.value = null; + usageUnavailable.value = false; + + try { + const nextConfig = await capturedAdmin.fetchCostConfig(); + if (requestId !== reloadRequestId) { + return; + } + if (!nextConfig) { + config.value = null; + error.value = t("errors.costNotAvailable"); + return; + } + + config.value = nextConfig; + updateDeprecations(); + + const [summaryResult, historyResult] = await Promise.allSettled([ + capturedAdmin.fetchCostSummary(), + capturedAdmin.fetchCostHistory({ period: "day", window: 7 }), + ]); + + if (requestId !== reloadRequestId) { + return; + } + + if (summaryResult.status === "fulfilled") { + summary.value = summaryResult.value.summary; + config.value = 
summaryResult.value.config; + } else { + summary.value = null; + } + + if (historyResult.status === "fulfilled") { + history.value = historyResult.value; + } else { + history.value = null; + } + + if (summaryResult.status === "rejected" || historyResult.status === "rejected") { + usageUnavailable.value = true; + usageError.value = t("cost.usageUnavailable"); + } + } catch (e: unknown) { + if (requestId !== reloadRequestId) { + return; + } + config.value = null; + summary.value = null; + history.value = null; + error.value = e instanceof Error ? e.message : String(e); + } finally { + if (requestId === reloadRequestId) { + loading.value = false; + } + } + } + + async function grantOverride(): Promise { + actionPending.value = true; + actionMessage.value = null; + actionError.value = null; + + try { + const result: AdminCostOverrideRecordView = await admin.grantCostOverride(); + await reload(); + actionMessage.value = `${t("cost.overrideGranted")}: ${result.scope}`; + } catch (e: unknown) { + actionError.value = e instanceof Error ? e.message : String(e); + } finally { + actionPending.value = false; + } + } + + async function resetSession(): Promise { + actionPending.value = true; + actionMessage.value = null; + actionError.value = null; + + try { + const result: AdminCostResetResultView = await admin.resetCost("session"); + await reload(); + actionMessage.value = `${t("cost.sessionReset")}: ${result.removed_requests}`; + } catch (e: unknown) { + actionError.value = e instanceof Error ? e.message : String(e); + } finally { + actionPending.value = false; + } + } + + const hasOperationalData = computed(() => summary.value !== null || history.value !== null); + + const activeBudgetState = computed( + () => summary.value?.budget_state ?? 
"allowed" + ); + + return { + config, + summary, + history, + loading, + error, + usageUnavailable, + usageError, + actionMessage, + actionError, + actionPending, + deprecations, + hasOperationalData, + activeBudgetState, + reload, + grantOverride, + resetSession, + }; +} diff --git a/clients/web/apps/dashboard/src/types/admin-config.ts b/clients/web/apps/dashboard/src/types/admin-config.ts index c00d514a..3c194ed2 100644 --- a/clients/web/apps/dashboard/src/types/admin-config.ts +++ b/clients/web/apps/dashboard/src/types/admin-config.ts @@ -162,12 +162,96 @@ export interface AdminChannelStatusView { export interface AdminCostView { enabled: boolean; + session_limit_usd: number; daily_limit_usd: number; monthly_limit_usd: number; warn_at_percent: number; allow_override: boolean; } +export type AdminUsagePeriod = "session" | "day" | "month"; +export type AdminSummaryPeriod = AdminUsagePeriod | "mission"; +export type AdminBudgetState = "allowed" | "warning" | "exceeded"; +export type AdminCostOverrideScope = "next_request"; +export type AdminCostResetScope = "session" | "day" | "month"; + +export interface AdminDeprecatedFieldView { + field: string; + replacement?: string | null; + message: string; +} + +export interface AdminCostDeprecationsView { + items: AdminDeprecatedFieldView[]; +} + +export interface AdminCostSummaryView { + session_cost_usd: number; + daily_cost_usd: number; + monthly_cost_usd: number; + total_tokens: number; + request_count: number; + percent_used_session: number; + percent_used_daily: number; + percent_used_monthly: number; + budget_state: AdminBudgetState; + period?: AdminSummaryPeriod | null; +} + +export interface AdminCostSummaryResponse { + summary: AdminCostSummaryView; + config: AdminCostView; +} + +export interface AdminCostHistoryPointView { + bucket: string; + cost_usd: number; + tokens: number; + requests: number; +} + +export interface AdminCostHistoryTotalsView { + cost_usd: number; + tokens: number; + requests: number; +} + 
+export interface AdminCostHistoryView { + period: AdminUsagePeriod; + points: AdminCostHistoryPointView[]; + totals: AdminCostHistoryTotalsView; +} + +export interface AdminCostOverrideRecordView { + id: string; + actor: string; + scope: AdminCostOverrideScope; + reason?: string | null; + requested_at: string; + expires_at?: string | null; + session_id?: string | null; + remaining_uses: number; +} + +export interface AdminCostResetAuditEventView { + id: string; + kind: string; + recorded_at: string; +} + +export interface AdminCostResetResultView { + scope: AdminCostResetScope; + removed_cost_usd: number; + removed_requests: number; + effective_at: string; + audit_event: AdminCostResetAuditEventView; +} + +export interface AdminCostHistoryParams { + period?: AdminUsagePeriod; + window?: number; +} + export interface AdminMcpView { enabled: boolean; servers: AdminMcpServerView[]; @@ -287,6 +371,7 @@ export interface AdminConfigView { tunnel?: AdminTunnelView; reliability?: AdminReliabilityView; heartbeat?: AdminHeartbeatView; + deprecations?: AdminCostDeprecationsView; } export interface AdminConfigResponse { diff --git a/clients/web/packages/locales/src/en.json b/clients/web/packages/locales/src/en.json index d7aeb083..dc6f64f9 100644 --- a/clients/web/packages/locales/src/en.json +++ b/clients/web/packages/locales/src/en.json @@ -323,10 +323,31 @@ "cost": { "loading": "Loading cost tracking...", "enabled": "Enabled", + "sessionLimit": "Session limit (USD)", "dailyLimit": "Daily limit (USD)", "monthlyLimit": "Monthly limit (USD)", "warnAtPercent": "Warn at percent", "allowOverride": "Allow override", + "liveStatus": "Live status", + "statusAllowed": "Within budget", + "statusWarning": "Approaching limit", + "statusExceeded": "Budget exceeded", + "periodStable": "stable", + "sessionSpend": "Session spend", + "dailySpend": "Daily spend", + "monthlySpend": "Monthly spend", + "requests": "Requests", + "tokens": "Tokens", + "sessionBudgetUsage": "Session budget usage", 
+ "dailyBudgetUsage": "Daily budget usage", + "monthlyBudgetUsage": "Monthly budget usage", + "usageUnavailable": "Live usage is unavailable. Showing saved policy only.", + "history": "Recent history", + "actions": "Operator actions", + "grantOverride": "Grant next request override", + "resetSession": "Reset session totals", + "overrideGranted": "Override granted", + "sessionReset": "Session totals reset", "yes": "Yes", "no": "No" }, diff --git a/clients/web/packages/locales/src/es.json b/clients/web/packages/locales/src/es.json index 9f497544..a03eb908 100644 --- a/clients/web/packages/locales/src/es.json +++ b/clients/web/packages/locales/src/es.json @@ -323,10 +323,31 @@ "cost": { "loading": "Cargando seguimiento de costos...", "enabled": "Habilitado", + "sessionLimit": "Límite de sesión (USD)", "dailyLimit": "Límite diario (USD)", "monthlyLimit": "Límite mensual (USD)", "warnAtPercent": "Alertar en porcentaje", "allowOverride": "Permitir anulación", + "liveStatus": "Estado en vivo", + "statusAllowed": "Dentro del presupuesto", + "statusWarning": "Cerca del límite", + "statusExceeded": "Presupuesto excedido", + "periodStable": "estable", + "sessionSpend": "Gasto de la sesión", + "dailySpend": "Gasto diario", + "monthlySpend": "Gasto mensual", + "requests": "Solicitudes", + "tokens": "Tokens", + "sessionBudgetUsage": "Uso del presupuesto de la sesión", + "dailyBudgetUsage": "Uso del presupuesto diario", + "monthlyBudgetUsage": "Uso del presupuesto mensual", + "usageUnavailable": "El uso en vivo no está disponible. 
Mostrando solo la política guardada.", + "history": "Historial reciente", + "actions": "Acciones del operador", + "grantOverride": "Conceder anulación para la próxima solicitud", + "resetSession": "Reiniciar totales de sesión", + "overrideGranted": "Anulación concedida", + "sessionReset": "Totales de sesión reiniciados", "yes": "Sí", "no": "No" }, diff --git a/openspec/changes/2026-04-06-cost-governance-productization/state.yaml b/openspec/changes/2026-04-06-cost-governance-productization/state.yaml deleted file mode 100644 index 31570beb..00000000 --- a/openspec/changes/2026-04-06-cost-governance-productization/state.yaml +++ /dev/null @@ -1,9 +0,0 @@ -change: 2026-04-06-cost-governance-productization -current_phase: apply -completed: [explore, propose, apply] -next: verify -updated: 2026-04-06 -notes: | - DALLAY-255 (Wire CostTracker to agent loop) implemented and committed. - Remaining issues B-F blocked by Linear free workspace limit. - Planning deliverable for DALLAY-164 is complete. diff --git a/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/design.md b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/design.md new file mode 100644 index 00000000..ff9691cb --- /dev/null +++ b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/design.md @@ -0,0 +1,490 @@ +# Design: Cost Governance Productization + +## Technical Approach + +This change productizes the cost wiring that already exists in the runtime baseline by making the +runtime the single enforcement point for token-spend governance, then projecting that runtime state +outward through CLI, gateway admin/API, dashboard, reporting, and observability surfaces. 
+`CostService` is already instantiated at the runtime boundary in `clients/agent-runtime/src/bootstrap/mod.rs` +and enforced in `clients/agent-runtime/src/agent/agent.rs`, delegating to an internal `CostTracker`; +this design treats that as completed baseline +(Issue A) and focuses on the remaining architecture needed to turn runtime-local accounting into a +coherent platform feature. + +The core strategy is to separate two governance concerns that are currently conflated: + +1. **Token-spend governance** lives in the `cost` subsystem and is enforced by `CostService` + over an internal `CostTracker`. +2. **Action-rate governance** lives in `SecurityPolicy` and remains a tool-execution guardrail. + +All user-facing surfaces MUST consume this split model instead of re-implementing budget logic. +Gateway and dashboard remain presentation/control layers over runtime-owned state. Mission-level +accounting becomes an adapter over runtime cost records rather than a competing budget system. + +## Architecture Decisions + +### Decision: Split token-spend governance from action-rate governance + +**Choice**: `CostService` is the runtime contract for token-spend governance, backed by +`CostTracker` as the internal source of truth for token spend, budget thresholds, overrides, and +budget history. `SecurityPolicy` continues to own tool/action-rate limits only, with +`max_cost_per_day_cents` renamed to `max_actions_per_hour`-family semantics. + +**Alternatives considered**: Keep both concerns inside `SecurityPolicy`; move all governance into a +new combined "governance" module. + +**Rationale**: The runtime already has working spend accounting in `clients/agent-runtime/src/cost/`. +Reusing that path avoids duplicating persistence and budget math. Keeping action-rate limits inside +`SecurityPolicy` preserves current tool-safety boundaries and prevents mixing unrelated concepts. 
+ +### Decision: Runtime evaluates budgets once; surfaces consume results + +**Choice**: Budget evaluation MUST happen in runtime-owned code before LLM execution, with CLI, +gateway, dashboard, and reporting consuming persisted summaries, audit events, and admin mutation +endpoints. + +**Alternatives considered**: Let each surface enforce its own limits; move checks into the gateway +only. + +**Rationale**: The same agent loop can run from CLI, webhook, or future surfaces. A single +runtime-owned evaluator guarantees consistent behavior and avoids divergence in warning/block rules. + +### Decision: Warning, block, and override form an audited state machine + +**Choice**: Budget outcomes are modeled as `allowed -> warning -> exceeded`, with explicit override +transitions that are always operator-initiated and always audited. + +**Alternatives considered**: Silent automatic override when `allow_override=true`; warning-only +behavior with no hard stop. + +**Rationale**: Silent override weakens governance and breaks trust. The platform needs explainable, +reviewable transitions that can be shown in admin and reporting surfaces. + +### Decision: Admin config and operational cost APIs stay separate + +**Choice**: Configuration remains under `/web/admin/config`, while live spend, history, resets, and +audit/reporting use dedicated `/web/cost/*` and admin-scoped override/reset endpoints. + +**Alternatives considered**: Put all cost reads/writes into `/web/admin/config`; create an entirely +separate dashboard backend. + +**Rationale**: The current dashboard already reads `/web/admin/config` for configuration snapshots. +Operational spend data has different freshness, pagination, and authorization needs, so it should be +separated from static config reads. 
+ +### Decision: Mission governance adapts to runtime cost state instead of duplicating it + +**Choice**: `MissionCoordinator.accumulated_cost_cents` becomes a derived mission view backed by +runtime cost records/session summaries, not an independent counter of truth. + +**Alternatives considered**: Keep mission-local counters forever; fully remove mission cost fields. + +**Rationale**: Mission flows still need mission-specific termination decisions, but duplicated cost +storage would reintroduce drift. A derived mission view preserves mission UX while aligning on one +accounting system. + +### Decision: Deprecate the misleading config key with a compatibility alias + +**Choice**: Keep reading `autonomy.max_cost_per_day_cents` through Release N+2 as a deprecated +alias, emit warnings everywhere it is loaded or displayed, and write back only the renamed field. + +**Alternatives considered**: Hard break the old key immediately; keep both names indefinitely. + +**Rationale**: Immediate breakage is risky for existing operators. Permanent dual naming would keep +confusion alive. A bounded alias window is the safest migration path. 
+ +## Data Flow + +### System overview + +```text + token usage + pricing +Provider ─────────────────────────────────────┐ + ▼ +CLI / webhook / gateway / agent loop ──> Agent runtime + │ + ├─ pre-flight budget check + ├─ post-call usage record + ├─ warning / exceeded / override audit + └─ persisted cost history (state/costs.jsonl) + │ + ┌──────────────────────┼──────────────────────┐ + ▼ ▼ ▼ + CLI summaries Gateway cost API Observability events + │ │ │ + └──────────────> Dashboard / reports <────────┘ +``` + +### Sequence: runtime budget evaluation + +```mermaid +sequenceDiagram + participant Surface as CLI/Webhook/Gateway-triggered turn + participant Agent as agent::Agent + participant Cost as cost::CostService + participant Provider as LLM Provider + participant Obs as Observer + + Surface->>Agent: request turn + Agent->>Cost: evaluate_request(estimated_cost) + Cost-->>Agent: Allowed | Warning | Exceeded + alt Allowed + Agent->>Provider: invoke model + Provider-->>Agent: response + token usage/estimations + Agent->>Cost: record_usage(token_usage) + Agent->>Obs: AgentEnd + cost_usd + else Warning + Agent->>Obs: BudgetWarning + Agent->>Provider: invoke model + Provider-->>Agent: response + Agent->>Cost: record_usage(token_usage) + else Exceeded + Agent->>Obs: BudgetExceeded + Agent-->>Surface: structured budget block + end +``` + +### Sequence: override and admin mutation flow + +```mermaid +sequenceDiagram + participant Operator as CLI operator / Admin UI + participant API as Gateway admin endpoint + participant Runtime as Runtime config/control plane + participant Cost as CostService + participant Obs as Observer + + Operator->>API: override or limit update request + API->>Runtime: validate auth + role + config guardrails + Runtime->>Obs: BudgetOverrideRequested / BudgetLimitChanged + alt temporary override granted + Runtime->>Cost: allow one execution window or scoped override token + Cost-->>API: override scope registered + API-->>Operator: override accepted 
with expiry/scope + else config change + Runtime-->>API: persisted config snapshot + API-->>Operator: updated limits returned + end +``` + +### Budget evaluation flow across runtime surfaces + +1. **Agent loop baseline** (`clients/agent-runtime/src/agent/agent.rs`) already performs pre-flight + `CostService::evaluate_request()` and post-call recording through the runtime cost contract. +2. **CLI** uses the same runtime-owned tracker. The CLI surface only adds operator affordances: + session summary on exit, `corvus cost` reads/history/reset, and explicit `--override-budget`. +3. **Gateway-triggered requests** reuse the same `Agent` path, so there is no separate gateway + budget engine. Gateway adds API exposure and admin authorization only. +4. **Mission flows** query/derive spend from runtime tracker data so mission guardrails and global + budgets stay aligned. +5. **Dashboard/reporting** are read models over runtime state and observer/audit events; they never + make independent budget decisions. 
+ +## File Changes + +| File | Action | Description | +|------|--------|-------------| +| `openspec/changes/2026-04-06-cost-governance-productization/design.md` | Create | Design artifact for the remaining productization work | +| `clients/agent-runtime/src/cost/tracker.rs` | Modify | Add APIs for history, reset, override scoping, and richer summaries/audit context | +| `clients/agent-runtime/src/cost/types.rs` | Modify | Add transport-friendly summary/history/audit DTOs and explicit warning/exceeded payloads | +| `clients/agent-runtime/src/cost/service.rs` | Create | Runtime-facing orchestration layer over tracker reads, resets, override evaluation, and reporting projections | +| `clients/agent-runtime/src/cost/mod.rs` | Modify | Re-export service/types used by gateway, CLI, and agent surfaces | +| `clients/agent-runtime/src/agent/agent.rs` | Modify | Replace ad hoc warning logging with first-class budget events and override-aware flow | +| `clients/agent-runtime/src/agent/mission.rs` | Modify | Derive mission cost accounting from runtime cost data instead of separate counters as source of truth | +| `clients/agent-runtime/src/config/schema.rs` | Modify | Add deprecated alias handling and clear field documentation for action-rate vs spend budgets | +| `clients/agent-runtime/src/config/mod.rs` | Modify | Normalize deprecated autonomy key during load and emit deprecation warnings | +| `clients/agent-runtime/src/security/policy.rs` | Modify | Remove misleading spend naming from `SecurityPolicy` and keep only action-rate governance | +| `clients/agent-runtime/src/main.rs` | Modify | Update CLI config/status output and add `corvus cost` / `--override-budget` surface wiring | +| `clients/agent-runtime/src/gateway/admin.rs` | Modify | Expose cost config separately from autonomy naming and support cost-specific admin updates | +| `clients/agent-runtime/src/gateway/cost.rs` | Create | Cost summary/history/reset/override/report endpoints following existing gateway module 
layout | +| `clients/agent-runtime/src/gateway/mod.rs` | Modify | Route new cost endpoints and include restart/validation semantics for cost config patches | +| `clients/agent-runtime/src/observability/traits.rs` | Modify | Add budget warning/exceeded/override event variants and audit payload shape | +| `clients/agent-runtime/src/observability/log.rs` | Modify | Log cost governance lifecycle without leaking sensitive payloads | +| `clients/agent-runtime/src/observability/otel.rs` | Modify | Export budget outcome and override attributes/events for tracing | +| `clients/agent-runtime/src/observability/prometheus.rs` | Modify | Add counters/gauges for warnings, blocks, overrides, and current spend snapshots where appropriate | +| `clients/web/apps/dashboard/src/types/admin-config.ts` | Modify | Extend cost/admin view models for live usage and deprecation metadata | +| `clients/web/apps/dashboard/src/composables/useAdmin.ts` | Modify | Fetch cost summary/history/reporting data alongside existing admin resources | +| `clients/web/apps/dashboard/src/composables/useCostGovernance.ts` | Create | Focused dashboard data loader for spend, history, alerts, and operator actions | +| `clients/web/apps/dashboard/src/components/config/CostOverview.vue` | Modify | Move from config-only card to config + live usage + alerts + action affordances | +| `clients/web/apps/dashboard/src/components/config/CostOverview.spec.ts` | Modify | Cover config-only fallback, live usage rendering, and alert states | + +## Interfaces / Contracts + +### Runtime ownership model + +```ts +interface CostGovernanceState { + config: { + enabled: boolean; + session_limit_usd: number; + daily_limit_usd: number; + monthly_limit_usd: number; + warn_at_percent: number; + allow_override: boolean; + }; + summary: { + session_cost_usd: number; + daily_cost_usd: number; + monthly_cost_usd: number; + total_tokens: number; + request_count: number; + budget_state: "allowed" | "warning" | "exceeded"; + period?: "session" 
| "day" | "month" | "mission"; + percent_used_session: number; + percent_used_daily: number; + percent_used_monthly: number; + }; + warnings: Array<{ + budget_state: "warning" | "exceeded"; + period: "session" | "day" | "month" | "mission"; + current_usd: number; + projected_usd: number; + limit_usd: number; + percent_used: number; + surface?: string; + observed_at: string; + }>; +} +``` + +### Operational cost API shape + +```json +GET /api/web/cost/summary +{ + "summary": { + "session_cost_usd": 1.42, + "daily_cost_usd": 7.11, + "monthly_cost_usd": 31.48, + "total_tokens": 128044, + "request_count": 63, + "percent_used_session": 47.3, + "percent_used_daily": 71.1, + "percent_used_monthly": 31.48, + "budget_state": "warning", + "period": "day" + }, + "config": { + "enabled": true, + "session_limit_usd": 3.0, + "daily_limit_usd": 10.0, + "monthly_limit_usd": 100.0, + "warn_at_percent": 80, + "allow_override": true + } +} +``` + +```json +GET /api/web/cost/summary +{ + "summary": { + "session_cost_usd": 0.92, + "daily_cost_usd": 7.11, + "monthly_cost_usd": 31.48, + "total_tokens": 128044, + "request_count": 63, + "percent_used_session": 92.0, + "percent_used_daily": 71.1, + "percent_used_monthly": 31.48, + "budget_state": "warning", + "period": "mission" + } +} +``` + +```json +GET /api/web/cost/history?period=day&window=30 +{ + "period": "day", + "points": [ + { "bucket": "2026-04-01", "cost_usd": 1.2, "tokens": 18000, "requests": 8 }, + { "bucket": "2026-04-02", "cost_usd": 2.1, "tokens": 26000, "requests": 11 } + ], + "totals": { + "cost_usd": 31.48, + "tokens": 128044, + "requests": 63 + } +} +``` + +```json +POST /api/web/admin/cost/reset +{ + "scope": "session", + "reason": "operator reset after test run" +} +``` + +```json +POST /api/web/admin/cost/override +{ + "scope": "next_request", + "reason": "incident mitigation" +} +``` + +The acting principal is derived from the authenticated admin session on the server side. 
Requests +that include a client-provided `actor` field are rejected. + +```json +PATCH /api/web/admin/config +{ + "cost": { + "enabled": true, + "daily_limit_usd": 20.0, + "monthly_limit_usd": 250.0, + "warn_at_percent": 75, + "allow_override": true + }, + "autonomy": { + "max_actions_per_hour": 20 + } +} +``` + +### Audit / observability event shape + +```rust +BudgetWarning { + budget_state: BudgetState, + period: UsagePeriod, + current_usd: f64, + projected_usd: f64, + limit_usd: f64, + percent_used: f64, + session_id: String, + surface: Option, +} + +BudgetExceeded { + budget_state: BudgetState, + period: UsagePeriod, + current_usd: f64, + projected_usd: f64, + limit_usd: f64, + percent_used: f64, + session_id: String, + surface: Option, +} + +BudgetOverride { + action: BudgetOverrideAction, + actor: String, + scope: String, + reason: String, + session_id: Option, + previous_state: String, + period: Option, + override_id: Option, + surface: Option, +} +``` + +Observability outputs MUST redact sensitive governance fields before emission. In logs, metrics, +and traces: `actor` and `reason` are fully redacted, `session_id` is masked or omitted unless the +surface explicitly requires an internal correlation token, and any PII must be redacted before +recording. The runtime redaction implementation lives in the observability helpers so operators +should expect structured events without raw operator identity, free-form reasons, or directly +reusable session identifiers. + +### Dashboard/reporting surface shape + +The dashboard keeps the existing `CostOverview.vue` placement under config, but the component becomes +an operational panel with four conceptual zones: + +1. **Policy**: enabled flag, limits, warning threshold, override policy. +2. **Current usage**: session/day/month spend, percent used, request count, tokens. +3. **Alerts**: active warning/exceeded banner and recent override/reset activity. +4. 
**Reporting**: trend chart plus model/session breakdown backed by history endpoints. + +Reporting is intentionally API-first: the same history/report shapes should support dashboard charts, +CSV export later, and external admin/reporting tools without requiring a second backend. + +## Warning / Block / Override / Audit Flow + +1. **Warning**: when projected spend crosses `warn_at_percent`, the request proceeds, but the + runtime emits a budget warning event and exposes the state in summary/history responses. +2. **Block**: when projected spend exceeds the applicable hard limit, the runtime rejects the next + LLM request with structured budget metadata. In-flight requests are not interrupted. +3. **Override**: only available when `cost.allow_override=true` and initiated explicitly by an + operator through CLI flag or admin API. Overrides MUST be scoped (for example, next request or + temporary window), not global and silent. +4. **Audit**: every warning, block, override, config change, and reset emits observer events and is + queryable via reporting-oriented history/audit responses. +5. **Surface behavior**: CLI shows immediate warning/block text; gateway returns machine-readable + error bodies; dashboard surfaces current state and recent audit records. 
+ +## Testing Strategy + +| Layer | What to Test | Approach | +|-------|-------------|----------| +| Unit | Budget threshold math, override scope expiry, deprecated config alias normalization, mission cost derivation | Extend `clients/agent-runtime/src/cost/*`, `config/*`, and `agent/mission.rs` tests with focused deterministic cases | +| Integration | Agent loop warning/block behavior, gateway cost endpoints, admin config patch semantics, reset/override audit emission | Rust integration-style tests around gateway handlers and `Agent` execution paths using temp workspace cost storage | +| E2E | Dashboard rendering of live usage/history/alerts, CLI operator flows, config migration behavior | Vitest component tests for dashboard plus targeted CLI/gateway end-to-end smoke paths in runtime tests | + +### Rollback & Feature-Flag Strategy + +- Budget-governance enforcement stays behind `cost.enabled` and the gateway dispatcher path, so the + first rollback step is to disable `cost.enabled` for affected workspaces or environments. +- If a rollout causes false budget blocks, gateway instability, or noisy operator actions, revert to + dispatcher-disabled or cost-disabled configuration before shipping code rollback. +- Roll out gradually by workspace/environment scope (or percentage of selected workspaces if an + operator automation layer exists), then watch budget warning/exceeded rates, gateway error rates, + and override frequency before widening exposure. +- Monitoring/alert thresholds should trigger rollback when budget-exceeded errors spike without a + matching spend increase, gateway cost endpoints return elevated 5xx responses, or admin config + mutations fail to propagate consistently. + +### Threat / Risk Notes + +- **Security:** override and reset paths are privileged actions; unexpected actor/reason exposure or + auth bypasses require an emergency patch. 
+- **Runtime:** reservation/accounting bugs can over-block or under-block requests; signals include + sudden drift between estimated and recorded spend or repeated blocked requests after resets. +- **Gateway:** dispatcher bypass or handler startup failure can silently disable governance; signals + include legacy path usage while `cost.enabled=true`, elevated 5xx on cost endpoints, or missing + structured budget-exceeded responses. + +## Migration / Rollout + +### Naming migration for `SecurityPolicy.max_cost_per_day_cents` + +1. **Release N** + - Runtime reads both `autonomy.max_cost_per_day_cents` and the new action-rate field mapping. + - If the deprecated key is present, config loading emits a warning explaining that the field is + action-rate governance, not token-spend governance. + - Admin/dashboard responses include only the new action-rate name for writes and primary display, + while optionally exposing `deprecated_fields` metadata for operator awareness. +2. **Release N+1** + - Deprecated key remains readable but warnings are escalated in CLI/admin UX and docs. +3. **Release N+2** + - Remove read support for `autonomy.max_cost_per_day_cents` once adoption is complete. + +### Delivery sequencing + +- **Issue A — Runtime wiring**: already complete baseline on main (PR #448). This design does not + re-plan that work. +- **Issue B — CLI product surface**: add `corvus cost`, exit summaries, explicit override flow. +- **Issue C — Gateway/API surface**: add operational cost endpoints, admin mutations, and + machine-readable block responses. +- **Issue D — Dashboard/reporting UX**: consume summary/history/audit endpoints and replace the + config-only cost card. +- **Issue E — Naming/model cleanup**: unify mission cost accounting with tracker state and execute + the `max_cost_per_day_cents` deprecation path. +- **Issue F — Observability/audit hardening**: add first-class observer events, OTel attributes, + Prometheus metrics, and reporting hooks. 
+ +This sequencing keeps the current baseline stable while shipping user-visible value in thin vertical +slices from runtime outward. + +## Open Questions + +- [ ] Should override scope support only `next_request`, or also time-boxed/session-boxed windows? +- [ ] What operator identity source is canonical for gateway-admin audit records: paired token id, + user label, or both? +- [ ] How much history retention/rotation is required for `state/costs.jsonl` before reporting + becomes too expensive for long-lived runtimes? +- [ ] Should dashboard reporting include export/download in this change, or only API-ready report + shapes for a later iteration? diff --git a/openspec/changes/2026-04-06-cost-governance-productization/proposal.md b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/proposal.md similarity index 100% rename from openspec/changes/2026-04-06-cost-governance-productization/proposal.md rename to openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/proposal.md diff --git a/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/specs/cost-governance/spec.md b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/specs/cost-governance/spec.md new file mode 100644 index 00000000..028f83de --- /dev/null +++ b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/specs/cost-governance/spec.md @@ -0,0 +1,200 @@ +# Cost Governance Specification + +## Purpose + +This specification defines Corvus cost governance as a product capability rather than an isolated +runtime mechanism. It establishes the canonical budget model, enforcement semantics, override and +audit rules, required operator and admin surfaces, and the separation between token-spend +governance and action-rate governance. + +## Current Baseline + +- Issue A (`Wire CostTracker to agent loop`) is already merged and is part of the baseline for this + change. 
+- The runtime can instantiate cost tracking for the canonical agent loop and use that wiring behind + the existing `cost.enabled` gate. +- CLI, gateway/admin API, dashboard, session reporting, and observability product surfaces are NOT + yet complete and remain required work for this change. +- `SecurityPolicy.max_cost_per_day_cents` MUST be treated as legacy action-rate terminology, not as + the canonical token-spend budget model. + +## Requirements + +### Requirement: Budget Scope Model + +The system MUST govern token spend using four explicit budget scopes: `session`, `daily`, +`monthly`, and `mission`. + +- `session` MUST represent spend accumulated within one canonical session identity. +- `daily` MUST represent spend accumulated within the active UTC calendar day. +- `monthly` MUST represent spend accumulated within the active UTC calendar month. +- `mission` MUST represent spend accumulated within a single mission identity, independent from the + broader session total. +- The system MUST evaluate all configured token-spend scopes that apply to a request before allowing + a metered model call. +- A scope MAY be absent or disabled, but the system MUST NOT infer one scope from another. + +#### Scenario: Multiple configured scopes are evaluated together + +- GIVEN token-spend budgets are configured for `session`, `daily`, and `monthly` +- WHEN the runtime evaluates a metered model call for an active session +- THEN the system MUST check all three configured scopes before the call is admitted +- AND the most restrictive resulting decision MUST govern the call outcome. 
+ +#### Scenario: Mission budget remains independent from session budget + +- GIVEN a session contains two separate missions +- AND the first mission has already consumed its configured mission budget +- WHEN a second mission starts within the same session +- THEN the system MUST evaluate the second mission against its own mission scope +- AND the system MUST continue to evaluate the enclosing session, daily, and monthly scopes + independently. + +### Requirement: Warning and Hard-Block Semantics + +The system MUST distinguish warning semantics from hard-block semantics for token-spend budgets. + +- A warning MUST occur when spend reaches or exceeds the configured warning threshold for a scope but + has not yet exceeded the hard limit for that same scope. +- A hard block MUST occur when a scope's hard limit is exceeded or would be exceeded by the next + metered model call. +- In-flight model calls MUST be allowed to complete once admitted. +- After a hard-block condition is reached, the next metered model call MUST be rejected until spend + returns below an active limit or an authorized override is applied. +- Warning states MUST be visible to operators and admins; hard-block states MUST be visible and + actionable. + +#### Scenario: Warning is emitted before limit is exceeded + +- GIVEN a daily budget has a hard limit of 100 USD and a warning threshold of 80 percent +- WHEN accumulated daily spend reaches 82 USD after a completed model call +- THEN the system MUST classify the daily scope as warning state +- AND the next metered model call MUST still be eligible to run if no hard limit is exceeded. 
+ +#### Scenario: Hard block applies on the next metered call + +- GIVEN a session budget hard limit is 25 USD +- AND an admitted model call completes with total session spend now at 26 USD +- WHEN the agent loop attempts the next metered model call for that session +- THEN the system MUST reject that call as budget-exceeded +- AND the system MUST NOT cancel or retroactively fail the already completed prior call. + +### Requirement: Override Policy and Audit Trail + +The system MUST support explicit, auditable override behavior for token-spend hard blocks. + +- Overrides MUST require an explicit operator or admin action; silent overrides MUST NOT exist. +- The product model MUST support both local operator overrides and remote admin overrides. +- Every override MUST record who performed it, which scope or limit was overridden, when the + override occurred, and the justification or source context when supplied. +- Override records MUST be append-only audit events. +- The system SHOULD support temporary overrides with explicit expiry or replacement conditions. + +#### Scenario: Local operator override is audited + +- GIVEN a CLI operator encounters a budget-exceeded condition +- WHEN the operator uses an approved override control to continue execution +- THEN the system MUST permit execution only if overrides are enabled by policy +- AND the system MUST append an audit record containing the operator identity, affected budget + scope, timestamp, and override action. + +#### Scenario: Remote admin override is visible after application + +- GIVEN a gateway-admin surface raises a monthly budget for a running deployment +- WHEN the override is accepted by the authorized admin path +- THEN the new limit MUST become the active limit for subsequent evaluations +- AND the system MUST persist an audit event that can be queried independently of the runtime + process that applied it. 
+ +### Requirement: Required Product Surfaces + +The system MUST expose cost-governance information and controls across the required product +surfaces: CLI, gateway/admin API, dashboard, session reporting, and observability. + +- The CLI MUST expose current budget status, warning or blocked state, and operator-approved + override flow when policy allows it. +- The gateway/admin API MUST expose current spend, budget status, history, and authorized + administrative controls for reset or limit adjustment. +- The dashboard MUST expose live or near-live budget status, warning or blocked indicators, and + historical spend views derived from gateway-safe APIs. +- Session reporting MUST expose session-level spend totals and budget outcomes for the completed or + active session. +- Observability MUST expose structured events and metrics for spend, warnings, hard blocks, and + overrides. + +#### Scenario: Operator surfaces show the same budget state + +- GIVEN a session is in warning state for the daily budget +- WHEN an operator checks the CLI and an admin checks the dashboard through the gateway +- THEN both surfaces MUST report the same warning classification for that active budget scope +- AND both surfaces MUST do so without requiring direct access to runtime-internal storage. + +#### Scenario: Session reporting includes budget outcome + +- GIVEN a session finishes after spending against active token budgets +- WHEN the session summary or history is requested +- THEN the system MUST include the session spend total +- AND the system MUST include whether the session ended within budget, in warning state, or blocked + by budget governance. 
+ +#### Scenario: Observability records warning and override lifecycle + +- GIVEN a session first crosses a warning threshold and later receives an approved override +- WHEN observability data is emitted for that session +- THEN the system MUST emit structured records for both the warning and the override +- AND those records MUST be correlatable to the same session or mission identity without exposing + secrets. + +### Requirement: Separation of Governance Domains + +The system MUST keep token-spend governance separate from action-rate governance. + +- Token-spend governance MUST apply to model usage cost and spend budgets. +- Action-rate governance MUST apply to action frequency or action-count controls. +- Configuration, policy names, user-facing labels, and audit records MUST NOT present action-rate + controls as token-spend budgets. +- A request MAY be denied by either governance domain, but the denial reason MUST identify the + governing domain. +- Legacy names that imply token spend for action-rate controls MUST be removed or clearly deprecated. + +#### Scenario: Action-rate denial is not reported as token budget exhaustion + +- GIVEN an agent has exhausted its action-rate allowance but has token budget remaining +- WHEN a request is denied by policy +- THEN the system MUST report the denial as an action-rate governance outcome +- AND the system MUST NOT present the denial as daily or monthly token spend exhaustion. + +#### Scenario: Token budget denial leaves action-rate accounting unchanged + +- GIVEN an agent is blocked because its monthly token-spend budget is exceeded +- WHEN the denial is recorded +- THEN the system MUST classify the outcome under token-spend governance +- AND the system MUST NOT mutate action-rate counters solely because of that token-budget denial. 
+ +### Requirement: Baseline Truthfulness and Remaining Productization Work + +The specification for this change MUST describe the already-merged runtime baseline truthfully while +also defining the remaining productization work required to complete the feature. + +- The merged agent-loop cost wiring MUST be treated as current baseline behavior for this change. +- Remaining work MUST include the operator and admin product surfaces, audit completeness, + observability completeness, and governance-domain cleanup needed for a production-ready feature. +- Product documentation and acceptance reviews MUST distinguish between baseline behavior already on + `main` and required surfaces that are still pending. +- The system MUST NOT claim full cost-governance product completion until the required surfaces and + governance separation in this specification are delivered. + +#### Scenario: Baseline includes runtime wiring but not surface completion + +- GIVEN an implementation review is performed after Issue A has merged +- WHEN the reviewer evaluates this change against the specification +- THEN the reviewer MUST treat agent-loop cost wiring as already satisfied baseline behavior +- AND the reviewer MUST still mark CLI, gateway/admin API, dashboard, session reporting, and + observability requirements as pending until delivered. + +#### Scenario: Product completion cannot be claimed from runtime-only wiring + +- GIVEN cost tracking is wired in the runtime and can enforce limits internally +- WHEN no dashboard, CLI summary, admin API, or override audit surface is available to operators +- THEN the system MUST be considered partially productized +- AND the feature MUST NOT be represented as complete cost governance. 
diff --git a/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/state.yaml b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/state.yaml new file mode 100644 index 00000000..9c71514c --- /dev/null +++ b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/state.yaml @@ -0,0 +1,12 @@ +change: 2026-04-06-cost-governance-productization +current_phase: verify +completed: [explore, propose, spec, design, tasks, apply, verify] +next: archive +updated: 2026-04-06 +notes: | + DALLAY-255 (Wire CostTracker to agent loop) is already part of the main baseline via PR #448. + The reopened Phase 6 follow-up tasks were implemented and the change now verifies as PASS WITH WARNINGS. + Remaining warnings are non-blocking: audit-query surface, extra governance-domain regression coverage, + and unrelated broader dashboard package noise outside the changed slice. + Apply is complete again after adding session and mission token-budget scopes, + tightening multi-scope evaluation, and re-running runtime plus scoped dashboard validation. diff --git a/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/tasks.md b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/tasks.md new file mode 100644 index 00000000..b007100c --- /dev/null +++ b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/tasks.md @@ -0,0 +1,43 @@ +# Tasks: Cost Governance Productization + +## Phase 1: Runtime Cost Service and Override Flow + +- [x] 1.1 Extend `clients/agent-runtime/src/cost/types.rs` with summary, history, audit, and override DTOs used by CLI, gateway, and dashboard. +- [x] 1.2 Create `clients/agent-runtime/src/cost/service.rs` to expose tracker-backed summary, history, reset, and scoped override operations. 
+- [x] 1.3 Update `clients/agent-runtime/src/cost/tracker.rs` and `clients/agent-runtime/src/cost/mod.rs` to support history windows, reset scopes, override expiry/scope, and shared exports. +- [x] 1.4 Update `clients/agent-runtime/src/agent/agent.rs` to emit first-class warning/exceeded outcomes and consume override decisions instead of ad hoc logging. +- [x] 1.5 Verification: add Rust unit tests for threshold math, history aggregation, reset behavior, and override scope expiry in `clients/agent-runtime/src/cost/*`. + +## Phase 2: CLI and Gateway/Admin Surface + +- [x] 2.1 Update `clients/agent-runtime/src/main.rs` to add `corvus cost` subcommands (`summary`, `history`, `reset`), exit summaries, and explicit `--override-budget` flow. +- [x] 2.2 Create `clients/agent-runtime/src/gateway/cost.rs` for `/api/web/cost/summary`, `/api/web/cost/history`, `/api/web/admin/cost/reset`, and `/api/web/admin/cost/override`. +- [x] 2.3 Update `clients/agent-runtime/src/gateway/admin.rs` and `clients/agent-runtime/src/gateway/mod.rs` to PATCH `cost` config separately from autonomy fields and route machine-readable budget errors. +- [x] 2.4 Verification: add runtime integration tests covering CLI override audit, gateway auth for reset/override, summary/history payloads, and blocked-request responses. + +## Phase 3: Dashboard and Reporting UX + +- [x] 3.1 Update `clients/web/apps/dashboard/src/types/admin-config.ts` and `src/composables/useAdmin.ts` for cost summary/history/deprecation metadata. +- [x] 3.2 Create `clients/web/apps/dashboard/src/composables/useCostGovernance.ts` to load live usage, alerts, and operator actions from gateway-safe APIs. +- [x] 3.3 Update `clients/web/apps/dashboard/src/components/config/CostOverview.vue` to show policy, live usage, warnings/blocks, overrides/resets, and reporting trends. 
+- [x] 3.4 Verification: extend `clients/web/apps/dashboard/src/components/config/CostOverview.spec.ts` for config-only fallback, warning/exceeded states, and live history rendering. + +## Phase 4: Observability and Audit Completion + +- [x] 4.1 Update `clients/agent-runtime/src/observability/traits.rs` with `BudgetWarning`, `BudgetExceeded`, and `BudgetOverride` event variants plus audit payloads. +- [x] 4.2 Update `clients/agent-runtime/src/observability/log.rs`, `otel.rs`, and `prometheus.rs` to emit cost lifecycle logs, spans, metrics, and `cost_usd` session data. +- [x] 4.3 Verification: add focused tests/assertions that warning, block, override, and reset events are emitted without leaking secrets. + +## Phase 5: Governance Cleanup and Mission Alignment + +- [x] 5.1 Update `clients/agent-runtime/src/agent/mission.rs` so mission spend is derived from runtime cost records instead of mission-local truth. +- [x] 5.2 Update `clients/agent-runtime/src/security/policy.rs`, `clients/agent-runtime/src/config/schema.rs`, and `clients/agent-runtime/src/config/mod.rs` to rename `max_cost_per_day_cents`, keep a one-release alias, and emit deprecation warnings. +- [x] 5.3 Verification: add tests for mission/session independence, action-rate vs token-spend denial labeling, and deprecated config normalization across CLI/admin responses. + +## Phase 6: Verification Follow-up Gaps + +- [x] 6.1 Add explicit `session` token-spend budget scope to runtime config, tracker evaluation, and exposed summaries. +- [x] 6.2 Add explicit `mission` token-spend budget scope to runtime evaluation so mission scope is enforced alongside session/day/month where applicable. +- [x] 6.3 Update budget evaluation to check all configured scopes before metered model calls and return the governing scope/result. +- [x] 6.4 Add spec-mapped tests for multi-scope evaluation, mission/session independence, governance-domain separation, and cross-surface consistency. 
+- [x] 6.5 Bring validation to green for verify expectations, including `cargo fmt --all -- --check` and relevant dashboard package-level checks or documented scoped exceptions. diff --git a/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/verify-report.md b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/verify-report.md new file mode 100644 index 00000000..26f6500d --- /dev/null +++ b/openspec/changes/archive/2026-04-06-2026-04-06-cost-governance-productization/verify-report.md @@ -0,0 +1,125 @@ +# Verification Report + +**Change**: 2026-04-06-cost-governance-productization +**Date**: 2026-04-06 +**Verifier**: sdd-verify + +--- + +## Completeness + +| Metric | Value | +|--------|-------| +| Tasks total | 20 | +| Tasks complete | 20 | +| Tasks incomplete | 0 | + +All checklist items in `tasks.md`, including Phase 6 follow-up work, are marked complete. + +--- + +## Build & Tests Execution + +**Rust formatting**: ✅ Passed +Command: `cargo fmt --all -- --check` + +**Rust clippy**: ✅ Passed +Command: `cargo clippy --all-targets -- -D warnings` + +**Rust tests**: ✅ Passed +Command: `cargo test` + +Notes: +- `cargo test` exited successfully and showed the updated runtime suite running, including new cost-governance tests such as `session_scope_is_evaluated_with_day_and_month_limits`, `mission_scope_can_govern_request_when_more_restrictive`, `mission_scope_blocks_metered_call_independently_from_session_budget`, and `token_budget_denial_is_reported_separately_from_action_rate_governance`. +- Output was truncated by tool limits, but the command completed with exit code 0. + +**Dashboard targeted cost specs**: ✅ Passed +Command: `pnpm exec vitest --run --environment happy-dom src/composables/useCostGovernance.spec.ts src/components/config/CostOverview.spec.ts` + +Result: 2 test files passed, 9 tests passed, 0 failed. 
+ +**Dashboard targeted cost-file check**: ✅ Passed +Command: `pnpm exec biome check src/composables/useCostGovernance.ts src/composables/useCostGovernance.spec.ts src/components/config/CostOverview.vue src/components/config/CostOverview.spec.ts src/composables/useAdmin.ts src/types/admin-config.ts` + +Result: completed successfully. + +**Coverage**: ➖ Not configured +`openspec/config.yaml` does not define `rules.verify.coverage_threshold`. + +**Unrelated branch noise**: broader dashboard package-level scripts are still known to have failures in unrelated pre-existing areas per branch context. They were not treated as change blockers because the changed dashboard cost files have passing targeted tests/checks and the runtime validation gates are green. + +--- + +## Spec Compliance Matrix + +| Requirement | Scenario | Test | Result | +|-------------|----------|------|--------| +| Budget Scope Model | Multiple configured scopes are evaluated together | `clients/agent-runtime/src/cost/tracker.rs > session_scope_is_evaluated_with_day_and_month_limits` | ✅ COMPLIANT | +| Budget Scope Model | Mission budget remains independent from session budget | `clients/agent-runtime/src/agent/mission.rs > runtime_derived_mission_cost_is_independent_from_prior_session_spend`; `clients/agent-runtime/src/agent/agent.rs > mission_scope_blocks_metered_call_independently_from_session_budget` | ✅ COMPLIANT | +| Warning and Hard-Block Semantics | Warning is emitted before limit is exceeded | `clients/agent-runtime/src/cost/tracker.rs > warning_threshold_uses_projected_cost_math`; `clients/agent-runtime/src/agent/agent.rs > warning_threshold_emits_budget_warning_event` | ✅ COMPLIANT | +| Warning and Hard-Block Semantics | Hard block applies on the next metered call | `clients/agent-runtime/src/agent/agent.rs > budget_exceeded_blocks_llm_call`; `clients/agent-runtime/src/gateway/webhook_dispatch.rs > execute_maps_cost_budget_exceeded_into_machine_readable_outcome` | ✅ COMPLIANT | +| Override 
Policy and Audit Trail | Local operator override is audited | `clients/agent-runtime/src/main.rs > cli_override_application_writes_audit_and_allows_next_blocked_request_once` | ✅ COMPLIANT | +| Override Policy and Audit Trail | Remote admin override is visible after application | `clients/agent-runtime/src/gateway/cost.rs > admin_cost_override_applies_to_shared_tracker_next_request` | ⚠️ PARTIAL | +| Required Product Surfaces | Operator surfaces show the same budget state | `clients/agent-runtime/src/gateway/cost.rs > cost_summary_returns_usage_and_config_payload`; `clients/agent-runtime/src/main.rs > render_cost_summary_reports_budget_state_and_usage`; `clients/web/apps/dashboard/src/composables/useCostGovernance.spec.ts > loads live summary and history data`; `clients/web/apps/dashboard/src/components/config/CostOverview.spec.ts > renders warning state with live summary and history` | ✅ COMPLIANT | +| Required Product Surfaces | Session reporting includes budget outcome | `clients/agent-runtime/src/main.rs > render_cli_session_summary_reports_exit_state` | ✅ COMPLIANT | +| Required Product Surfaces | Observability records warning and override lifecycle | `clients/agent-runtime/src/agent/agent.rs > warning_threshold_emits_budget_warning_event`; `clients/agent-runtime/src/agent/agent.rs > local_override_emits_budget_override_event`; `clients/agent-runtime/src/observability/traits.rs > budget_override_event_redacts_sensitive_actor_and_reason` | ✅ COMPLIANT | +| Separation of Governance Domains | Action-rate denial is not reported as token budget exhaustion | `clients/agent-runtime/src/security/policy.rs > action_rate_denials_are_labeled_separately_from_token_spend` | ✅ COMPLIANT | +| Separation of Governance Domains | Token budget denial leaves action-rate accounting unchanged | `clients/agent-runtime/src/agent/agent.rs > token_budget_denial_is_reported_separately_from_action_rate_governance`; `clients/agent-runtime/src/gateway/mod.rs > 
webhook_dispatcher_returns_machine_readable_budget_exceeded_payload` | ⚠️ PARTIAL | +| Baseline Truthfulness and Remaining Productization Work | Baseline includes runtime wiring but not surface completion | Artifact review across `proposal.md`, `spec.md`, and `design.md` | ✅ COMPLIANT | +| Baseline Truthfulness and Remaining Productization Work | Product completion cannot be claimed from runtime-only wiring | Artifact review plus delivered CLI/gateway/dashboard surfaces in this branch | ✅ COMPLIANT | + +**Compliance summary**: 11/13 scenarios compliant, 2 partial, 0 failing, 0 untested. + +--- + +## Correctness (Static — Structural Evidence) + +| Requirement | Status | Notes | +|------------|--------|-------| +| Budget Scope Model | ✅ Implemented | `CostConfig` now includes `session_limit_usd`, `UsagePeriod` includes `Mission`, and `CostTracker::check_budget_with_mission_scope()` evaluates session/day/month plus mission when applicable. | +| Warning and Hard-Block Semantics | ✅ Implemented | `BudgetCheck`, `BudgetEvaluation`, pre-flight enforcement, warning emission, and next-call hard block behavior are wired through runtime and webhook paths. | +| Override Policy and Audit Trail | ⚠️ Partial | Local and remote next-request overrides are implemented and audited in append-only storage, but there is still no dedicated query API/report surface for persisted cost audit history after remote admin actions. | +| Required Product Surfaces | ✅ Implemented | CLI, gateway cost endpoints, dashboard live usage/history surface, session summary, and observability outputs all exist and are wired to runtime-owned state. | +| Separation of Governance Domains | ✅ Implemented | Token-spend and action-rate governance are separated in naming and user-facing error labeling; deprecated alias handling for `max_cost_per_day_cents` is present. 
| +| Baseline Truthfulness and Remaining Productization Work | ✅ Implemented | Artifacts accurately describe Issue A as baseline and the rest of the delivered product surfaces as the work of this change. | + +--- + +## Coherence (Design) + +| Decision | Followed? | Notes | +|----------|-----------|-------| +| Split token-spend governance from action-rate governance | ✅ Yes | Cost evaluation lives in `cost/*`; action-rate semantics remain in `SecurityPolicy` with renamed `max_actions_per_hour`. | +| Runtime evaluates budgets once; surfaces consume results | ✅ Yes | Agent loop performs evaluation centrally; CLI, gateway, and dashboard consume runtime summaries/results. | +| Warning, block, and override form an audited state machine | ✅ Yes | Warning/exceeded/override states and audit events are implemented. | +| Admin config and operational cost APIs stay separate | ✅ Yes | Config patching remains under admin config; operational usage/reset/override endpoints live in gateway cost handlers. | +| Mission governance adapts to runtime cost state instead of duplicating it | ✅ Yes | Mission scope is derived from runtime session spend deltas and evaluated alongside other scopes. | +| Deprecate the misleading config key with a compatibility alias | ✅ Yes | Deprecated alias normalization and metadata remain present across config/admin surfaces. | + +--- + +## Issues Found + +### CRITICAL + +None. + +### WARNING + +1. **Cost audit records for remote admin actions cannot yet be queried through a dedicated API/report surface.** The branch supports remote admin override/reset flows and cost config patching, but there is still no dedicated API/report endpoint for querying persisted cost audit records independently. +2. 
**Governance-domain separation has strong labeling coverage, but counter-isolation is only partially proven.** Tests verify token-spend denials are not mislabeled and webhook payloads carry `governance_domain=token_spend`, but there is not a direct regression test asserting action-rate counters remain unchanged after token-budget denials. +3. **Broader dashboard package failures remain unrelated branch noise.** Changed cost files pass targeted tests/checks; broader dashboard package instability outside the cost-governance slice should be handled separately. + +### SUGGESTION + +1. Add a dedicated `cost audit` read endpoint or reporting projection so remote admin override/reset lifecycle can be queried directly. +2. Add one focused regression test asserting token-budget denials do not mutate action-rate counters. +3. Consider adding an explicit end-to-end test that compares CLI summary classification with gateway/dashboard classification from the same fixture data. + +--- + +## Verdict + +**PASS WITH WARNINGS** + +The Phase 6 follow-up work closed the previous FAIL blockers: explicit session/mission scopes are now implemented, multi-scope evaluation is in place, spec coverage is materially improved, and the runtime validation gates are green. Remaining issues are real but non-blocking for this change and are either follow-up product hardening or unrelated branch noise. diff --git a/openspec/specs/cost-governance/spec.md b/openspec/specs/cost-governance/spec.md new file mode 100644 index 00000000..028f83de --- /dev/null +++ b/openspec/specs/cost-governance/spec.md @@ -0,0 +1,200 @@ +# Cost Governance Specification + +## Purpose + +This specification defines Corvus cost governance as a product capability rather than an isolated +runtime mechanism. It establishes the canonical budget model, enforcement semantics, override and +audit rules, required operator and admin surfaces, and the separation between token-spend +governance and action-rate governance. 
+ +## Current Baseline + +- Issue A (`Wire CostTracker to agent loop`) is already merged and is part of the baseline for this + change. +- The runtime can instantiate cost tracking for the canonical agent loop and use that wiring behind + the existing `cost.enabled` gate. +- CLI, gateway/admin API, dashboard, session reporting, and observability product surfaces are NOT + yet complete and remain required work for this change. +- `SecurityPolicy.max_cost_per_day_cents` MUST be treated as legacy action-rate terminology, not as + the canonical token-spend budget model. + +## Requirements + +### Requirement: Budget Scope Model + +The system MUST govern token spend using four explicit budget scopes: `session`, `daily`, +`monthly`, and `mission`. + +- `session` MUST represent spend accumulated within one canonical session identity. +- `daily` MUST represent spend accumulated within the active UTC calendar day. +- `monthly` MUST represent spend accumulated within the active UTC calendar month. +- `mission` MUST represent spend accumulated within a single mission identity, independent from the + broader session total. +- The system MUST evaluate all configured token-spend scopes that apply to a request before allowing + a metered model call. +- A scope MAY be absent or disabled, but the system MUST NOT infer one scope from another. + +#### Scenario: Multiple configured scopes are evaluated together + +- GIVEN token-spend budgets are configured for `session`, `daily`, and `monthly` +- WHEN the runtime evaluates a metered model call for an active session +- THEN the system MUST check all three configured scopes before the call is admitted +- AND the most restrictive resulting decision MUST govern the call outcome. 
+ +#### Scenario: Mission budget remains independent from session budget + +- GIVEN a session contains two separate missions +- AND the first mission has already consumed its configured mission budget +- WHEN a second mission starts within the same session +- THEN the system MUST evaluate the second mission against its own mission scope +- AND the system MUST continue to evaluate the enclosing session, daily, and monthly scopes + independently. + +### Requirement: Warning and Hard-Block Semantics + +The system MUST distinguish warning semantics from hard-block semantics for token-spend budgets. + +- A warning MUST occur when spend reaches or exceeds the configured warning threshold for a scope but + has not yet exceeded the hard limit for that same scope. +- A hard block MUST occur when a scope's hard limit is exceeded or would be exceeded by the next + metered model call. +- In-flight model calls MUST be allowed to complete once admitted. +- After a hard-block condition is reached, the next metered model call MUST be rejected until spend + for the blocking scope is again below that scope's active limit or an authorized override is applied. +- Warning states MUST be visible to operators and admins; hard-block states MUST be visible and + actionable. + +#### Scenario: Warning is emitted before limit is exceeded + +- GIVEN a daily budget has a hard limit of 100 USD and a warning threshold of 80 percent +- WHEN accumulated daily spend reaches 82 USD after a completed model call +- THEN the system MUST classify the daily scope as warning state +- AND the next metered model call MUST still be eligible to run if no hard limit is exceeded.
+ +#### Scenario: Hard block applies on the next metered call + +- GIVEN a session budget hard limit is 25 USD +- AND an admitted model call completes with total session spend now at 26 USD +- WHEN the agent loop attempts the next metered model call for that session +- THEN the system MUST reject that call as budget-exceeded +- AND the system MUST NOT cancel or retroactively fail the already completed prior call. + +### Requirement: Override Policy and Audit Trail + +The system MUST support explicit, auditable override behavior for token-spend hard blocks. + +- Overrides MUST require an explicit operator or admin action; silent overrides MUST NOT exist. +- The product model MUST support both local operator overrides and remote admin overrides. +- Every override MUST record who performed it, which scope or limit was overridden, when the + override occurred, and the justification or source context when supplied. +- Override records MUST be append-only audit events. +- The system SHOULD support temporary overrides with explicit expiry or replacement conditions. + +#### Scenario: Local operator override is audited + +- GIVEN a CLI operator encounters a budget-exceeded condition +- WHEN the operator uses an approved override control to continue execution +- THEN the system MUST permit execution only if overrides are enabled by policy +- AND the system MUST append an audit record containing the operator identity, affected budget + scope, timestamp, and override action. + +#### Scenario: Remote admin override is visible after application + +- GIVEN a gateway-admin surface raises a monthly budget for a running deployment +- WHEN the override is accepted by the authorized admin path +- THEN the new limit MUST become the active limit for subsequent evaluations +- AND the system MUST persist an audit event that can be queried independently of the runtime + process that applied it. 
+ +### Requirement: Required Product Surfaces + +The system MUST expose cost-governance information and controls across the required product +surfaces: CLI, gateway/admin API, dashboard, session reporting, and observability. + +- The CLI MUST expose current budget status, warning or blocked state, and operator-approved + override flow when policy allows it. +- The gateway/admin API MUST expose current spend, budget status, history, and authorized + administrative controls for reset or limit adjustment. +- The dashboard MUST expose live or near-live budget status, warning or blocked indicators, and + historical spend views derived from gateway-safe APIs. +- Session reporting MUST expose session-level spend totals and budget outcomes for the completed or + active session. +- Observability MUST expose structured events and metrics for spend, warnings, hard blocks, and + overrides. + +#### Scenario: Operator surfaces show the same budget state + +- GIVEN a session is in warning state for the daily budget +- WHEN an operator checks the CLI and an admin checks the dashboard through the gateway +- THEN both surfaces MUST report the same warning classification for that active budget scope +- AND the system MUST NOT require either surface to access runtime-internal storage directly. + +#### Scenario: Session reporting includes budget outcome + +- GIVEN a session finishes after spending against active token budgets +- WHEN the session summary or history is requested +- THEN the system MUST include the session spend total +- AND the system MUST include whether the session ended within budget, in warning state, or blocked + by budget governance.
+ +#### Scenario: Observability records warning and override lifecycle + +- GIVEN a session first crosses a warning threshold and later receives an approved override +- WHEN observability data is emitted for that session +- THEN the system MUST emit structured records for both the warning and the override +- AND those records MUST be correlatable to the same session or mission identity without exposing + secrets. + +### Requirement: Separation of Governance Domains + +The system MUST keep token-spend governance separate from action-rate governance. + +- Token-spend governance MUST apply to model usage cost and spend budgets. +- Action-rate governance MUST apply to action frequency or action-count controls. +- Configuration, policy names, user-facing labels, and audit records MUST NOT present action-rate + controls as token-spend budgets. +- A request MAY be denied by either governance domain, but the denial reason MUST identify the + governing domain. +- Legacy names that imply token spend for action-rate controls MUST be removed or clearly deprecated. + +#### Scenario: Action-rate denial is not reported as token budget exhaustion + +- GIVEN an agent has exhausted its action-rate allowance but has token budget remaining +- WHEN a request is denied by policy +- THEN the system MUST report the denial as an action-rate governance outcome +- AND the system MUST NOT present the denial as daily or monthly token spend exhaustion. + +#### Scenario: Token budget denial leaves action-rate accounting unchanged + +- GIVEN an agent is blocked because its monthly token-spend budget is exceeded +- WHEN the denial is recorded +- THEN the system MUST classify the outcome under token-spend governance +- AND the system MUST NOT mutate action-rate counters solely because of that token-budget denial. 
+ +### Requirement: Baseline Truthfulness and Remaining Productization Work + +The specification for this change MUST describe the already-merged runtime baseline truthfully while +also defining the remaining productization work required to complete the feature. + +- The merged agent-loop cost wiring MUST be treated as current baseline behavior for this change. +- Remaining work MUST include the operator and admin product surfaces, audit completeness, + observability completeness, and governance-domain cleanup needed for a production-ready feature. +- Product documentation and acceptance reviews MUST distinguish between baseline behavior already on + `main` and required surfaces that are still pending. +- The system MUST NOT claim full cost-governance product completion until the required surfaces and + governance separation in this specification are delivered. + +#### Scenario: Baseline includes runtime wiring but not surface completion + +- GIVEN an implementation review is performed after Issue A has merged +- WHEN the reviewer evaluates this change against the specification +- THEN the reviewer MUST treat agent-loop cost wiring as already satisfied baseline behavior +- AND the reviewer MUST still mark CLI, gateway/admin API, dashboard, session reporting, and + observability requirements as pending until delivered. + +#### Scenario: Product completion cannot be claimed from runtime-only wiring + +- GIVEN cost tracking is wired in the runtime and can enforce limits internally +- WHEN no dashboard, CLI summary, admin API, or override audit surface is available to operators +- THEN the system MUST be considered partially productized +- AND the feature MUST NOT be represented as complete cost governance.