diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 970834e..f622dc3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -259,7 +259,7 @@ jobs: with: shared-key: machete save-if: ${{ github.ref == 'refs/heads/develop' }} - - uses: bnjbvr/cargo-machete@main + - uses: bnjbvr/cargo-machete@v0.9.1 # pinned — update manually coverage: name: Coverage diff --git a/CLAUDE.md b/CLAUDE.md index ef3c181..3cc8d9a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -49,6 +49,10 @@ Grob is a multi-provider LLM routing proxy written in Rust. It routes requests t | `src/traits.rs` | Core trait contracts (7+ traits for dispatch pipeline) | | `src/storage/` | Unified redb storage backend (GrobStore) | | `src/preset/` | Preset management system | +| `src/auth/auto_flow.rs` | Automatic credential setup at startup | +| `src/features/tool_layer/` | Tool-calling abstraction layer | +| `src/features/pledge/` | Pledge-based capability restrictions | +| `src/server/watch_sse.rs` | Live traffic inspector SSE backend | ## Local Setup @@ -93,7 +97,7 @@ feature/* ──► develop ──► (release-plz PR) ──► main ──► | Stage | Trigger | Jobs | |-------|---------|------| -| Quality gates | push to `develop` / PR | fmt, clippy, doc, shellcheck, actionlint | +| Quality gates | push to `develop` / PR | fmt, clippy, doc, actionlint | | Tests | push to `develop` / PR | unit tests (Ubuntu + macOS + Windows), integration tests | | Mutation testing | push to `develop` only | cargo-mutants on critical paths (router, DLP) | | Cross-build | push to `develop` + tag push | Multi-target binaries (Linux amd64/arm64/musl, macOS, Windows) | diff --git a/README.md b/README.md index 3f36478..410dbc1 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ **Grob** is a high-performance LLM routing proxy that sits between your AI tools and your providers. It redacts secrets before they reach the API, fails over transparently when a provider goes down, and fits in a 6 MB container with zero dependencies. -> **~100 us pure overhead** with full DLP + routing + caching + rate limiting -- [50x faster than LiteLLM, every feature measured individually](docs/reference/benchmarks.md). +> **~90 µs pure overhead** with full DLP + routing + caching + rate limiting -- [40x faster than LiteLLM, every feature measured individually](docs/reference/benchmarks.md). ```mermaid flowchart LR diff --git a/docs/reference/features.md b/docs/reference/features.md index b0f944b..7398535 100644 --- a/docs/reference/features.md +++ b/docs/reference/features.md @@ -1,6 +1,6 @@ # Feature Matrix -Exhaustive list of grob capabilities, extracted from the codebase (v0.30.0). +Exhaustive list of grob capabilities, extracted from the codebase (v0.35.1). ## Core Proxy diff --git a/docs/reference/storage.md b/docs/reference/storage.md index 503d82a..a5355a4 100644 --- a/docs/reference/storage.md +++ b/docs/reference/storage.md @@ -30,7 +30,8 @@ Value: JSON-serialized `SpendData`: "month": "2026-03", "total": 42.50, "by_provider": { "anthropic": 30.00, "openai": 12.50 }, - "by_model": { "claude-sonnet": 30.00, "gpt-4o": 12.50 } + "by_model": { "claude-sonnet": 30.00, "gpt-4o": 12.50 }, + "by_provider_count": { "anthropic": 15, "openai": 8 } } ``` diff --git a/src/security/circuit_breaker.rs b/src/security/circuit_breaker.rs index a6cc8a9..0f6ca1a 100644 --- a/src/security/circuit_breaker.rs +++ b/src/security/circuit_breaker.rs @@ -88,6 +88,7 @@ impl CircuitBreaker { // Check if timeout elapsed if self.last_state_change.elapsed() >= self.config.timeout { self.transition_to(CircuitState::HalfOpen); + self.half_open_calls += 1; true } else { false @@ -191,7 +192,8 @@ impl CircuitBreakerRegistry { Self::with_config(CircuitBreakerConfig::default()) } - fn with_config(config: CircuitBreakerConfig) -> Self { + /// Creates a registry with a custom circuit breaker configuration. + pub fn with_config(config: CircuitBreakerConfig) -> Self { Self { breakers: Arc::new(RwLock::new(HashMap::new())), default_config: config, @@ -359,4 +361,30 @@ mod tests { // Other providers not affected assert!(registry.can_execute("provider2").await); } + + #[tokio::test] + async fn half_open_allows_exactly_max_calls() { + let config = CircuitBreakerConfig { + failure_threshold: 1, + success_threshold: 1, + timeout: Duration::from_millis(10), + half_open_max_calls: 2, + }; + let registry = CircuitBreakerRegistry::with_config(config); + + // Trip to Open. + registry.record_failure("p").await; + assert!(!registry.can_execute("p").await); + + // Wait for timeout → first can_execute transitions to HalfOpen. + sleep(Duration::from_millis(20)).await; + + // Should allow exactly 2 calls (half_open_max_calls), not 3. + assert!(registry.can_execute("p").await, "call 1 should pass"); + assert!(registry.can_execute("p").await, "call 2 should pass"); + assert!( + !registry.can_execute("p").await, + "call 3 should be rejected" + ); + } }