From a49cef7d48465e6d1e3d3f21e8fd69a8dcaceb2e Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 14:53:07 +0100 Subject: [PATCH 01/60] feat: introduce seatbelt crate --- CHANGELOG.md | 1 + Cargo.lock | 80 ++ Cargo.toml | 5 + README.md | 1 + crates/seatbelt/CHANGELOG.md | 1 + crates/seatbelt/Cargo.toml | 109 +++ crates/seatbelt/README.md | 5 + crates/seatbelt/benches/circuit_breaker.rs | 55 ++ crates/seatbelt/benches/observability.rs | 147 ++++ crates/seatbelt/benches/retry.rs | 81 ++ crates/seatbelt/benches/timeout.rs | 60 ++ crates/seatbelt/examples/circuit_breaker.rs | 81 ++ .../seatbelt/examples/resilience_pipeline.rs | 74 ++ crates/seatbelt/examples/retry.rs | 45 + crates/seatbelt/examples/retry_advanced.rs | 109 +++ crates/seatbelt/examples/retry_outage.rs | 126 +++ crates/seatbelt/examples/timeout.rs | 55 ++ crates/seatbelt/examples/timeout_advanced.rs | 94 +++ crates/seatbelt/favicon.ico | 3 + crates/seatbelt/logo.png | 3 + crates/seatbelt/src/circuit_breaker/args.rs | 105 +++ .../seatbelt/src/circuit_breaker/callbacks.rs | 13 + .../seatbelt/src/circuit_breaker/constants.rs | 35 + .../src/circuit_breaker/engine/engine_core.rs | 676 +++++++++++++++ .../src/circuit_breaker/engine/engine_fake.rs | 31 + .../engine/engine_telemetry.rs | 279 +++++++ .../src/circuit_breaker/engine/engines.rs | 108 +++ .../src/circuit_breaker/engine/mod.rs | 107 +++ .../engine/probing/health_probe.rs | 212 +++++ .../src/circuit_breaker/engine/probing/mod.rs | 154 ++++ .../circuit_breaker/engine/probing/options.rs | 245 ++++++ .../circuit_breaker/engine/probing/probes.rs | 86 ++ .../engine/probing/single_probe.rs | 115 +++ .../src/circuit_breaker/execution_result.rs | 62 ++ .../src/circuit_breaker/half_open_mode.rs | 180 ++++ crates/seatbelt/src/circuit_breaker/health.rs | 350 ++++++++ crates/seatbelt/src/circuit_breaker/layer.rs | 736 +++++++++++++++++ crates/seatbelt/src/circuit_breaker/mod.rs | 334 ++++++++ .../src/circuit_breaker/partition_key.rs | 162 ++++ .../seatbelt/src/circuit_breaker/service.rs | 493 +++++++++++ .../seatbelt/src/circuit_breaker/telemetry.rs | 9 + crates/seatbelt/src/lib.rs | 146 ++++ crates/seatbelt/src/options/attempt.rs | 262 ++++++ crates/seatbelt/src/options/backoff.rs | 24 + .../seatbelt/src/options/define_fn_wrapper.rs | 162 ++++ crates/seatbelt/src/options/mod.rs | 39 + .../seatbelt/src/options/seatbelt_options.rs | 271 ++++++ crates/seatbelt/src/retry/args.rs | 158 ++++ crates/seatbelt/src/retry/backoff.rs | 689 ++++++++++++++++ crates/seatbelt/src/retry/callbacks.rs | 9 + crates/seatbelt/src/retry/constants.rs | 32 + crates/seatbelt/src/retry/layer.rs | 776 ++++++++++++++++++ crates/seatbelt/src/retry/mod.rs | 234 ++++++ crates/seatbelt/src/retry/service.rs | 640 +++++++++++++++ crates/seatbelt/src/retry/telemetry.rs | 10 + crates/seatbelt/src/rnd.rs | 54 ++ crates/seatbelt/src/telemetry/metrics.rs | 56 ++ crates/seatbelt/src/telemetry/mod.rs | 61 ++ crates/seatbelt/src/testing.rs | 154 ++++ crates/seatbelt/src/timeout/args.rs | 75 ++ crates/seatbelt/src/timeout/callbacks.rs | 9 + crates/seatbelt/src/timeout/layer.rs | 429 ++++++++++ crates/seatbelt/src/timeout/mod.rs | 211 +++++ crates/seatbelt/src/timeout/service.rs | 279 +++++++ crates/seatbelt/src/timeout/telemetry.rs | 4 + 65 files changed, 10411 insertions(+) create mode 100644 crates/seatbelt/CHANGELOG.md create mode 100644 crates/seatbelt/Cargo.toml create mode 100644 crates/seatbelt/README.md create mode 100644 crates/seatbelt/benches/circuit_breaker.rs create mode 100644 crates/seatbelt/benches/observability.rs create mode 100644 crates/seatbelt/benches/retry.rs create mode 100644 crates/seatbelt/benches/timeout.rs create mode 100644 crates/seatbelt/examples/circuit_breaker.rs create mode 100644 crates/seatbelt/examples/resilience_pipeline.rs create mode 100644 crates/seatbelt/examples/retry.rs create mode 100644 crates/seatbelt/examples/retry_advanced.rs create mode 100644 crates/seatbelt/examples/retry_outage.rs create mode 100644 crates/seatbelt/examples/timeout.rs create mode 100644 crates/seatbelt/examples/timeout_advanced.rs create mode 100644 crates/seatbelt/favicon.ico create mode 100644 crates/seatbelt/logo.png create mode 100644 crates/seatbelt/src/circuit_breaker/args.rs create mode 100644 crates/seatbelt/src/circuit_breaker/callbacks.rs create mode 100644 crates/seatbelt/src/circuit_breaker/constants.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/engine_core.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/engines.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/mod.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/probing/options.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs create mode 100644 crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs create mode 100644 crates/seatbelt/src/circuit_breaker/execution_result.rs create mode 100644 crates/seatbelt/src/circuit_breaker/half_open_mode.rs create mode 100644 crates/seatbelt/src/circuit_breaker/health.rs create mode 100644 crates/seatbelt/src/circuit_breaker/layer.rs create mode 100644 crates/seatbelt/src/circuit_breaker/mod.rs create mode 100644 crates/seatbelt/src/circuit_breaker/partition_key.rs create mode 100644 crates/seatbelt/src/circuit_breaker/service.rs create mode 100644 crates/seatbelt/src/circuit_breaker/telemetry.rs create mode 100644 crates/seatbelt/src/lib.rs create mode 100644 crates/seatbelt/src/options/attempt.rs create mode 100644 crates/seatbelt/src/options/backoff.rs create mode 100644 crates/seatbelt/src/options/define_fn_wrapper.rs create mode 100644 crates/seatbelt/src/options/mod.rs create mode 100644 crates/seatbelt/src/options/seatbelt_options.rs create mode 100644 crates/seatbelt/src/retry/args.rs create mode 100644 crates/seatbelt/src/retry/backoff.rs create mode 100644 crates/seatbelt/src/retry/callbacks.rs create mode 100644 crates/seatbelt/src/retry/constants.rs create mode 100644 crates/seatbelt/src/retry/layer.rs create mode 100644 crates/seatbelt/src/retry/mod.rs create mode 100644 crates/seatbelt/src/retry/service.rs create mode 100644 crates/seatbelt/src/retry/telemetry.rs create mode 100644 crates/seatbelt/src/rnd.rs create mode 100644 crates/seatbelt/src/telemetry/metrics.rs create mode 100644 crates/seatbelt/src/telemetry/mod.rs create mode 100644 crates/seatbelt/src/testing.rs create mode 100644 crates/seatbelt/src/timeout/args.rs create mode 100644 crates/seatbelt/src/timeout/callbacks.rs create mode 100644 crates/seatbelt/src/timeout/layer.rs create mode 100644 crates/seatbelt/src/timeout/mod.rs create mode 100644 crates/seatbelt/src/timeout/service.rs create mode 100644 crates/seatbelt/src/timeout/telemetry.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 9862cfa6..9a9ad24d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Please see each crate's change log below: - [`ohno`](./crates/ohno/CHANGELOG.md) - [`ohno_macros`](./crates/ohno_macros/CHANGELOG.md) - [`recoverable`](./crates/recoverable/CHANGELOG.md) +- [`seatbelt`](./crates/seatbelt/CHANGELOG.md) - [`thread_aware`](./crates/thread_aware/CHANGELOG.md) - [`thread_aware_macros`](./crates/thread_aware_macros/CHANGELOG.md) - [`thread_aware_macros_impl`](./crates/thread_aware_macros_impl/CHANGELOG.md) diff --git a/Cargo.lock b/Cargo.lock index 7a7145fe..b088a43e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -604,6 +604,16 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -951,6 +961,36 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.17", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.2", + "thiserror 2.0.17", + "tokio", + "tokio-stream", +] + [[package]] name = "os_pipe" version = "1.2.3" @@ -999,6 +1039,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec" +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + [[package]] name = "phf" version = "0.11.3" @@ -1305,6 +1351,29 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "seatbelt" +version = "0.1.0" +dependencies = [ + "alloc_tracker", + "anyhow", + "criterion", + "fastrand", + "futures", + "http", + "layered", + "mutants", + "ohno", + "opentelemetry", + "opentelemetry_sdk", + "recoverable", + "static_assertions", + "tick", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "semver" version = "1.0.27" @@ -1689,6 +1758,17 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.17" diff --git a/Cargo.toml b/Cargo.toml index 7a3a812f..8637621a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ layered = { path = "crates/layered", default-features = false, version = "0.1.0" ohno = { path = "crates/ohno", default-features = false, version = "0.2.1" } ohno_macros = { path = "crates/ohno_macros", default-features = false, version = "0.2.0" } recoverable = { path = "crates/recoverable", default-features = false, version = "0.1.0" } +seatbelt = { path = "crates/seatbelt", default-features = false, version = "0.1.0" } testing_aids = { path = "crates/testing_aids", default-features = false } thread_aware = { path = "crates/thread_aware", default-features = false, version = "0.6.0" } thread_aware_macros = { path = "crates/thread_aware_macros", default-features = false, version = "0.6.0" } @@ -95,6 +96,10 @@ trybuild = { version = "1.0.114", default-features = false } typeid = { version = "1.0.3", default-features = false } windows-sys = { version = "0.61.2", default-features = false } xxhash-rust = { version = "0.8.15", default-features = false } +fastrand = { version = "2.3.0", default-features = false, features = ["std"] } +opentelemetry = { version = "0.31.0", default-features = false } +opentelemetry_sdk = { version = "0.31.0", default-features = false } +http = { version = "1.2.0", default-features = false, features = ["std"] } [workspace.lints.rust] ambiguous_negative_literals = "warn" diff --git a/README.md b/README.md index 355872ea..1f7aca25 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ These are the crates built out of this repo: - [`ohno`](./crates/ohno/README.md) - High-quality Rust error handling. - [`ohno_macros`](./crates/ohno_macros/README.md) - Macros for the `ohno` crate. - [`recoverable`](./crates/recoverable/README.md) - Recovery information and classification for resilience patterns. +- [`seatbelt`](./crates/seatbelt/README.md) - Resilience and recovery mechanisms for services, applications, and libraries. - [`thread_aware`](./crates/thread_aware/README.md) - Facilities to support thread-isolated state. - [`thread_aware_macros`](./crates/thread_aware_macros/README.md) - Macros for the `thread_aware` crate. - [`thread_aware_macros_impl`](./crates/thread_aware_macros_impl/README.md) - Macros for the `thread_aware` crate. diff --git a/crates/seatbelt/CHANGELOG.md b/crates/seatbelt/CHANGELOG.md new file mode 100644 index 00000000..825c32f0 --- /dev/null +++ b/crates/seatbelt/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml new file mode 100644 index 00000000..59c29482 --- /dev/null +++ b/crates/seatbelt/Cargo.toml @@ -0,0 +1,109 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "seatbelt" +description = "Resilience and recovery mechanisms for services, applications, and libraries." +version = "0.1.0" +readme = "README.md" +keywords = ["oxidizer", "resilience", "tower", "recovery", "retry", "circuit-breaker"] +categories = ["data-structures"] + +edition.workspace = true +rust-version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true + +[package.metadata.docs.rs] +all-features = true + +[features] +service = ["dep:layered"] +telemetry = [] +metrics = ["dep:opentelemetry", "opentelemetry/metrics", "telemetry"] +logs = ["telemetry", "dep:tracing"] +# This exposes a common APIs for building resilience middlewares. +options = ["dep:tick", "telemetry", "service", "metrics", "logs"] +timeout = ["options"] +retry = ["options", "dep:fastrand"] +circuit-breaker = ["options", "dep:fastrand"] + +[dependencies] +recoverable = { workspace = true } +tick = { workspace = true, optional = true } +layered = { workspace = true, optional = true } +opentelemetry = { workspace = true, optional = true } +tracing = { workspace = true, optional = true } +fastrand = { workspace = true, optional = true } +futures = { workspace = true } + +[dev-dependencies] +tick = { workspace = true, features = ["test-util", "tokio"] } +layered = { workspace = true } +ohno = { workspace = true } +static_assertions.workspace = true +mutants.workspace = true +criterion.workspace = true +alloc_tracker.workspace = true +futures = { workspace = true, features = ["executor"] } +opentelemetry = { workspace = true, default-features = false, features = ["metrics"] } +opentelemetry_sdk = { workspace = true, default-features = false, features = ["metrics", "testing", "experimental_metrics_custom_reader"] } +anyhow.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true +fastrand.workspace = true +http.workspace = true +tokio = { workspace = true, features = ["rt", "macros"] } + +[[example]] +name = "timeout" +required-features = ["timeout"] + +[[example]] +name = "timeout_advanced" +required-features = ["timeout"] + +[[example]] +name = "retry" +required-features = ["retry"] + +[[example]] +name = "retry_advanced" +required-features = ["retry"] + +[[example]] +name = "retry_outage" +required-features = ["retry"] + +[[example]] +name = "resilience_pipeline" +required-features = ["retry", "timeout"] + +[[example]] +name = "circuit_breaker" +required-features = ["circuit-breaker"] + +[[bench]] +name = "observability" +harness = false +required-features = ["options"] + +[[bench]] +name = "timeout" +harness = false +required-features = ["timeout"] + +[[bench]] +name = "retry" +harness = false +required-features = ["retry"] + +[[bench]] +name = "circuit_breaker" +harness = false +required-features = ["circuit-breaker"] + +[lints] +workspace = true diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md new file mode 100644 index 00000000..44109329 --- /dev/null +++ b/crates/seatbelt/README.md @@ -0,0 +1,5 @@ + diff --git a/crates/seatbelt/benches/circuit_breaker.rs b/crates/seatbelt/benches/circuit_breaker.rs new file mode 100644 index 00000000..05fd75f8 --- /dev/null +++ b/crates/seatbelt/benches/circuit_breaker.rs @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. + +use alloc_tracker::{Allocator, Session}; +use criterion::{Criterion, criterion_group, criterion_main}; +use futures::executor::block_on; +use layered::{Execute, Service, Stack}; +use oxidizer_benchmarking::BenchmarkGroupExt; +use seatbelt::circuit_breaker::CircuitBreaker; +use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use tick::Clock; + +#[global_allocator] +static ALLOCATOR: Allocator = Allocator::system(); + +pub fn entry(c: &mut Criterion) { + let mut group = c.benchmark_group("circuit_breaker"); + let session = Session::new(); + + // No circuit breaker + let service = Execute::new(|_input: Input| async move { Output }); + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "no-circuit-breaker", + &session, + ); + + // With circuit breaker (closed state) + let options = SeatbeltOptions::new(Clock::new_frozen()); + + let service = ( + CircuitBreaker::layer("bench", &options) + .recovery_with(|_, _| RecoveryInfo::never()) + .rejected_input_error(|_input, _args| Output) + .min_throughput(1000), // High threshold to keep circuit closed + Execute::new(|_input: Input| async move { Ok(Output) }), + ) + .build(); + + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "with-circuit-breaker", + &session, + ); + + session.print_to_stdout(); +} + +criterion_group!(benches, entry); +criterion_main!(benches); + +#[derive(Debug, Clone)] +struct Input; + +#[derive(Debug, Clone)] +struct Output; diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs new file mode 100644 index 00000000..7d94d860 --- /dev/null +++ b/crates/seatbelt/benches/observability.rs @@ -0,0 +1,147 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use alloc_tracker::{Allocator, Session}; +use criterion::{Criterion, criterion_group, criterion_main}; +use futures::executor::block_on; +use layered::{Execute, Service}; +use opentelemetry::metrics::Counter; +use opentelemetry::{KeyValue, StringValue}; +use opentelemetry_sdk::error::OTelSdkResult; +use opentelemetry_sdk::metrics::data::ResourceMetrics; +use opentelemetry_sdk::metrics::exporter::PushMetricExporter; +use opentelemetry_sdk::metrics::{SdkMeterProvider, Temporality}; +use oxidizer_benchmarking::BenchmarkGroupExt; +use seatbelt::SeatbeltOptions; +use seatbelt::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; +use tick::Clock; + +#[global_allocator] +static ALLOCATOR: Allocator = Allocator::system(); + +pub fn entry(c: &mut Criterion) { + let mut group = c.benchmark_group("observability"); + let session = Session::new(); + + // No observability + let service = NoObservability(Execute::new(|v: Input| async move { Output::from(v) })); + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "no-observability", + &session, + ); + + // With observability + let options = SeatbeltOptions::new(Clock::new_frozen()); + let service = Observability::new( + &options, + Execute::new(|v: Input| async move { Output::from(v) }), + ); + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "observability", + &session, + ); + + // With observability + listener + let meter_provider = SdkMeterProvider::builder() + .with_periodic_exporter(EmptyExporter) + .build(); + let options = SeatbeltOptions::new(Clock::new_frozen()).meter_provider(&meter_provider); + let service = Observability::new( + &options, + Execute::new(|v: Input| async move { Output::from(v) }), + ); + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "observability-and-listener", + &session, + ); + + session.print_to_stdout(); +} + +criterion_group!(benches, entry); +criterion_main!(benches); + +struct NoObservability(S); + +impl Service for NoObservability +where + S: Service, +{ + type Out = Output; + + async fn execute(&self, input: Input) -> Self::Out { + self.0.execute(input).await + } +} + +struct Observability { + service: S, + event_reporter: Counter, + pipeline_name: StringValue, +} + +impl Observability { + pub fn new(options: &SeatbeltOptions, service: S) -> Self { + Self { + service, + event_reporter: options.create_resilience_event_counter(), + pipeline_name: options.get_pipeline_name().clone().into(), + } + } +} + +impl Service for Observability +where + S: Service, +{ + type Out = Output; + + async fn execute(&self, input: Input) -> Self::Out { + let output = self.service.execute(input).await; + + self.event_reporter.add( + 1, + &[ + KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + KeyValue::new(STRATEGY_NAME, "benchmark.observability"), + KeyValue::new(EVENT_NAME, "custom"), + ], + ); + + output + } +} + +struct Input; + +struct Output; + +impl From for Output { + fn from(_input: Input) -> Self { + Self + } +} + +struct EmptyExporter; + +impl PushMetricExporter for EmptyExporter { + async fn export(&self, _metrics: &ResourceMetrics) -> OTelSdkResult { + Ok(()) + } + + fn force_flush(&self) -> OTelSdkResult { + Ok(()) + } + + fn shutdown_with_timeout(&self, _timeout: Duration) -> OTelSdkResult { + Ok(()) + } + + fn temporality(&self) -> Temporality { + Temporality::Cumulative + } +} diff --git a/crates/seatbelt/benches/retry.rs b/crates/seatbelt/benches/retry.rs new file mode 100644 index 00000000..018ce611 --- /dev/null +++ b/crates/seatbelt/benches/retry.rs @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use alloc_tracker::{Allocator, Session}; +use criterion::{Criterion, criterion_group, criterion_main}; +use futures::executor::block_on; +use layered::{Execute, Service, Stack}; +use oxidizer_benchmarking::BenchmarkGroupExt; +use seatbelt::retry::Retry; +use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use tick::Clock; + +#[global_allocator] +static ALLOCATOR: Allocator = Allocator::system(); + +pub fn entry(c: &mut Criterion) { + let mut group = c.benchmark_group("retry"); + let session = Session::new(); + + // No retries + let service = Execute::new(|v: Input| async move { Output::from(v) }); + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "no-retry", + &session, + ); + + // With retry + let options = SeatbeltOptions::new(Clock::new_frozen()); + + let service = ( + Retry::layer("bench", &options) + .clone_input() + .recovery_with(|_, _| RecoveryInfo::never()), + Execute::new(|v: Input| async move { Output::from(v) }), + ) + .build(); + + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "with-retry", + &session, + ); + + // With retry and recovery + let options = SeatbeltOptions::new(Clock::new_frozen()); + + let service = ( + Retry::layer("bench", &options) + .clone_input() + .max_retry_attempts(1) + .base_delay(Duration::ZERO) + .recovery_with(|_, _| RecoveryInfo::retry()), + Execute::new(|v: Input| async move { Output::from(v) }), + ) + .build(); + + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "with-retry-and-recovery", + &session, + ); + + session.print_to_stdout(); +} + +criterion_group!(benches, entry); +criterion_main!(benches); + +#[derive(Debug, Clone)] +struct Input; + +#[derive(Debug, Clone)] +struct Output; + +impl From for Output { + fn from(_input: Input) -> Self { + Self + } +} diff --git a/crates/seatbelt/benches/timeout.rs b/crates/seatbelt/benches/timeout.rs new file mode 100644 index 00000000..aba68fb0 --- /dev/null +++ b/crates/seatbelt/benches/timeout.rs @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use alloc_tracker::{Allocator, Session}; +use criterion::{Criterion, criterion_group, criterion_main}; +use futures::executor::block_on; +use layered::{Execute, Service, Stack}; +use oxidizer_benchmarking::BenchmarkGroupExt; +use seatbelt::SeatbeltOptions; +use seatbelt::timeout::Timeout; +use tick::Clock; + +#[global_allocator] +static ALLOCATOR: Allocator = Allocator::system(); + +pub fn entry(c: &mut Criterion) { + let mut group = c.benchmark_group("timeout"); + let session = Session::new(); + + // No timeout + let service = Execute::new(|v: Input| async move { Output::from(v) }); + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "no-timeout", + &session, + ); + + // With timeout + let options = SeatbeltOptions::new(Clock::new_frozen()); + + let service = ( + Timeout::layer("bench", &options) + .timeout_output(|_args| Output) + .timeout(Duration::from_secs(10)), + Execute::new(|v: Input| async move { Output::from(v) }), + ) + .build(); + + group.bench_with_memory( + || _ = block_on(service.execute(Input)), + "with-timeout", + &session, + ); + + session.print_to_stdout(); +} + +criterion_group!(benches, entry); +criterion_main!(benches); + +struct Input; + +struct Output; + +impl From for Output { + fn from(_input: Input) -> Self { + Self + } +} diff --git a/crates/seatbelt/examples/circuit_breaker.rs b/crates/seatbelt/examples/circuit_breaker.rs new file mode 100644 index 00000000..7ad854fc --- /dev/null +++ b/crates/seatbelt/examples/circuit_breaker.rs @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. + +//! Circuit breaker example that simulates a major service outage and tripping of the +//! circuit breaker by: +//! +//! 1. Monitoring failure rates in real-time +//! 2. Opening the circuit when failure thresholds are exceeded +//! 3. Allowing probe requests to test service recovery +//! 4. Automatically closing the circuit when the service recovers + +use std::time::Duration; + +use layered::{Execute, Service, Stack}; +use oxidizer_rt::Builtins; +use seatbelt::circuit_breaker::CircuitBreaker; +use seatbelt::{RecoveryInfo, SeatbeltOptions}; + +#[oxidizer_rt::main] +async fn main(builtins: Builtins) -> anyhow::Result<()> { + let options = SeatbeltOptions::new(&builtins); + + // Define stack with circuit breaker layer + let stack = ( + CircuitBreaker::layer("my_circuit_breaker", &options) + // Required: classify the recoverability of outputs + .recovery_with(|output, _args| match output { + Ok(_) => RecoveryInfo::never(), + Err(_) => RecoveryInfo::retry(), + }) + // Required: provide output when circuit is open + .rejected_input_error(|input, _args| { + format!("rejecting execution of '{input}' because circuit is open") + }) + // Decrease the following values to see the circuit breaker trip faster + // and speed-up the example + .sampling_duration(Duration::from_secs(2)) + .min_throughput(5) + .break_duration(Duration::from_secs(2)) + .on_probing(|_, _| println!("probing input let in to see if the service has recovered")) + .on_opened(|_, _| println!("circuit opened due to exceeding failure threshold")) + .on_closed(|_, args| { + println!( + "circuit closed because probing succeeded, opened for: {}s", + args.open_duration().as_secs() + ); + }), + Execute::new(execute_operation), + ); + + // Create the service from the stack + let service = stack.build(); + + // Execute multiple attempts, the circuit breaker will eventually open because the + // failure rate exceeds the threshold. You can play with this value an increase it to 300 + // to see how the circuit breaker eventually closes when the service recovers. + for attempt in 0..30 { + builtins.clock().delay(Duration::from_millis(50)).await; + + match service.execute(attempt).await { + Ok(output) => println!("{attempt}: {output}"), + Err(e) => println!("{attempt}: {e}"), + } + } + + Ok(()) +} + +// Simulate major service outage, 50% chance of failing +async fn execute_operation(input: u32) -> Result { + // After input 100, the service recovers and always succeeds + if input > 100 { + return Ok(format!("output-{input}")); + } + + if fastrand::i16(0..10) > 5 { + Err(format!("transient error for '{input}'")) + } else { + // Produce some output + Ok(format!("output-{input}")) + } +} diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs new file mode 100644 index 00000000..b0f649e7 --- /dev/null +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -0,0 +1,74 @@ +// Copyright (c) Microsoft Corporation. + +//! This example demonstrates how to combine multiple resilience middlewares +//! using the `seatbelt` crate to create a robust execution pipeline with basic +//! resilience capabilities. + +use std::time::Duration; + +use layered::{Execute, Service, Stack}; +use oxidizer_rt::Builtins; +use oxidizer_telemetry::Telemetry; +use oxidizer_telemetry::destination::{stdout_logs, stdout_metrics}; +use oxidizer_telemetry::logs::InterceptTracingToLogExporter; +use seatbelt::retry::Retry; +use seatbelt::timeout::Timeout; +use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use tracing_subscriber::util::SubscriberInitExt; + +#[oxidizer_rt::main] +async fn main(builtins: Builtins) { + let telemetry = configure_telemetry(); + + // Shared options for resilience middleware + let options = SeatbeltOptions::new(&builtins) + .meter_provider(telemetry.meter_provider()) + .pipeline_name("my_pipeline"); + + // Define stack with retry and timeout middlewares + let stack = ( + Retry::layer("my_retry", &options) + // automatically clones the input for retries + .clone_input() + // classify the output + .recovery_with(|output: &String, _args| match output.as_str() { + "error" | "timeout" => RecoveryInfo::retry(), + _ => RecoveryInfo::never(), + }), + Timeout::layer("my_timeout", &options) + .timeout(Duration::from_secs(1)) + .timeout_output(|_args| "timeout".to_string()), + Execute::new(execute_operation), + ); + + // Build the service + let service = stack.build(); + + // Execute the service with an input + let output = service.execute("value".to_string()).await; + + println!("execution finished, output: {output}"); +} + +async fn execute_operation(input: String) -> String { + if fastrand::i16(0..10) > 4 { + "error".to_string() + } else { + input + } +} + +fn configure_telemetry() -> Telemetry { + let telemetry = oxidizer_telemetry::builder() + .destination(stdout_metrics()) + .unwrap() + .destination(stdout_logs()) + .unwrap() + .build(); + + tracing_subscriber::registry() + .with_tracing_interception(telemetry.logger_provider()) + .init(); + + telemetry +} diff --git a/crates/seatbelt/examples/retry.rs b/crates/seatbelt/examples/retry.rs new file mode 100644 index 00000000..c7cc97b1 --- /dev/null +++ b/crates/seatbelt/examples/retry.rs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. + +//! Basic retry middleware example with automatic input cloning and simple recovery logic. + +use std::io::Error; + +use layered::{Execute, Service, Stack}; +use oxidizer_rt::Builtins; +use seatbelt::retry::Retry; +use seatbelt::{RecoveryInfo, SeatbeltOptions}; + +#[oxidizer_rt::main] +async fn main(state: Builtins) -> anyhow::Result<()> { + let options = SeatbeltOptions::new(&state); + + // Define stack with retry layer + let stack = ( + Retry::layer("my_retry", &options) + .clone_input() // Automatically clone input for retries + .recovery_with(|output, _args| match output { + Ok(_) => RecoveryInfo::never(), + Err(_) => RecoveryInfo::retry(), + }), + Execute::new(execute_operation), + ); + + // Create the service from the stack + let service = stack.build(); + + match service.execute("value".to_string()).await { + Ok(output) => println!("execution succeeded, result: {output}"), + Err(e) => println!("execution failed, error: {e}"), + } + + Ok(()) +} + +// 20% chance of failing with a transient error +async fn execute_operation(input: String) -> Result { + if fastrand::i16(0..10) > 8 { + Err(Error::other("transient execution error")) + } else { + Ok(input) + } +} diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs new file mode 100644 index 00000000..725eda02 --- /dev/null +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. + +//! Advanced retry middleware demonstrating custom input cloning and attempt info forwarding. +//! +//! Shows how to inject attempt metadata into requests via `.clone_input()`, access it +//! in the service function, and forward it through to the final output. + +use std::io::Error; +use std::time::Duration; + +use http::{Request, Response}; +use layered::{Execute, Service, Stack}; +use seatbelt::retry::Retry; +use seatbelt::{Attempt, RecoveryInfo, SeatbeltOptions}; +use tracing_subscriber::util::SubscriberInitExt; + +#[oxidizer_rt::main] +async fn main(state: Builtins) -> anyhow::Result<()> { + let options = SeatbeltOptions::new(&state) + .pipeline_name("retry_advanced") + .meter_provider(configure_telemetry().meter_provider()); + + // Define stack with retry layer + let stack = ( + Retry::layer("my_retry", &options) + // Custom input cloning - inject attempt info into request extensions + .clone_input_with(|input: &mut Request, args| { + let mut cloned = input.clone(); + cloned.extensions_mut().insert(args.attempt()); + Some(cloned) + }) + .max_retry_attempts(10) + .use_jitter(true) + .base_delay(Duration::from_millis(100)) + .recovery_with(|output, _args| match output { + Ok(_) => RecoveryInfo::never(), + Err(_) => RecoveryInfo::retry(), + }) + // Register a callback called just before the next retry + .on_retry(|_output, args| { + println!( + "retrying, attempt {}, delay: {}s", + args.attempt().index(), + args.retry_delay().as_secs_f32(), + ); + }), + Execute::new(send_request), + ); + + // Create the service from the stack + let service = stack.build(); + + let request = Request::builder() + .uri("https://example.com") + .body("value".to_string())?; + + match service.execute(request).await { + Ok(output) => { + // Extract attempt info that was forwarded through the pipeline + let attempts = output + .extensions() + .get::() + .map_or(0, |a| a.index()); + println!( + "execution succeeded, result: {}, attempts: {}", + output.body(), + attempts + ); + } + Err(e) => println!("execution failed, error: {e}"), + } + + Ok(()) +} + +// Only 20% chance of success, retries will be attempted with a high probability +async fn send_request(input: Request) -> Result, Error> { + if fastrand::i16(0..10) > 2 { + Err(Error::other("transient execution error")) + } else { + // Extract attempt info that was injected during custom cloning + let attempt = input + .extensions() + .get::() + .copied() + .unwrap_or_default(); + + // Forward attempt info to output via response extensions + Response::builder() + .extension(attempt) + .body("success".to_string()) + .map_err(Error::other) + } +} + +fn configure_telemetry() -> Telemetry { + let telemetry = oxidizer_telemetry::builder() + .destination(stdout_metrics()) + .unwrap() + .destination(stdout_logs()) + .unwrap() + .build(); + + tracing_subscriber::registry() + .with_tracing_interception(telemetry.logger_provider()) + .init(); + + telemetry +} diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs new file mode 100644 index 00000000..1d8f2653 --- /dev/null +++ b/crates/seatbelt/examples/retry_outage.rs @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. + +//! Demonstrates advanced retry patterns with input restoration from errors. +//! +//! This example showcases how to handle outage scenarios where: +//! - The original input cannot be cloned (expensive request bodies) +//! - Input must be restored from error information using `restore_input_on_error()` +//! - Failed requests are automatically retried with a fallback endpoint +//! - Outage detection and recovery are handled seamlessly + +use std::time::Duration; + +use http::{Request, Response}; +use layered::{Execute, Service, Stack}; +use oxidizer_rt::Builtins; +use seatbelt::retry::Retry; +use seatbelt::{Recovery, RecoveryInfo, SeatbeltOptions}; + +const ENDPOINT_WITH_OUTAGE: &str = "https://example.com"; +const ENDPOINT_ALIVE: &str = "https://fallback.example.com"; + +#[oxidizer_rt::main] +async fn main(state: Builtins) -> anyhow::Result<()> { + let options = SeatbeltOptions::new(&state); + + // Configure retry layer for outage handling with input restoration + let stack = ( + Retry::layer("outage_retry", &options) + // Disable input cloning - we'll restore from error instead + .clone_input_with(|_, _| None) + // Configure recovery based on an error type + .recovery_with(|output: &Result<_, HttpError>, _| match output { + Ok(_) => RecoveryInfo::never(), // Don't retry successful responses + Err(error) => error.recovery(), // Use error's recovery strategy + }) + // Enable unavailable detection and handling + .handle_unavailable(true) + // Restore input from error when retrying (key feature!) + .restore_input_from_error(|error: &mut HttpError, _| { + // Extract the original request and modify it for fallback endpoint + error.try_restore_request() + }), + Execute::new(send_request), + ); + + // Create the service from the stack + let service = stack.build(); + + // Create a request that will initially fail but can be restored + let request = Request::builder() + .uri(ENDPOINT_WITH_OUTAGE) + .body("important request data".to_string())?; + + println!("Sending request to: {}", request.uri()); + + // The service will: + // 1. Try the original endpoint (fails with outage) + // 2. Restore input from the error with fallback endpoint + // 3. Retry with the modified request (succeeds) + let response = service.execute(request).await?; + + println!("Final response: {}", response.body()); + + Ok(()) +} + +/// Simulates a service that has outages on the primary endpoint but works on fallback. +/// +/// This demonstrates the input restoration pattern where the original request is preserved +/// in the error so it can be modified and retried against a different endpoint. +async fn send_request(input: Request) -> Result, HttpError> { + if input.uri() == ENDPOINT_WITH_OUTAGE { + println!("Request to {} failed - simulating outage", input.uri()); + // Store the original request in the error for later restoration + Err(HttpError::outage(input)) + } else { + println!("Request to {} succeeded", input.uri()); + Ok(Response::new(format!("Success! Data from {}", input.uri()))) + } +} + +/// Custom error type that preserves the original request for restoration. +/// +/// This pattern allows failed requests to be modified and retried against different +/// endpoints without requiring the original input to be cloneable. +#[ohno::error] +struct HttpError { + /// The original request that failed, preserved for input restoration + rejected_request: Option>>, + /// Recovery strategy (retry vs. never) for this error type + recovery: RecoveryInfo, +} + +impl HttpError { + /// Creates an outage error that preserves the original request for retry. + fn outage(rejected_request: Request) -> Self { + Self::caused_by( + Some(Box::new(rejected_request)), + RecoveryInfo::unavailable().delay(Duration::from_millis(100)), + "simulated outage", + ) + } + + /// Restores the original request with a modified endpoint for retry. + /// + /// This is called by `restore_input_on_error()` to extract and modify the + /// original request. It changes the URI to the fallback endpoint and returns + /// the modified request for the next retry attempt. + fn try_restore_request(&mut self) -> Option> { + self.rejected_request + .take() // Extract the stored request + .map(|boxed_request| *boxed_request) // Unbox it + .map(|mut request| { + // Modify the request to use the fallback endpoint + *request.uri_mut() = ENDPOINT_ALIVE.parse().unwrap(); + println!("Restored request with fallback endpoint: {}", request.uri()); + request + }) + } +} + +impl Recovery for HttpError { + fn recovery(&self) -> RecoveryInfo { + self.recovery.clone() + } +} diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs new file mode 100644 index 00000000..346baa65 --- /dev/null +++ b/crates/seatbelt/examples/timeout.rs @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. + +//! Simple timeout resilience middleware example. +//! +//! This example demonstrates the basic usage of the timeout middleware to cancel +//! long-running operations. + +use std::time::Duration; + +use anyhow::anyhow; +use layered::{Execute, Service, Stack}; +use oxidizer_rt::Builtins; +use seatbelt::SeatbeltOptions; +use seatbelt::timeout::Timeout; + +const TIMEOUT_DURATION: Duration = Duration::from_millis(100); +const PROCESSING_DELAY: Duration = Duration::from_millis(500); + +#[oxidizer_rt::main] +async fn main(state: Builtins) -> anyhow::Result<()> { + // Create common options + let options = SeatbeltOptions::new(&state); + + // Define stack with timeout layer + let stack = ( + Timeout::layer("my_timeout", &options) + // Required: specify the timeout duration + .timeout(TIMEOUT_DURATION) + // Required: create error output for timeouts + .timeout_error(|args| { + anyhow!( + "timeout occurred, timeout: {}ms", + args.timeout().as_millis() + ) + }), + Execute::new(move |_input| { + let clock = state.clock().clone(); + async move { + clock.delay(PROCESSING_DELAY).await; // Simulate some processing delay so the timeout can trigger + Ok(()) + } + }), + ); + + // Create the service from the stack + let service = stack.build(); + + for i in 0..10 { + // Execute the service, results in a timeout error + let timeout_error = service.execute(i.to_string()).await.unwrap_err(); + println!("{i} attempt, error: {timeout_error}"); + } + + Ok(()) +} diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs new file mode 100644 index 00000000..8121e395 --- /dev/null +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -0,0 +1,94 @@ +// Copyright (c) Microsoft Corporation. + +//! Advanced timeout resilience middleware example. +//! +//! This example demonstrates advanced usage of the timeout middleware, including working with +//! Result-based outputs, timeout callbacks, and dynamic timeout durations based on input. + +use std::time::Duration; + +use anyhow::anyhow; +use layered::{Execute, Service, Stack}; +use oxidizer_rt::Builtins; +use oxidizer_telemetry::Telemetry; +use oxidizer_telemetry::destination::{stdout_logs, stdout_metrics}; +use oxidizer_telemetry::logs::InterceptTracingToLogExporter; +use seatbelt::SeatbeltOptions; +use seatbelt::timeout::Timeout; +use tracing_subscriber::prelude::*; + +const TIMEOUT_DURATION: Duration = Duration::from_millis(20); +const PROCESSING_DELAY: Duration = Duration::from_secs(1); + +#[oxidizer_rt::main] +async fn main(state: Builtins) -> anyhow::Result<()> { + // Configure telemetry to see the timeout metrics and logs + let telemetry = configure_telemetry(); + + // Create service options + let options: SeatbeltOptions> = SeatbeltOptions::new(&state) + .pipeline_name("my_pipeline") + .meter_provider(telemetry.meter_provider()); + + // Define stack with timeout layer + let stack = ( + Timeout::layer("my_timeout", &options) + // Required: specify the timeout duration + .timeout(TIMEOUT_DURATION) + // Required: create error output for timeouts + .timeout_error(|args| { + anyhow!( + "timeout occurred, timeout: {}ms", + args.timeout().as_millis() + ) + }) + // Optional: callback to be invoked when a timeout occurs + .on_timeout(|_out, args| { + println!( + "timeout occurred, timeout: {}ms", + args.timeout().as_millis() + ); + }) + // Optional: override the default timeout duration by inspecting the input + .timeout_override(|input, _args| match input.as_str() { + "2" => Some(Duration::from_millis(300)), + _ => None, + }), + Execute::new(move |_input| { + let clock = state.clock().clone(); + async move { + // Simulate some processing delay so the timeout can trigger + clock.delay(PROCESSING_DELAY).await; + Ok(()) + } + }), + ); + + // Create the service from the stack + let service = stack.build(); + + for i in 0..10 { + // Execute the service, results in a timeout error + match service.execute(i.to_string()).await { + Ok(()) => println!("execute, input: {i}, result: success"), + Err(e) => println!("execute, input: {i}, error: {e}"), + } + } + + Ok(()) +} + +fn configure_telemetry() -> Telemetry { + let telemetry = oxidizer_telemetry::builder() + .destination(stdout_metrics()) + .unwrap() + .destination(stdout_logs()) + .unwrap() + .build(); + + tracing_subscriber::registry() + .with_tracing_interception(telemetry.logger_provider()) + .init(); + + telemetry +} diff --git a/crates/seatbelt/favicon.ico b/crates/seatbelt/favicon.ico new file mode 100644 index 00000000..ccae8114 --- /dev/null +++ b/crates/seatbelt/favicon.ico @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48225cb42c8ac02df5dd4a9bdba29c1e8d10436cc27055d6a021bcb951d4145 +size 480683 diff --git a/crates/seatbelt/logo.png b/crates/seatbelt/logo.png new file mode 100644 index 00000000..f2bd6691 --- /dev/null +++ b/crates/seatbelt/logo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ad64ebb0185d7cd153acc291989d72e3a0094603d8bfe781a220861de247f2 +size 17876 diff --git a/crates/seatbelt/src/circuit_breaker/args.rs b/crates/seatbelt/src/circuit_breaker/args.rs new file mode 100644 index 00000000..d5729227 --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/args.rs @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use tick::Clock; + +use crate::circuit_breaker::PartitionKey; + +/// Arguments for the [`recovery_with`][super::CircuitBreakerLayer::recovery_with] callback function. +/// +/// Provides context for recovery classification in the circuit breaker. +#[derive(Debug)] +#[non_exhaustive] +pub struct RecoveryArgs<'a> { + pub(crate) partition_key: &'a PartitionKey, + pub(crate) clock: &'a Clock, +} + +impl RecoveryArgs<'_> { + /// Returns the partition key associated with the recovery evaluation. + #[must_use] + pub fn partition_key(&self) -> &PartitionKey { + self.partition_key + } + + /// Returns a reference to the clock use by the circuit breaker. + #[must_use] + pub fn clock(&self) -> &Clock { + self.clock + } +} + +/// Arguments for the [`rejected_input`][super::CircuitBreakerLayer::rejected_input] callback function. +/// +/// Provides context for generating outputs when the inputs are rejected by the circuit breaker. +#[derive(Debug)] +pub struct RejectedInputArgs<'a> { + pub(crate) partition_key: &'a PartitionKey, +} + +impl RejectedInputArgs<'_> { + /// Returns the partition key associated with the rejected input. + #[must_use] + pub fn partition_key(&self) -> &PartitionKey { + self.partition_key + } +} + +/// Arguments for the [`on_probing`][super::CircuitBreakerLayer::on_probing] callback function. +/// +/// Provides context when the circuit breaker enters the probing state to test if the service has recovered. +#[derive(Debug)] +#[non_exhaustive] +pub struct OnProbingArgs<'a> { + pub(crate) partition_key: &'a PartitionKey, +} + +impl OnProbingArgs<'_> { + /// Returns the partition key associated with the probing execution. + #[must_use] + pub fn partition_key(&self) -> &PartitionKey { + self.partition_key + } +} + +/// Arguments for the [`on_closed`][super::CircuitBreakerLayer::on_closed] callback function. +/// +/// Provides context when the circuit breaker transitions to the closed state, allowing normal operation. +#[derive(Debug)] +#[non_exhaustive] +pub struct OnClosedArgs<'a> { + pub(crate) partition_key: &'a PartitionKey, + pub(crate) open_duration: std::time::Duration, +} + +impl OnClosedArgs<'_> { + /// Returns the partition key associated with this event. + #[must_use] + pub fn partition_key(&self) -> &PartitionKey { + self.partition_key + } + + /// Returns the duration the circuit was open before closing. + #[must_use] + pub fn open_duration(&self) -> Duration { + self.open_duration + } +} + +/// Arguments for the [`on_opened`][super::CircuitBreakerLayer::on_opened] callback function. +/// +/// Provides context when the circuit breaker transitions to the open state, blocking requests due to failures. +#[derive(Debug)] +#[non_exhaustive] +pub struct OnOpenedArgs<'a> { + pub(crate) partition_key: &'a PartitionKey, +} + +impl OnOpenedArgs<'_> { + /// Returns the partition key associated with this event. + #[must_use] + pub fn partition_key(&self) -> &PartitionKey { + self.partition_key + } +} diff --git a/crates/seatbelt/src/circuit_breaker/callbacks.rs b/crates/seatbelt/src/circuit_breaker/callbacks.rs new file mode 100644 index 00000000..b391324a --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/callbacks.rs @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. + +use super::{ + OnClosedArgs, OnOpenedArgs, OnProbingArgs, PartitionKey, RecoveryArgs, RejectedInputArgs, +}; +use crate::RecoveryInfo; + +crate::define_fn_wrapper!(PartionKeyProvider(Fn(&In) -> PartitionKey)); +crate::define_fn_wrapper!(ShouldRecover(Fn(&Out, RecoveryArgs) -> RecoveryInfo)); +crate::define_fn_wrapper!(RejectedInput(Fn(In, RejectedInputArgs) -> Out)); +crate::define_fn_wrapper!(OnProbing(Fn(&mut In, OnProbingArgs))); +crate::define_fn_wrapper!(OnOpened(Fn(&Out, OnOpenedArgs))); +crate::define_fn_wrapper!(OnClosed(Fn(&Out, OnClosedArgs))); diff --git a/crates/seatbelt/src/circuit_breaker/constants.rs b/crates/seatbelt/src/circuit_breaker/constants.rs new file mode 100644 index 00000000..4977829a --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/constants.rs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +/// Minimum allowed duration for the circuit breaker's sampling window. +pub(crate) const MIN_SAMPLING_DURATION: Duration = Duration::from_secs(1); + +/// Default minimum throughput (number of requests) in the sampling window before +/// the circuit breaker can evaluate the failure rate and potentially trip the circuit. +/// +/// The defaults taken from Polly V8: +/// +pub(crate) const DEFAULT_MIN_THROUGHPUT: u32 = 100; + +/// Default duration of the circuit breaker's sampling window. +/// +/// The defaults taken from Polly V8: +/// +pub(crate) const DEFAULT_SAMPLING_DURATION: Duration = Duration::from_secs(30); + +/// Default failure threshold (percentage of failed requests) in the sampling window +/// that will trip the circuit breaker. +/// +/// The defaults taken from Polly V8: +/// +pub(crate) const DEFAULT_FAILURE_THRESHOLD: f32 = 0.1; + +/// Default duration that the circuit breaker remains open (broken) before +/// transitioning to half-open to test if the service has recovered. +/// +/// The defaults taken from Polly V8: +/// +pub(crate) const DEFAULT_BREAK_DURATION: Duration = Duration::from_secs(5); + +pub(crate) const ERR_POISONED_LOCK: &str = "poisoned lock - cannot continue execution because security and privacy guarantees can no longer be upheld"; diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs new file mode 100644 index 00000000..f7f2c23f --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs @@ -0,0 +1,676 @@ +// Copyright (c) Microsoft Corporation. + +use std::sync::Mutex; +use std::time::{Duration, Instant}; + +use tick::Clock; + +use super::{EngineOptions, EnterCircuitResult, ExitCircuitResult}; +use crate::circuit_breaker::constants::ERR_POISONED_LOCK; +use crate::circuit_breaker::engine::probing::{AllowProbeResult, Probes, ProbingResult}; +use crate::circuit_breaker::{ + CircuitEngine, ExecutionMode, ExecutionResult, HealthMetrics, HealthStatus, +}; + +/// Engine that manages the state of the circuit breaker. +#[derive(Debug)] +pub(crate) struct EngineCore { + state: Mutex, + options: EngineOptions, + clock: Clock, +} + +impl EngineCore { + pub fn new(options: EngineOptions, clock: Clock) -> Self { + Self { + state: Mutex::new(State::Closed { + health: options.health_metrics_builder.build(), + }), + options, + clock, + } + } +} + +impl CircuitEngine for EngineCore { + fn enter(&self) -> EnterCircuitResult { + let now = self.clock.instant(); + + // NOTE: Remember to execute all expensive operations (like time checks) outside the lock. + self.state + .lock() + .expect(ERR_POISONED_LOCK) + .enter(now, &self.options) + } + + fn exit(&self, result: ExecutionResult, _mode: ExecutionMode) -> ExitCircuitResult { + let now = self.clock.instant(); + + // NOTE: Remember to execute all expensive operations (like time checks) outside the lock. + self.state + .lock() + .expect(ERR_POISONED_LOCK) + .exit(result, now, &self.options) + } +} + +#[derive(Debug)] +enum State { + Closed { health: HealthMetrics }, + Open { open_until: Instant, stats: Stats }, + HalfOpen { probes: Probes, stats: Stats }, +} + +impl State { + fn enter(&mut self, now: Instant, settings: &EngineOptions) -> EnterCircuitResult { + match self { + Self::Closed { .. } => EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + Self::Open { open_until, stats } => { + if now >= *open_until { + let mut probes = Probes::new(&settings.probes); + let allow = probes.allow_probe(now); + stats.record_allow_result(allow); + + *self = Self::HalfOpen { + probes, + stats: stats.clone(), + }; + EnterCircuitResult::from(allow) + } else { + stats.rejected = stats.rejected.saturating_add(1); + EnterCircuitResult::Rejected + } + } + Self::HalfOpen { + probes, + stats: info, + } => { + let allow = probes.allow_probe(now); + info.record_allow_result(allow); + EnterCircuitResult::from(allow) + } + } + } + + fn exit( + &mut self, + result: ExecutionResult, + now: Instant, + settings: &EngineOptions, + ) -> ExitCircuitResult { + match self { + Self::Closed { health } => { + // first, record the result and evaluate the health metrics + health.record(result, now); + let health = health.health_info(); + + // decide the next state based on health status + match health.status() { + // Health is good, remain in a closed state + HealthStatus::Healthy => ExitCircuitResult::Unchanged, + // Health is poor, transition to Open state + HealthStatus::Unhealthy => { + *self = Self::Open { + open_until: now + settings.break_duration, + stats: Stats::new(now), + }; + ExitCircuitResult::Opened(health) + } + } + } + Self::Open { stats, .. } => { + // Record lost results for statistics purposes + stats.probes_lost = stats.probes_lost.saturating_add(1); + + // In open state, we don't process results. This can happen when multiple threads are involved and + // the state of circuit breaker changes between enter and exit calls since these are separate + // method calls that could be interleaved with other threads. Ignore the result. + ExitCircuitResult::Unchanged + } + Self::HalfOpen { probes, stats } => { + // record the result of the probe + stats.record_probe_execution_result(result); + + match probes.record(result, now) { + ProbingResult::Success => { + let stats = stats.clone(); + + *self = Self::Closed { + health: settings.health_metrics_builder.build(), + }; + + ExitCircuitResult::Closed(stats) + } + ProbingResult::Failure => { + stats.re_opened = stats.re_opened.saturating_add(1); + + *self = Self::Open { + open_until: now + settings.break_duration, + stats: stats.clone(), + }; + + ExitCircuitResult::Reopened + } + ProbingResult::Pending => ExitCircuitResult::Unchanged, + } + } + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Stats { + pub opened_at: Instant, + pub re_opened: usize, + pub probes_total: usize, + pub probes_lost: usize, + pub probes_successes: usize, + pub probes_failures: usize, + pub rejected: usize, +} + +impl Stats { + pub fn new(opened_at: Instant) -> Self { + Self { + opened_at, + probes_total: 0, + probes_lost: 0, + probes_successes: 0, + probes_failures: 0, + rejected: 0, + re_opened: 0, + } + } + + pub fn opened_duration(&self, now: Instant) -> Duration { + now.saturating_duration_since(self.opened_at) + } + + fn record_allow_result(&mut self, allow: AllowProbeResult) { + if allow == AllowProbeResult::Accepted { + self.probes_total = self.probes_total.saturating_add(1); + } else { + self.rejected = self.rejected.saturating_add(1); + } + } + + fn record_probe_execution_result(&mut self, result: ExecutionResult) { + match result { + ExecutionResult::Success => { + self.probes_successes = self.probes_successes.saturating_add(1); + } + ExecutionResult::Failure => { + self.probes_failures = self.probes_failures.saturating_add(1); + } + } + } +} + +#[cfg(test)] +mod tests { + use std::ops::Deref; + + use tick::ClockControl; + + use super::*; + use crate::circuit_breaker::HealthMetricsBuilder; + use crate::circuit_breaker::engine::probing::ProbesOptions; + + fn create_test_settings() -> EngineOptions { + EngineOptions { + break_duration: Duration::from_secs(5), + health_metrics_builder: HealthMetricsBuilder::new( + Duration::from_secs(30), + 0.1, // 10% failure threshold + 10, // minimum 10 requests + ), + probes: ProbesOptions::quick(Duration::from_secs(2)), + } + } + + fn create_test_engine() -> EngineCore { + let settings = create_test_settings(); + let clock = Clock::new_frozen(); + EngineCore::new(settings, clock) + } + + fn open_engine(engine: &EngineCore) { + const MAX_ATTEMPTS: usize = 1000; + + for _attempt in 0..MAX_ATTEMPTS { + engine.enter(); + let result = engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + if matches!(result, ExitCircuitResult::Opened(_)) { + return; + } + } + + panic!("failed to open the circuit after {MAX_ATTEMPTS} attempts"); + } + + #[test] + fn new_with_valid_settings_creates_closed_state() { + let engine = create_test_engine(); + + // Verify engine was created (we can't directly inspect the state due to encapsulation) + // but we can verify it starts in closed state by checking enter() behavior + let result = engine.enter(); + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal + } + )); + } + + #[test] + fn enter_when_closed_accepts_request() { + let engine = create_test_engine(); + + let result = engine.enter(); + + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal + } + )); + } + + #[test] + fn enter_when_open_before_timeout_rejects_request() { + let engine = create_test_engine(); + open_engine(&engine); + + // Verify circuit is now open + let result = engine.enter(); + assert!(matches!(result, EnterCircuitResult::Rejected)); + } + + #[test] + fn enter_when_open_after_timeout_transitions_to_half_open() { + let settings = create_test_settings(); + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force circuit to open + open_engine(&engine); + + // Advance time beyond break duration + control.advance(Duration::from_secs(6)); + + let result = engine.enter(); + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Probe + } + )); + } + + #[test] + fn enter_when_half_open_within_break_duration_rejects_request() { + let settings = create_test_settings(); + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force to open then half-open + open_engine(&engine); + control.advance(Duration::from_secs(6)); + engine.enter(); // Transitions to half-open + + // Try entering again immediately (within break duration) + let result = engine.enter(); + assert!(matches!(result, EnterCircuitResult::Rejected)); + } + + #[test] + fn enter_when_half_open_after_break_duration_resets_half_open_timer() { + let settings = create_test_settings(); + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force to open then half-open + open_engine(&engine); + control.advance(Duration::from_secs(6)); + engine.enter(); // Transitions to half-open + + // Advance time beyond break duration while in half-open + control.advance(Duration::from_secs(6)); + + let result = engine.enter(); + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Probe + } + )); + } + + #[test] + fn exit_when_closed_with_success_remains_unchanged() { + let engine = create_test_engine(); + engine.enter(); + + let result = engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + + assert!(matches!(result, ExitCircuitResult::Unchanged)); + } + + #[test] + fn exit_when_closed_with_enough_failures_opens_circuit() { + let settings = EngineOptions { + break_duration: Duration::from_secs(5), + health_metrics_builder: HealthMetricsBuilder::new( + Duration::from_secs(30), + 0.1, // 10% failure threshold + 20, // minimum 20 requests (higher than default 10 for this test) + ), + probes: ProbesOptions::quick(Duration::from_secs(2)), + }; + let clock = Clock::new_frozen(); + let engine = EngineCore::new(settings, clock); + + // Record 19 successes and 3 failures = 22 total requests with ~13.6% failure rate + for _ in 0..19 { + engine.enter(); + engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + } + for _ in 0..2 { + engine.enter(); + engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + } + + // One more failure to trigger opening: 3 failures out of 22 total = ~13.6% > 10% + engine.enter(); + let result = engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + + assert!(matches!(result, ExitCircuitResult::Opened(_))); + } + + #[test] + fn exit_when_closed_with_insufficient_failures_remains_unchanged() { + let engine = create_test_engine(); + + // Record some failures but not enough to exceed a threshold (need at least 10 requests) + for _ in 0..5 { + engine.enter(); + engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + } + + engine.enter(); + let result = engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + + assert!(matches!(result, ExitCircuitResult::Unchanged)); + } + + #[test] + fn exit_when_open_ignores_result() { + let engine = create_test_engine(); + open_engine(&engine); + + // Try to record success in open state + let result = engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + assert!(matches!(result, ExitCircuitResult::Unchanged)); + + if let State::Open { stats, .. } = engine.state.lock().unwrap().deref() { + assert_eq!(stats.probes_lost, 1); + } else { + panic!("expected engine to be in Open state"); + } + } + + #[test] + fn exit_when_half_open_with_success_closes_circuit() { + let settings = create_test_settings(); + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force to open then half-open + open_engine(&engine); + control.advance(Duration::from_secs(6)); + engine.enter(); // Transitions to half-open + + let result = engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + + assert!( + matches!(result, ExitCircuitResult::Closed(stats) if stats.probes_successes == 1 && stats.probes_total == 1) + ); + } + + #[test] + fn exit_when_half_open_with_failure_reopens_circuit() { + let settings = create_test_settings(); + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force to open then half-open + open_engine(&engine); + control.advance(Duration::from_secs(6)); + engine.enter(); // Transitions to half-open + + let result = engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + + assert!(matches!(result, ExitCircuitResult::Reopened)); + } + + #[test] + fn circuit_breaker_full_cycle() { + let settings = create_test_settings(); + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Start in closed state + let result = engine.enter(); + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal + } + )); + + // Force to open state + open_engine(&engine); + + // Verify open state rejects requests + let result = engine.enter(); + assert!(matches!(result, EnterCircuitResult::Rejected)); + + // Advance time to allow transition to half-open + control.advance(Duration::from_secs(6)); + let result = engine.enter(); + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Probe + } + )); + + // Successful probe closes the circuit + let result = engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + + if let ExitCircuitResult::Closed(stats) = &result { + assert_eq!(stats.probes_successes, 1); + assert_eq!(stats.probes_total, 1); + assert_eq!(stats.rejected, 1); + assert_eq!(stats.probes_failures, 0); + assert_eq!(stats.probes_lost, 0); + assert_eq!(stats.re_opened, 0); + } else { + panic!("expected circuit to close after successful probe"); + } + + // Verify back to normal operation + let result = engine.enter(); + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal + } + )); + } + + #[test] + fn circuit_breaker_half_open_failure_cycle() { + let settings = create_test_settings(); + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force to open state + open_engine(&engine); + + // Transition to half-open + control.advance(Duration::from_secs(6)); + engine.enter(); + + // Failed probe reopens circuit + let result = engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + assert!(matches!(result, ExitCircuitResult::Reopened)); + + // Verify circuit is open again + let result = engine.enter(); + assert!(matches!(result, EnterCircuitResult::Rejected)); + + // Transition to half-open + control.advance(Duration::from_secs(6)); + engine.enter(); + + let result = engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + + if let ExitCircuitResult::Closed(stats) = &result { + assert_eq!(stats.probes_successes, 1); + assert_eq!(stats.probes_total, 2); + assert_eq!(stats.rejected, 1); + assert_eq!(stats.probes_failures, 1); + assert_eq!(stats.probes_lost, 0); + assert_eq!(stats.re_opened, 1); + } else { + panic!("expected circuit to close after successful probe"); + } + } + + #[test] + fn concurrent_enter_exit_operations() { + let engine = create_test_engine(); + + // Simulate operations where enter and exit are called separately + // (though each method call is atomic due to the internal mutex) + engine.enter(); + let result1 = engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + + engine.enter(); + let result2 = engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + + // Both should complete without panicking + assert!(matches!(result1, ExitCircuitResult::Unchanged)); + assert!(matches!(result2, ExitCircuitResult::Unchanged)); + } + + #[test] + fn engine_with_custom_break_duration() { + let settings = EngineOptions { + break_duration: Duration::from_millis(100), + health_metrics_builder: HealthMetricsBuilder::new(Duration::from_secs(30), 0.1, 50), + probes: ProbesOptions::quick(Duration::from_secs(2)), + }; + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force to open state + open_engine(&engine); + + // Verify still rejected just before timeout + control.advance(Duration::from_millis(99)); + let result = engine.enter(); + assert!(matches!(result, EnterCircuitResult::Rejected)); + + // Verify accepted just after timeout + control.advance(Duration::from_millis(2)); + let result = engine.enter(); + assert!(matches!( + result, + EnterCircuitResult::Accepted { + mode: ExecutionMode::Probe + } + )); + } + + #[test] + fn engine_with_custom_failure_threshold() { + let settings = EngineOptions { + break_duration: Duration::from_secs(5), + health_metrics_builder: HealthMetricsBuilder::new( + Duration::from_secs(30), + 0.5, // 50% failure threshold + 10, // minimum 10 requests + ), + probes: ProbesOptions::quick(Duration::from_secs(2)), + }; + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Record 6 failures and 4 successes (60% failure rate, 10 total requests) + for _ in 0..6 { + engine.enter(); + engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + } + for _ in 0..3 { + engine.enter(); + engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + } + + // Add one more failure to make it 7 failures out of 10 (70% > 50% threshold) + engine.enter(); + let result = engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + + assert!(matches!(result, ExitCircuitResult::Opened(_))); + } + + #[test] + fn stats_record_probe_execution_result_increments_correctly() { + let mut stats = Stats::new(Instant::now()); + + stats.record_probe_execution_result(ExecutionResult::Success); + assert_eq!(stats.probes_successes, 1); + assert_eq!(stats.probes_failures, 0); + + stats.record_probe_execution_result(ExecutionResult::Failure); + assert_eq!(stats.probes_successes, 1); + assert_eq!(stats.probes_failures, 1); + } + + #[test] + fn stats_record_allow_result_increments_correctly() { + let mut stats = Stats::new(Instant::now()); + + stats.record_allow_result(AllowProbeResult::Accepted); + assert_eq!(stats.probes_total, 1); + assert_eq!(stats.rejected, 0); + + stats.record_allow_result(AllowProbeResult::Rejected); + assert_eq!(stats.probes_total, 1); + assert_eq!(stats.rejected, 1); + } + + #[test] + fn stats_opened_for_calculates_duration_correctly() { + let opened_at = Instant::now(); + let stats = Stats::new(opened_at); + + // Simulate some time passing + let later = opened_at + Duration::from_secs(10); + + assert_eq!(stats.opened_duration(later), Duration::from_secs(10)); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs new file mode 100644 index 00000000..cf747a3b --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. + +use crate::circuit_breaker::{ + CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult, +}; + +/// Fake engine to be used in tests. +#[derive(Debug)] +pub(crate) struct EngineFake { + enter_result: EnterCircuitResult, + exit_result: ExitCircuitResult, +} + +impl EngineFake { + pub fn new(enter_result: EnterCircuitResult, exit_result: ExitCircuitResult) -> Self { + Self { + enter_result, + exit_result, + } + } +} + +impl CircuitEngine for EngineFake { + fn enter(&self) -> EnterCircuitResult { + self.enter_result.clone() + } + + fn exit(&self, _result: ExecutionResult, _mode: ExecutionMode) -> ExitCircuitResult { + self.exit_result.clone() + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs new file mode 100644 index 00000000..3653939f --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs @@ -0,0 +1,279 @@ +// Copyright (c) Microsoft Corporation. + +use opentelemetry::StringValue; +use opentelemetry::metrics::Counter; +use tick::Clock; + +use crate::circuit_breaker::telemetry::*; +use crate::circuit_breaker::{ + CircuitEngine, CircuitState, EnterCircuitResult, ExecutionMode, ExecutionResult, + ExitCircuitResult, +}; +use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; + +/// Wrapper around a circuit engine to add telemetry capabilities. +#[derive(Debug)] +pub(crate) struct EngineTelemetry { + inner: T, + pub(super) strategy_name: StringValue, + pub(super) pipeline_name: StringValue, + pub(super) resilience_events: Counter, + pub(super) partition_key: StringValue, + pub(super) clock: Clock, +} + +impl EngineTelemetry { + pub fn new( + inner: T, + strategy_name: StringValue, + pipeline_name: StringValue, + partition_key: StringValue, + resilience_events: Counter, + clock: Clock, + ) -> Self { + Self { + inner, + strategy_name, + pipeline_name, + resilience_events, + partition_key, + clock, + } + } +} + +impl CircuitEngine for EngineTelemetry { + fn enter(&self) -> EnterCircuitResult { + let enter_result = self.inner.enter(); + + if matches!(enter_result, EnterCircuitResult::Rejected) { + self.resilience_events.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_REJECTED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ], + ); + + tracing::event!( + name: "seatbelt.circuit_breaker.rejected", + tracing::Level::WARN, + pipeline.name = self.pipeline_name.as_str(), + strategy.name = self.strategy_name.as_str(), + circuit_breaker.state = CircuitState::Open.as_str(), + circuit_breaker.partition = self.partition_key.as_str(), + ); + } + + enter_result + } + + fn exit(&self, result: ExecutionResult, mode: ExecutionMode) -> ExitCircuitResult { + if mode == ExecutionMode::Probe { + self.resilience_events.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_PROBE_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::HalfOpen.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + opentelemetry::KeyValue::new(CIRCUIT_PROBE_RESULT, result.as_str()), + ], + ); + + tracing::event!( + name: "seatbelt.circuit_breaker.probe", + tracing::Level::INFO, + pipeline.name = self.pipeline_name.as_str(), + strategy.name = self.strategy_name.as_str(), + circuit_breaker.state = CircuitState::HalfOpen.as_str(), + circuit_breaker.partition = self.partition_key.as_str(), + circuit_breaker.probe.result = result.as_str(), + ); + } + + let exit_result = self.inner.exit(result, mode); + + // Emit telemetry events for circuit state changes + match exit_result { + ExitCircuitResult::Opened(health) => { + self.resilience_events.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_OPENED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ], + ); + + tracing::event!( + name: "seatbelt.circuit_breaker.opened", + tracing::Level::WARN, + pipeline.name = self.pipeline_name.as_str(), + strategy.name = self.strategy_name.as_str(), + circuit_breaker.state = CircuitState::Open.as_str(), + circuit_breaker.partition = self.partition_key.as_str(), + circuit_breaker.health.failure_rate = health.failure_rate(), + circuit_breaker.health.throughput = health.throughput(), + ); + } + ExitCircuitResult::Closed(ref stats) => { + self.resilience_events.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_CLOSED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Closed.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ], + ); + + tracing::event!( + name: "seatbelt.circuit_breaker.closed", + tracing::Level::INFO, + pipeline.name = self.pipeline_name.as_str(), + strategy.name = self.strategy_name.as_str(), + circuit_breaker.state = CircuitState::Closed.as_str(), + circuit_breaker.open.duration = stats.opened_duration(self.clock.instant()).as_secs(), + circuit_breaker.partition = self.partition_key.as_str(), + circuit_breaker.probes.total = stats.probes_total, + circuit_breaker.probes.successfull = stats.probes_successes, + circuit_breaker.probes.failed = stats.probes_failures, + circuit_breaker.probes.lost = stats.probes_lost, + circuit_breaker.rejections = stats.rejected, + circuit_breaker.re_opened = stats.re_opened, + ); + } + ExitCircuitResult::Reopened | ExitCircuitResult::Unchanged => { + // We do not report a telemetry event for reopening the circuit + // as it is redundant because it is always preceded by an "opened" + // event, or when there is no state change. + } + } + + exit_result + } +} + +#[cfg(test)] +#[cfg(not(miri))] +mod tests { + use std::time::Instant; + + use opentelemetry::KeyValue; + + use super::*; + use crate::circuit_breaker::{EngineFake, HealthInfo, Stats}; + use crate::telemetry::metrics::{create_meter, create_resilience_event_counter}; + use crate::testing::MetricTester; + + #[test] + fn enter_rejected_ensure_telemetry() { + let (tester, telemetry_engine) = create_engine(EngineFake::new( + EnterCircuitResult::Rejected, + ExitCircuitResult::Closed(Stats::new(Instant::now())), + )); + + let _ = telemetry_engine.enter(); + + tester.assert_attributes( + &[ + KeyValue::new(PIPELINE_NAME, "test_pipeline"), + KeyValue::new(STRATEGY_NAME, "test_strategy"), + KeyValue::new(EVENT_NAME, CIRCUIT_REJECTED_EVENT_NAME), + KeyValue::new(CIRCUIT_PARTITION, "test_partition"), + KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), + ], + Some(5), + ); + } + + #[test] + fn exit_probe_ensure_telemetry() { + let (tester, telemetry_engine) = create_engine(EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Unchanged, + )); + + let _ = telemetry_engine.exit(ExecutionResult::Success, ExecutionMode::Probe); + + tester.assert_attributes( + &[ + KeyValue::new(PIPELINE_NAME, "test_pipeline"), + KeyValue::new(STRATEGY_NAME, "test_strategy"), + KeyValue::new(EVENT_NAME, CIRCUIT_PROBE_EVENT_NAME), + KeyValue::new(CIRCUIT_PARTITION, "test_partition"), + KeyValue::new(CIRCUIT_STATE, CircuitState::HalfOpen.as_str()), + KeyValue::new(CIRCUIT_PROBE_RESULT, ExecutionResult::Success.as_str()), + ], + Some(6), + ); + } + + #[test] + fn circuit_closed_ensure_telemetry() { + let (tester, telemetry_engine) = create_engine(EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Closed(Stats::new(Instant::now())), + )); + + let _ = telemetry_engine.exit(ExecutionResult::Success, ExecutionMode::Normal); + + tester.assert_attributes( + &[ + KeyValue::new(PIPELINE_NAME, "test_pipeline"), + KeyValue::new(STRATEGY_NAME, "test_strategy"), + KeyValue::new(EVENT_NAME, CIRCUIT_CLOSED_EVENT_NAME), + KeyValue::new(CIRCUIT_PARTITION, "test_partition"), + KeyValue::new(CIRCUIT_STATE, CircuitState::Closed.as_str()), + ], + Some(5), + ); + } + + #[test] + fn circuit_opened_ensure_telemetry() { + let (tester, telemetry_engine) = create_engine(EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Opened(HealthInfo::new(1, 0, 0.75, 100)), + )); + + let _ = telemetry_engine.exit(ExecutionResult::Failure, ExecutionMode::Normal); + tester.assert_attributes( + &[ + KeyValue::new(PIPELINE_NAME, "test_pipeline"), + KeyValue::new(STRATEGY_NAME, "test_strategy"), + KeyValue::new(EVENT_NAME, CIRCUIT_OPENED_EVENT_NAME), + KeyValue::new(CIRCUIT_PARTITION, "test_partition"), + KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), + ], + Some(5), + ); + } + + fn create_engine(engine: EngineFake) -> (MetricTester, EngineTelemetry) { + let tester = MetricTester::new(); + let telemetry_engine = EngineTelemetry::new( + engine, + "test_strategy".into(), + "test_pipeline".into(), + "test_partition".into(), + create_resilience_event_counter(&create_meter(tester.meter_provider())), + Clock::new_frozen(), + ); + (tester, telemetry_engine) + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/engines.rs b/crates/seatbelt/src/circuit_breaker/engine/engines.rs new file mode 100644 index 00000000..faa72bd9 --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/engines.rs @@ -0,0 +1,108 @@ +// Copyright (c) Microsoft Corporation. + +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; + +use opentelemetry::StringValue; +use opentelemetry::metrics::Counter; +use tick::Clock; + +use crate::circuit_breaker::constants::ERR_POISONED_LOCK; +use crate::circuit_breaker::{Engine, EngineCore, EngineOptions, EngineTelemetry, PartitionKey}; + +/// Manages circuit breaker engines for different partition keys. +#[derive(Debug)] +pub(crate) struct Engines { + map: Mutex>>, + engine_options: EngineOptions, + clock: Clock, + strategy_name: StringValue, + pipeline_name: StringValue, + resilience_event_counter: Counter, +} + +impl Engines { + pub fn new( + engine_options: EngineOptions, + clock: Clock, + strategy_name: StringValue, + pipeline_name: StringValue, + resilience_event_counter: Counter, + ) -> Self { + Self { + map: Mutex::new(HashMap::new()), + engine_options, + clock, + strategy_name, + pipeline_name, + resilience_event_counter, + } + } + + pub fn get_engine(&self, key: &PartitionKey) -> Arc { + let mut map = self.map.lock().expect(ERR_POISONED_LOCK); + + if let Some(engine) = map.get(key) { + return Arc::clone(engine); + } + + let engine = Arc::new(self.create_engine(key)); + map.insert(key.clone(), Arc::clone(&engine)); + engine + } + + #[cfg(test)] + fn len(&self) -> usize { + let map = self.map.lock().expect(ERR_POISONED_LOCK); + map.len() + } + + fn create_engine(&self, key: &PartitionKey) -> Engine { + EngineTelemetry::new( + EngineCore::new(self.engine_options.clone(), self.clock.clone()), + self.strategy_name.clone(), + self.pipeline_name.clone(), + key.to_string().into(), + self.resilience_event_counter.clone(), + self.clock.clone(), + ) + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + use crate::circuit_breaker::HealthMetricsBuilder; + use crate::circuit_breaker::engine::probing::ProbesOptions; + use crate::telemetry::metrics::create_resilience_event_counter; + + #[test] + fn get_engine_ok() { + let engines = Engines::new( + EngineOptions { + break_duration: Duration::from_secs(60), + health_metrics_builder: HealthMetricsBuilder::new( + Duration::from_millis(100), + 0.5, + 5, + ), + probes: ProbesOptions::quick(Duration::from_secs(1)), + }, + Clock::new_frozen(), + StringValue::from("strategy"), + StringValue::from("pipeline"), + create_resilience_event_counter(&opentelemetry::global::meter("test")), + ); + + assert!(Arc::ptr_eq( + &engines.get_engine(&PartitionKey::from("test")), + &engines.get_engine(&PartitionKey::from("test")) + )); + assert_eq!(engines.len(), 1); + + _ = engines.get_engine(&PartitionKey::from("test2")); + assert_eq!(engines.len(), 2); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/mod.rs b/crates/seatbelt/src/circuit_breaker/engine/mod.rs new file mode 100644 index 00000000..d6c6b3ec --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/mod.rs @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. + +use std::fmt::Debug; +use std::time::Duration; + +use crate::circuit_breaker::{ExecutionResult, HealthInfo, HealthMetricsBuilder}; + +pub(super) mod probing; + +#[derive(Debug, Copy, Clone)] +pub(crate) enum CircuitState { + Closed, + Open, + HalfOpen, +} + +impl CircuitState { + pub fn as_str(self) -> &'static str { + match self { + Self::Closed => "closed", + Self::Open => "open", + Self::HalfOpen => "half_open", + } + } +} + +/// Result of attempting to enter the circuit. +#[derive(Debug, Clone)] +pub(crate) enum EnterCircuitResult { + /// The operation is allowed to proceed. + /// + /// The `probe` indicates that this is a test operation used to evaluate whether + /// the circuit can be closed again. + Accepted { mode: ExecutionMode }, + + /// Operation is rejected due to open circuit. + Rejected, +} + +#[derive(Debug, Clone)] +pub(crate) enum ExitCircuitResult { + /// The state remains unchanged. + Unchanged, + + /// Circuit transitioned to Open state. + Opened(HealthInfo), + + /// Circuit re-transitioned to Open state due to a failure in Half-Open state. + Reopened, + + /// Circuit transitioned back to Closed state. + Closed(Stats), +} + +/// Configuration options for the circuit breaker engine. +#[derive(Debug, Clone)] +pub(crate) struct EngineOptions { + pub break_duration: Duration, + pub health_metrics_builder: HealthMetricsBuilder, + pub probes: probing::ProbesOptions, +} + +/// Determines the mode of execution for an operation. +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) enum ExecutionMode { + /// Regular operation. + Normal, + + /// A probe operation to test the health of the underlying service. + Probe, +} + +// Type alias for the default engine with telemetry. +pub type Engine = EngineTelemetry; + +/// Trait defining the behavior of a circuit breaker engine. +pub(crate) trait CircuitEngine: Debug + Send + Sync + 'static { + fn enter(&self) -> EnterCircuitResult; + + fn exit(&self, result: ExecutionResult, mode: ExecutionMode) -> ExitCircuitResult; +} + +mod engine_core; +pub(crate) use engine_core::*; + +#[cfg(test)] +mod engine_fake; +#[cfg(test)] +pub(crate) use engine_fake::*; + +mod engine_telemetry; +pub(crate) use engine_telemetry::*; + +mod engines; +pub(crate) use engines::*; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_circuit_state_as_str() { + assert_eq!(CircuitState::Closed.as_str(), "closed"); + assert_eq!(CircuitState::Open.as_str(), "open"); + assert_eq!(CircuitState::HalfOpen.as_str(), "half_open"); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs new file mode 100644 index 00000000..e4499f80 --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs @@ -0,0 +1,212 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Instant; + +use crate::circuit_breaker::engine::probing::{ + AllowProbeResult, HealthProbeOptions, ProbeOperation, ProbingResult, +}; +use crate::circuit_breaker::{ExecutionResult, HealthMetrics, HealthStatus}; +use crate::rnd::Rnd; + +#[derive(Debug)] +pub(crate) struct HealthProbe { + options: HealthProbeOptions, + metrics: HealthMetrics, + fallback_after: Option, + sample_until: Option, + rnd: Rnd, +} + +impl ProbeOperation for HealthProbe { + fn allow_probe(&mut self, now: Instant) -> AllowProbeResult { + // Sampling starts with the first probe attempt. Make sure relevant timestamps are set. + let sample_until = *self + .sample_until + .get_or_insert_with(|| now + self.options.stage_duration()); + + // Fallback probe is allowed only after the sampling duration has elapsed. + let fallback_after = *self.fallback_after.get_or_insert(sample_until); + + // Allow probe based on the probing ratio. + if self.rnd.next_f64() < self.options.probing_ratio { + return AllowProbeResult::Accepted; + } + + // Allow fallback probe to get through if we are past the sampling duration. + // This can happen if the traffic is very low and no probes were allowed + // by the rate sampling. This allows making progress in low-traffic scenarios + // as a last resort. + if now > fallback_after { + // Allow additional fallback probes only after another sampling duration + // in case allowed probe did not result in a recorded execution (e.g., due to timeout). + self.fallback_after = Some(now + self.options.stage_duration()); + return AllowProbeResult::Accepted; + } + + AllowProbeResult::Rejected + } + + fn record(&mut self, result: ExecutionResult, now: Instant) -> ProbingResult { + // Always record the result + self.metrics.record(result, now); + + // If we are still sampling, we cannot make a decision yet + if self.keep_sampling(now) { + return ProbingResult::Pending; + } + + // Sampling duration elapsed, use the health metrics to determine the result + match self.metrics.health_info().status() { + HealthStatus::Healthy => ProbingResult::Success, + HealthStatus::Unhealthy => ProbingResult::Failure, + } + } +} + +impl HealthProbe { + pub fn new(options: HealthProbeOptions) -> Self { + Self { + metrics: options.builder.build(), + options, + fallback_after: None, + sample_until: None, + rnd: Rnd::Real, + } + } + + fn keep_sampling(&self, now: Instant) -> bool { + match self.sample_until { + None => true, + Some(until) if now < until => true, + _ => false, + } + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + + #[test] + fn allow_probe_fallback() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.5, 0.1); + let mut probe = HealthProbe::new(options); + probe.rnd = Rnd::new_fixed(0.5); + let now = Instant::now(); + + assert_eq!(probe.allow_probe(now), AllowProbeResult::Rejected); + + let later = now + Duration::from_secs(5); + assert_eq!(probe.allow_probe(later), AllowProbeResult::Rejected); + + // Allowed, because we are past the sampling duration and no probes were allowed yet + let later = now + Duration::from_secs(5) + Duration::from_micros(1); + assert_eq!(probe.allow_probe(later), AllowProbeResult::Accepted); + + // Not allowed, because we already let the fallback probe through + assert_eq!(probe.allow_probe(later), AllowProbeResult::Rejected); + + // Allowed again + let later = now + Duration::from_secs(10) + Duration::from_micros(2); + assert_eq!(probe.allow_probe(later), AllowProbeResult::Accepted); + } + + #[test] + fn allow_probe_rejected_when_at_ratio() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.5, 0.1); + let mut probe = HealthProbe::new(options); + probe.rnd = Rnd::new_fixed(0.1); + + assert_eq!( + probe.allow_probe(Instant::now()), + AllowProbeResult::Rejected + ); + } + + #[test] + fn record_not_allowed_before() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.99, 0.1); + let mut probe = HealthProbe::new(options); + let now = Instant::now(); + + assert_eq!( + probe.record(ExecutionResult::Success, now), + ProbingResult::Pending, + ); + + assert_eq!( + probe.record(ExecutionResult::Success, now), + ProbingResult::Pending, + ); + + let status = probe.metrics.health_info(); + assert_eq!(status.status(), HealthStatus::Healthy); + assert_eq!(status.throughput(), 2); + } + + #[test] + fn allow_then_record_after_sampling_period_healthy() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.1, 1.0); + let mut probe = HealthProbe::new(options); + let now = Instant::now(); + + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + + // At the edge of a sampling period, success + assert_eq!( + probe.record(ExecutionResult::Success, now + Duration::from_secs(5)), + ProbingResult::Success, + ); + + assert_eq!( + probe.record(ExecutionResult::Success, now + Duration::from_secs(10)), + ProbingResult::Success, + ); + } + + #[test] + fn allow_then_record_after_sampling_period_unhealthy() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.1, 1.0); + let mut probe = HealthProbe::new(options); + let now = Instant::now(); + + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + assert_eq!( + probe.record(ExecutionResult::Failure, now + Duration::from_secs(10)), + ProbingResult::Failure, + ); + } + + #[test] + fn record_multiple_ensure_health_evaluated() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.6, 1.0); + let mut probe = HealthProbe::new(options); + let now = Instant::now(); + + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + assert_eq!( + probe.record(ExecutionResult::Success, now + Duration::from_secs(1)), + ProbingResult::Pending, + ); + assert_eq!( + probe.record(ExecutionResult::Failure, now + Duration::from_secs(2)), + ProbingResult::Pending, + ); + assert_eq!( + probe.record(ExecutionResult::Success, now + Duration::from_secs(6)), + ProbingResult::Success, + ); + + assert_eq!( + probe.record(ExecutionResult::Failure, now + Duration::from_secs(6)), + ProbingResult::Success, + ); + + assert_eq!( + probe.record(ExecutionResult::Failure, now + Duration::from_secs(6)), + ProbingResult::Failure, + ); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs new file mode 100644 index 00000000..dcf72dbf --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. + +//! Probing mechanisms for circuit breakers. +//! +//! Probing is used to test if a service has recovered after a failure. +//! Different probing strategies can be implemented by implementing the `ProbeOperation` trait. +//! +//! - Various probes can be combined in sequence using the [`Probes`] struct. +//! - Unified view over various probe types is provided by the [`Probe`] enum. + +use std::fmt::Debug; +use std::time::Instant; + +use crate::circuit_breaker::{EnterCircuitResult, ExecutionMode, ExecutionResult}; + +mod health_probe; +mod options; +mod probes; +mod single_probe; + +pub(crate) use health_probe::*; +pub(crate) use options::*; +pub(crate) use probes::*; +pub(crate) use single_probe::*; + +/// Result of a probing attempt. +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) enum ProbingResult { + /// Probing succeeded, no more probing needed. + Success, + + /// Probing failed, circuit should remain open. + Failure, + + /// Probing is still in progress, more probes are needed. + Pending, +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) enum AllowProbeResult { + Accepted, + Rejected, +} + +impl From for EnterCircuitResult { + fn from(value: AllowProbeResult) -> Self { + match value { + AllowProbeResult::Accepted => Self::Accepted { + mode: ExecutionMode::Probe, + }, + AllowProbeResult::Rejected => Self::Rejected, + } + } +} + +/// Trait defining the behavior of a probing mechanism in a circuit breaker. +pub(crate) trait ProbeOperation: Send + Sync + Debug + 'static { + fn allow_probe(&mut self, now: Instant) -> AllowProbeResult; + + fn record(&mut self, result: ExecutionResult, now: Instant) -> ProbingResult; +} + +/// View over multiple probe types. +#[derive(Debug)] +pub(crate) enum Probe { + Single(SingleProbe), + Health(HealthProbe), +} + +impl Probe { + pub fn new(options: ProbeOptions) -> Self { + match options { + ProbeOptions::SingleProbe { cooldown } => Self::Single(SingleProbe::new(cooldown)), + ProbeOptions::HealthProbe(options) => Self::Health(HealthProbe::new(options)), + } + } +} + +impl ProbeOperation for Probe { + fn allow_probe(&mut self, now: Instant) -> AllowProbeResult { + match self { + Self::Single(probe) => probe.allow_probe(now), + Self::Health(health) => health.allow_probe(now), + } + } + + /// Record the result of a probing attempt. + /// + /// Once the probe reports success or failure, it is considered complete and + /// should never be used again. + fn record(&mut self, result: ExecutionResult, now: Instant) -> ProbingResult { + match self { + Self::Single(probe) => probe.record(result, now), + Self::Health(health) => health.record(result, now), + } + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + + #[test] + fn probe_new_creates_single_probe() { + let cooldown = Duration::from_secs(5); + let probe = Probe::new(ProbeOptions::SingleProbe { cooldown }); + assert!(matches!(probe, Probe::Single(duration) if duration.probe_cooldown() == cooldown)); + } + + #[test] + fn probe_allow_probe_delegates_to_inner() { + let mut probe = Probe::new(ProbeOptions::SingleProbe { + cooldown: Duration::from_secs(5), + }); + let now = Instant::now(); + + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + assert_eq!(probe.allow_probe(now), AllowProbeResult::Rejected); + } + + #[test] + fn probe_record_delegates_to_inner() { + let mut probe = Probe::new(ProbeOptions::SingleProbe { + cooldown: Duration::from_secs(5), + }); + let now = Instant::now(); + + assert_eq!( + probe.record(ExecutionResult::Success, now), + ProbingResult::Success + ); + assert_eq!( + probe.record(ExecutionResult::Failure, now), + ProbingResult::Failure + ); + } + + #[test] + fn allow_probe_result_to_enter_circuit_result_ok() { + assert!(matches!( + EnterCircuitResult::from(AllowProbeResult::Accepted), + EnterCircuitResult::Accepted { + mode: ExecutionMode::Probe + } + )); + + assert!(matches!( + EnterCircuitResult::from(AllowProbeResult::Rejected), + EnterCircuitResult::Rejected + )); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs new file mode 100644 index 00000000..9ac1810b --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs @@ -0,0 +1,245 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; +use std::vec::IntoIter; + +use crate::circuit_breaker::HealthMetricsBuilder; + +/// The minimum throughput during probing stage is set to 1, so at least one request must come +/// through in each probing stage to evaluate the health. +const MIN_THROUGHPUT: u32 = 1; + +/// Options for a single probe type. +#[derive(Debug, Clone)] +pub(crate) enum ProbeOptions { + /// A single probe that allows one probe. + /// + /// After the initial probe is allowed, it enters a cooldown period during which + /// no further probes are allowed. + SingleProbe { cooldown: Duration }, + + /// A health-based probe that uses health metrics to determine the health of the system. + HealthProbe(HealthProbeOptions), +} + +/// Configuration options for the probing mechanism. +#[derive(Debug, Clone)] +pub(crate) struct ProbesOptions { + probes: Vec, +} + +impl ProbesOptions { + pub fn quick(cooldown: Duration) -> Self { + Self::new([ProbeOptions::SingleProbe { cooldown }]) + } + + pub fn reliable(stage_duration: Duration, failure_threshold: f32) -> Self { + Self::gradual( + &[0.001, 0.01, 0.05, 0.1, 0.25, 0.5], + stage_duration, + failure_threshold, + ) + } + + pub fn gradual( + probing_ratio: &[f64], + stage_duration: Duration, + failure_threshold: f32, + ) -> Self { + // Start with a single probe + let initial = std::iter::once(ProbeOptions::SingleProbe { + cooldown: stage_duration, + }); + + // Then continue with health-based probes + let health = probing_ratio.iter().map(|probing_ratio| { + ProbeOptions::HealthProbe(HealthProbeOptions::new( + stage_duration, + failure_threshold, + *probing_ratio, + )) + }); + + Self::new(initial.chain(health)) + } + + pub fn new(probes: impl IntoIterator) -> Self { + let probes: Vec = probes.into_iter().collect(); + assert!(!probes.is_empty(), "the probes list cannot be empty"); + Self { probes } + } + + pub fn probes(&self) -> IntoIter { + self.probes.clone().into_iter() + } +} + +#[derive(Debug, Clone)] +pub struct HealthProbeOptions { + pub(super) builder: HealthMetricsBuilder, + pub(super) probing_ratio: f64, +} + +impl HealthProbeOptions { + pub fn new(stage_duration: Duration, failure_threshold: f32, probing_ratio: f64) -> Self { + assert!( + probing_ratio > 0.0 && probing_ratio <= 1.0, + "probing_ratio must be in (0.0, 1.0]" + ); + assert!( + (0.0..1.0).contains(&failure_threshold), + "failure_threshold must be in [0.0, 1.0)" + ); + assert!( + stage_duration > Duration::ZERO, + "stage_duration must be greater than zero" + ); + + Self { + // The min throughput is set to 0, so if no requests come in during the probing stage, + // the health will be considered healthy by default. + builder: HealthMetricsBuilder::new(stage_duration, failure_threshold, MIN_THROUGHPUT), + probing_ratio, + } + } + + pub fn stage_duration(&self) -> Duration { + self.builder.sampling_duration + } + + #[cfg(test)] + pub fn failure_threshold(&self) -> f32 { + self.builder.failure_threshold + } +} + +#[cfg(test)] +mod tests { + use static_assertions::assert_impl_all; + + use super::*; + + assert_impl_all!(ProbeOptions: Clone, std::fmt::Debug); + assert_impl_all!(ProbesOptions: Clone, std::fmt::Debug); + #[test] + fn single_probe_constructor_creates_correct_options() { + let cooldown = Duration::from_secs(15); + let options = ProbesOptions::quick(cooldown); + let probes: Vec<_> = options.probes().collect(); + + assert_eq!(probes.len(), 1); + assert!(matches!( + &probes[0], + ProbeOptions::SingleProbe { cooldown: c } if *c == Duration::from_secs(15) + )); + } + + #[test] + fn new_accepts_multiple_probes() { + let options = ProbesOptions::new([ + ProbeOptions::SingleProbe { + cooldown: Duration::from_secs(10), + }, + ProbeOptions::SingleProbe { + cooldown: Duration::from_secs(20), + }, + ProbeOptions::SingleProbe { + cooldown: Duration::from_secs(30), + }, + ]); + + let probes: Vec<_> = options.probes().collect(); + assert_eq!(probes.len(), 3); + assert!( + matches!(&probes[0], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(10)) + ); + assert!( + matches!(&probes[1], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(20)) + ); + assert!( + matches!(&probes[2], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(30)) + ); + } + + #[test] + fn clone_preserves_probe_count() { + let options = ProbesOptions::quick(Duration::from_secs(25)); + let cloned = options.clone(); + + assert_eq!(options.probes().count(), cloned.probes().count()); + } + + #[test] + fn probes_iterator_is_reusable() { + let options = ProbesOptions::quick(Duration::from_secs(30)); + + assert_eq!(options.probes().count(), 1); + assert_eq!(options.probes().count(), 1); + } + + #[test] + #[should_panic(expected = "the probes list cannot be empty")] + fn new_panics_with_empty_iterator() { + let _ = ProbesOptions::new(Vec::::new()); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn health_probe_options_ctor_ok() { + let sampling_duration = Duration::from_secs(60); + let failure_threshold = 0.2; + let probing_ratio = 0.1; + + let options = HealthProbeOptions::new(sampling_duration, failure_threshold, probing_ratio); + + assert_eq!(options.stage_duration(), sampling_duration); + assert_eq!(options.probing_ratio, probing_ratio); + assert_eq!(options.builder.failure_threshold, failure_threshold); + assert_eq!(options.builder.min_throughput, 1); + } + + #[should_panic(expected = "stage_duration must be greater than zero")] + #[test] + fn health_probe_options_ctor_sampling_duration() { + let _ = HealthProbeOptions::new(Duration::ZERO, 0.1, 0.5); + } + + #[should_panic(expected = "failure_threshold must be in [0.0, 1.0)")] + #[test] + fn health_probe_options_ctor_failure_threshold() { + let _ = HealthProbeOptions::new(Duration::from_secs(10), 1.0, 0.5); + } + + #[should_panic(expected = "probing_ratio must be in (0.0, 1.0]")] + #[test] + fn health_probe_options_ctor_probing_ratio() { + let _ = HealthProbeOptions::new(Duration::from_secs(10), 0.1, 0.0); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn probes_options_reliable_ok() { + let options = ProbesOptions::reliable(Duration::from_secs(30), 0.2); + let probes: Vec<_> = options.probes().collect(); + + assert_eq!(probes.len(), 7); + assert!(matches!( + &probes[0], + ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(30) + )); + + let expected_ratios = [0.001, 0.01, 0.05, 0.1, 0.25, 0.5]; + for (i, ratio) in expected_ratios.iter().enumerate() { + let probe = &probes[i + 1]; + + match probe { + ProbeOptions::HealthProbe(options) => { + assert_eq!(options.builder.sampling_duration, Duration::from_secs(30)); + assert_eq!(options.builder.failure_threshold, 0.2); + assert_eq!(options.probing_ratio, *ratio); + } + ProbeOptions::SingleProbe { .. } => panic!("expected HealthProbe"), + } + } + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs new file mode 100644 index 00000000..85333cdf --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Instant; +use std::vec; + +use super::{AllowProbeResult, Probe, ProbeOperation, ProbeOptions, ProbesOptions, ProbingResult}; +use crate::circuit_breaker::ExecutionResult; + +/// Manages a sequence of probes. +#[derive(Debug)] +pub(crate) struct Probes { + probes: vec::IntoIter, + current: Probe, +} + +impl Probes { + pub fn new(options: &ProbesOptions) -> Self { + let mut probes = options.probes(); + let probe = probes + .next() + .expect("probes are never empty because ProbesOptions enforces that"); + + Self { + probes, + current: Probe::new(probe), + } + } + + pub fn allow_probe(&mut self, now: Instant) -> AllowProbeResult { + self.current.allow_probe(now) + } + + pub fn record(&mut self, result: ExecutionResult, now: Instant) -> ProbingResult { + match self.current.record(result, now) { + ProbingResult::Success => { + // check if there are more probes to try + match self.probes.next() { + Some(probe) => { + self.current = Probe::new(probe); + ProbingResult::Pending + } + None => ProbingResult::Success, + } + } + ProbingResult::Pending => ProbingResult::Pending, + ProbingResult::Failure => ProbingResult::Failure, + } + } +} + +#[cfg(test)] +mod tests { + + use std::time::Duration; + + use super::*; + + #[test] + fn multiple_probes_ok() { + let options = ProbesOptions::new([ + ProbeOptions::SingleProbe { + cooldown: Duration::from_secs(1), + }, + ProbeOptions::SingleProbe { + cooldown: Duration::from_secs(2), + }, + ]); + let mut probes = Probes::new(&options); + let now = Instant::now(); + + assert_eq!(probes.allow_probe(now), AllowProbeResult::Accepted); + assert_eq!(probes.allow_probe(now), AllowProbeResult::Rejected); + assert_eq!( + probes.record(ExecutionResult::Success, now), + ProbingResult::Pending + ); + + assert_eq!(probes.allow_probe(now), AllowProbeResult::Accepted); + assert_eq!( + probes.record(ExecutionResult::Success, now), + ProbingResult::Success + ); + + assert!(probes.probes.next().is_none()); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs new file mode 100644 index 00000000..f1ad3a2d --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::{Duration, Instant}; + +use super::{AllowProbeResult, ProbeOperation, ProbingResult}; +use crate::circuit_breaker::ExecutionResult; + +/// Allows a single probe to get in and based on the result either closes the circuit +/// or goes back to open state. +#[derive(Debug, Clone)] +pub(crate) struct SingleProbe { + probe_cooldown: Duration, + entered_at: Option, +} + +impl SingleProbe { + pub fn new(probe_cooldown: Duration) -> Self { + Self { + probe_cooldown, + entered_at: None, + } + } + + #[cfg(test)] + pub fn probe_cooldown(&self) -> Duration { + self.probe_cooldown + } +} + +impl ProbeOperation for SingleProbe { + fn allow_probe(&mut self, now: Instant) -> AllowProbeResult { + match self.entered_at { + // First probe attempt - record the timestamp to start the cooldown period + None => { + self.entered_at = Some(now); + AllowProbeResult::Accepted + } + // Cooldown has elapsed, allow the probe and reset the cooldown timer. + // We allow additional probe after the cooldown period to handle the case + // where the probe result is not recorded due to future being dropped. + Some(entered_at) if now.saturating_duration_since(entered_at) > self.probe_cooldown => { + self.entered_at = Some(now); + AllowProbeResult::Accepted + } + Some(_) => AllowProbeResult::Rejected, + } + } + + fn record(&mut self, result: ExecutionResult, _now: Instant) -> ProbingResult { + match result { + ExecutionResult::Success => ProbingResult::Success, + ExecutionResult::Failure => ProbingResult::Failure, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn allow_probe_accepts_single_probe() { + let mut probe = SingleProbe::new(Duration::from_secs(5)); + let now = Instant::now(); + + // The first probe should be accepted + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + + // The second probe immediately should be rejected + assert_eq!(probe.allow_probe(now), AllowProbeResult::Rejected); + + // After 3 seconds, still should be rejected + let later = now + Duration::from_secs(3); + assert_eq!(probe.allow_probe(later), AllowProbeResult::Rejected); + + // After cooldown, the probe should be accepted again + let later = now + Duration::from_secs(6); + assert_eq!(probe.allow_probe(later), AllowProbeResult::Accepted); + } + + #[test] + fn allow_probe_check_bounds() { + let mut probe = SingleProbe::new(Duration::from_secs(5)); + let now = Instant::now(); + + // The first probe should be accepted + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + + // After exactly cooldown duration, the probe should still be rejected + let later = now + Duration::from_secs(5); + assert_eq!(probe.allow_probe(later), AllowProbeResult::Rejected); + + // After cooldown + 1 microsecond, the probe should be accepted + let later = now + Duration::from_secs(5) + Duration::from_micros(1); + assert_eq!(probe.allow_probe(later), AllowProbeResult::Accepted); + } + + #[test] + fn record_ensure_correct_result() { + let mut probe = SingleProbe::new(Duration::from_secs(5)); + let now = Instant::now(); + + // Record a success + assert_eq!( + probe.record(ExecutionResult::Success, now), + ProbingResult::Success + ); + + // Record a failure + assert_eq!( + probe.record(ExecutionResult::Failure, now), + ProbingResult::Failure + ); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/execution_result.rs b/crates/seatbelt/src/circuit_breaker/execution_result.rs new file mode 100644 index 00000000..b7785d15 --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/execution_result.rs @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. + +use crate::RecoveryInfo; + +/// An evaluated execution result. +/// +/// From the perspective of a circuit breaker, an execution can either +/// succeed or fail. This enum captures that binary outcome. +#[derive(Debug, PartialEq, Copy, Clone)] +pub(crate) enum ExecutionResult { + Success, + Failure, +} + +impl ExecutionResult { + pub fn as_str(self) -> &'static str { + match self { + Self::Success => "success", + Self::Failure => "failure", + } + } +} + +impl ExecutionResult { + pub fn from_recovery(recovery: &RecoveryInfo) -> Self { + match recovery.kind() { + crate::RecoveryKind::Retry | crate::RecoveryKind::Unavailable => Self::Failure, + _ => Self::Success, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_execution_result_from_recovery() { + assert_eq!( + ExecutionResult::from_recovery(&RecoveryInfo::retry()), + ExecutionResult::Failure + ); + assert_eq!( + ExecutionResult::from_recovery(&RecoveryInfo::unavailable()), + ExecutionResult::Failure + ); + assert_eq!( + ExecutionResult::from_recovery(&RecoveryInfo::never()), + ExecutionResult::Success + ); + assert_eq!( + ExecutionResult::from_recovery(&RecoveryInfo::unknown()), + ExecutionResult::Success + ); + } + + #[test] + fn test_execution_result_as_str() { + assert_eq!(ExecutionResult::Success.as_str(), "success"); + assert_eq!(ExecutionResult::Failure.as_str(), "failure"); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/half_open_mode.rs b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs new file mode 100644 index 00000000..2af22e63 --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs @@ -0,0 +1,180 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use crate::circuit_breaker::constants::MIN_SAMPLING_DURATION; +use crate::circuit_breaker::engine::probing::ProbesOptions; + +/// Defines the behavior of the circuit breaker when transitioning from half-open to closed state. +/// +/// The half-open state is a transitional phase where the circuit breaker allows a limited number of +/// requests to pass through to test if the underlying service has recovered. The chosen mode +/// determines how aggressively the circuit breaker probes the service during this phase. +/// +/// Currently, two modes are supported: +/// +/// - [`HalfOpenMode::quick`]: Allows a single probe request to determine if the service has recovered. +/// - [`HalfOpenMode::reliable`](default): Gradually increases the percentage of probing requests over multiple stages. +#[derive(Debug, Clone, PartialEq)] +pub struct HalfOpenMode { + inner: Mode, +} + +impl HalfOpenMode { + /// Allow quick recovery from half-open state with a single probe. + /// + /// This approach is less reliable compared to the [`HalfOpenMode::reliable`] mode, but + /// can close the circuit faster. + /// + /// The downside of this approach is that it relies on a single execution to determine + /// the health of the service. If that execution happens to succeed by chance, the circuit + /// closes and later requests may fail again, leading to instability and re-opening the circuit + /// again. + #[must_use] + pub fn quick() -> Self { + Self { inner: Mode::Quick } + } + + /// Gradually increase the percentage of probing requests over multiple stages. + /// + /// This approach allows more requests to pass through in a controlled manner, + /// increasing the probing rate over time. This can help more reliably evaluate the + /// health of the underlying service over time rather than relying on a single execution. + /// + /// The pre-configured ratios for each probing stage are: + /// `0.1%, 1%, 5%, 10%, 25%, 50%` + /// + /// Each probing stage advances after the stage duration has elapsed, and the health + /// metrics indicate that the failure rate is below the configured threshold. If any probing stage + /// fails, the circuit reopens immediately and the cycle starts over. + /// + /// # Arguments + /// + /// - `stage_duration` - Optional custom stage duration for each probing stage. If not provided, + /// the value of [`break_duration`][CircuitBreakerLayer::break_duration] is used. The provided stage + /// duration is clamped to a minimum of 1 second. + #[must_use] + pub fn reliable(stage_duration: impl Into>) -> Self { + Self { + inner: Mode::Reliable(stage_duration.into().map(|d| d.max(MIN_SAMPLING_DURATION))), + } + } + + pub(super) fn to_options( + &self, + default_stage_duration: Duration, + failure_threshold: f32, + ) -> ProbesOptions { + match self.inner { + Mode::Quick => ProbesOptions::quick(default_stage_duration), + Mode::Reliable(duration) => ProbesOptions::reliable( + duration.unwrap_or(default_stage_duration), + failure_threshold, + ), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +enum Mode { + Quick, + Reliable(Option), +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::circuit_breaker::engine::probing::ProbeOptions; + + #[test] + fn quick_mode_creates_single_probe() { + let mode = HalfOpenMode::quick(); + let options = mode.to_options(Duration::from_secs(30), 0.1); + let probes: Vec<_> = options.probes().collect(); + + assert_eq!(probes.len(), 1); + assert!(matches!(probes[0], ProbeOptions::SingleProbe { .. })); + } + + #[test] + fn quick_mode_uses_default_duration() { + let mode = HalfOpenMode::quick(); + let default = Duration::from_secs(30); + let options = mode.to_options(default, 0.1); + let probes: Vec<_> = options.probes().collect(); + + assert!(matches!( + &probes[0], + ProbeOptions::SingleProbe { cooldown } if *cooldown == default + )); + } + + #[test] + fn reliable_mode_creates_seven_probes() { + let mode = HalfOpenMode::reliable(None); + let options = mode.to_options(Duration::from_secs(30), 0.1); + + assert_eq!(options.probes().len(), 7); + } + + #[test] + fn reliable_mode_with_custom_duration() { + let custom = Duration::from_secs(45); + let mode = HalfOpenMode::reliable(custom); + let options = mode.to_options(Duration::from_secs(30), 0.1); + let probes: Vec<_> = options.probes().collect(); + + assert!(matches!( + &probes[0], + ProbeOptions::SingleProbe { cooldown } if *cooldown == custom + )); + + #[expect(clippy::float_cmp, reason = "Test")] + for probe in &probes[1..] { + if let ProbeOptions::HealthProbe(h) = probe { + assert_eq!(h.stage_duration(), custom); + + assert_eq!(h.failure_threshold(), 0.1); + } + } + } + + #[test] + fn reliable_mode_with_default_duration() { + let mode = HalfOpenMode::reliable(None); + let default = Duration::from_secs(60); + let options = mode.to_options(default, 0.1); + let probes: Vec<_> = options.probes().collect(); + + assert!(matches!( + &probes[0], + ProbeOptions::SingleProbe { cooldown } if *cooldown == default + )); + + for probe in &probes[1..] { + if let ProbeOptions::HealthProbe(h) = probe { + assert_eq!(h.stage_duration(), default); + } + } + } + + #[test] + fn reliable_mode_accepts_various_inputs() { + let mode1 = HalfOpenMode::reliable(Duration::from_secs(10)); + let mode2 = HalfOpenMode::reliable(Some(Duration::from_secs(10))); + let mode3 = HalfOpenMode::reliable(None); + + assert!(matches!(mode1.inner, Mode::Reliable(Some(_)))); + assert!(matches!(mode2.inner, Mode::Reliable(Some(_)))); + assert!(matches!(mode3.inner, Mode::Reliable(None))); + } + + #[test] + fn reliable_mode_clamps_min_duration() { + let mode = HalfOpenMode::reliable(Duration::from_millis(500)); + + assert!( + matches!(mode.inner, Mode::Reliable(duration) if duration == Some(Duration::from_secs(1))) + ); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/health.rs b/crates/seatbelt/src/circuit_breaker/health.rs new file mode 100644 index 00000000..ab60f35b --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/health.rs @@ -0,0 +1,350 @@ +// Copyright (c) Microsoft Corporation. + +use std::collections::VecDeque; +use std::time::{Duration, Instant}; + +use super::ExecutionResult; +use crate::circuit_breaker::constants::MIN_SAMPLING_DURATION; + +const WINDOW_COUNT: u32 = 10; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) enum HealthStatus { + Healthy, + Unhealthy, +} + +/// Aggregated health information that can be used to determine the failure rate and throughput +/// of a service over a recent sampling period. +#[must_use] +#[derive(Debug, Copy, Clone)] +pub(crate) struct HealthInfo { + throughput: u32, + failure_rate: f32, + health_status: HealthStatus, +} + +impl HealthInfo { + pub fn new(successes: u32, failures: u32, failure_threshold: f32, min_throughput: u32) -> Self { + let throughput = successes.saturating_add(failures); + + if throughput == 0 { + return Self { + throughput: 0, + failure_rate: 0.0, + health_status: HealthStatus::Healthy, + }; + } + + #[expect(clippy::cast_possible_truncation, reason = "Acceptable")] + let failure_rate = (f64::from(failures) / f64::from(throughput)) as f32; + + Self { + throughput, + failure_rate, + health_status: if failure_rate >= failure_threshold && throughput >= min_throughput { + HealthStatus::Unhealthy + } else { + HealthStatus::Healthy + }, + } + } + + pub fn throughput(&self) -> u32 { + self.throughput + } + + pub fn failure_rate(&self) -> f32 { + self.failure_rate + } + + pub fn status(&self) -> HealthStatus { + self.health_status + } +} + +/// Pre-configured builder that creates `HealthMetrics` instances with consistent settings. +#[derive(Debug, Clone)] +pub(crate) struct HealthMetricsBuilder { + pub(crate) sampling_duration: Duration, + pub(crate) failure_threshold: f32, + pub(crate) min_throughput: u32, +} + +impl HealthMetricsBuilder { + pub fn new(sampling_duration: Duration, failure_threshold: f32, min_throughput: u32) -> Self { + Self { + sampling_duration: sampling_duration.max(MIN_SAMPLING_DURATION), + failure_threshold, + min_throughput, + } + } + + pub fn build(&self) -> HealthMetrics { + HealthMetrics::new( + self.sampling_duration, + self.failure_threshold, + self.min_throughput, + ) + } +} + +/// Tracks execution results over a sliding time window to provide health metrics. +#[derive(Debug)] +pub(crate) struct HealthMetrics { + sampling_duration: Duration, + window_duration: Duration, + windows: VecDeque, + failure_threshold: f32, + min_throughput: u32, +} + +impl HealthMetrics { + fn new(sampling_duration: Duration, failure_threshold: f32, min_throughput: u32) -> Self { + Self { + sampling_duration, + window_duration: sampling_duration / WINDOW_COUNT, + windows: VecDeque::with_capacity(WINDOW_COUNT as usize), + failure_threshold, + min_throughput, + } + } + + pub fn record(&mut self, result: ExecutionResult, now: Instant) { + // Remove old windows + while let Some(front) = self.windows.front() + && now.duration_since(front.started_at) > self.sampling_duration + { + self.windows.pop_front(); + } + + // Get or create the current window + if let Some(back) = self.windows.back_mut() + && now.duration_since(back.started_at) < self.window_duration + { + // Update the existing window + back.update(result); + } else { + // Create a new window + let mut new_window = Window::new(now); + new_window.update(result); + self.windows.push_back(new_window); + } + } + + pub fn health_info(&self) -> HealthInfo { + let mut successes = 0_u32; + let mut failures = 0_u32; + + for w in &self.windows { + successes = successes.saturating_add(w.successes); + failures = failures.saturating_add(w.failures); + } + + HealthInfo::new( + successes, + failures, + self.failure_threshold, + self.min_throughput, + ) + } +} + +#[derive(Debug)] +struct Window { + successes: u32, + failures: u32, + started_at: Instant, +} + +impl Window { + fn new(started_at: Instant) -> Self { + Self { + successes: 0, + failures: 0, + started_at, + } + } + + fn update(&mut self, result: ExecutionResult) { + match result { + ExecutionResult::Success => self.successes += 1, + ExecutionResult::Failure => self.failures += 1, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn factory_ok() { + let builder = HealthMetricsBuilder::new(Duration::from_secs(10), 0.5, 5); + let metrics = builder.build(); + + assert_eq!(metrics.sampling_duration, Duration::from_secs(10)); + assert_eq!(metrics.window_duration, Duration::from_secs(1)); + assert_eq!(metrics.failure_threshold, 0.5); + assert_eq!(metrics.min_throughput, 5); + + // small sampling duration is clamped + let builder = HealthMetricsBuilder::new(Duration::from_millis(500), 0.5, 5); + let metrics = builder.build(); + assert_eq!(metrics.sampling_duration, MIN_SAMPLING_DURATION); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn record_when_empty() { + let mut metrics = HealthMetrics::new(Duration::from_secs(10), 0.5, 5); + let start = Instant::now(); + metrics.record(ExecutionResult::Success, start); + let info = metrics.health_info(); + + assert_eq!(info.throughput(), 1); + assert_eq!(info.failure_rate(), 0.0); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn create_health_info_healthy_when_not_throughput() { + let metrics = HealthMetrics::new(Duration::from_secs(10), 0.5, 5); + + let info = metrics.health_info(); + + assert_eq!(info.throughput(), 0); + assert_eq!(info.failure_rate(), 0.0); + assert_eq!(info.status(), HealthStatus::Healthy); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn record_twice() { + let mut metrics = HealthMetrics::new(Duration::from_secs(10), 0.5, 2); + let start = Instant::now(); + metrics.record(ExecutionResult::Success, start); + metrics.record(ExecutionResult::Failure, start); + let info = metrics.health_info(); + + assert_eq!(info.throughput(), 2); + assert_eq!(info.failure_rate(), 0.5); + assert_eq!(info.status(), HealthStatus::Unhealthy); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn record_ensure_old_window_discarded() { + let mut metrics = HealthMetrics::new(Duration::from_secs(10), 0.5, 5); + let start = Instant::now(); + metrics.record(ExecutionResult::Success, start); + + // Advance time beyond the sampling duration + let later = start + Duration::from_secs(11); + metrics.record(ExecutionResult::Success, later); + let info = metrics.health_info(); + + assert_eq!(info.throughput(), 1); + assert_eq!(info.failure_rate(), 0.0); + } + + #[test] + fn new_ensure_initialized_properly() { + let metrics = HealthMetrics::new(Duration::from_secs(10), 0.5, 5); + assert_eq!(metrics.sampling_duration, Duration::from_secs(10)); + assert_eq!(metrics.window_duration, Duration::from_secs(1)); + assert!(metrics.windows.is_empty()); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn ensure_multiple_windows_created() { + let mut metrics = HealthMetrics::new(Duration::from_secs(10), 0.5, 5); + let start = Instant::now(); + for i in 0..30 { + let now = start + Duration::from_millis(i * 100); + metrics.record(ExecutionResult::Success, now); + } + + assert_eq!(metrics.windows.len(), 3); + + let first_window = &metrics.windows[0]; + assert_eq!(first_window.successes, 10); + assert_eq!(first_window.failures, 0); + assert_eq!(first_window.started_at, start); + + // discard the first window + let later = start + Duration::from_secs(12); + metrics.record(ExecutionResult::Success, later); + let info = metrics.health_info(); + + assert_eq!(metrics.windows.len(), 2); + assert_eq!(info.throughput(), 11); + assert_eq!(info.failure_rate(), 0.0); + } + + mod health_info_create_tests { + use super::*; + + #[test] + fn zero_throughput_is_healthy() { + let info = HealthInfo::new(0, 0, 0.5, 10); + assert_eq!( + (info.throughput(), info.failure_rate(), info.status()), + (0, 0.0, HealthStatus::Healthy) + ); + } + + #[test] + fn only_successes_is_healthy() { + let info = HealthInfo::new(10, 0, 0.5, 5); + assert_eq!( + (info.throughput(), info.failure_rate(), info.status()), + (10, 0.0, HealthStatus::Healthy) + ); + } + + #[test] + fn only_failures_above_threshold_is_unhealthy() { + let info = HealthInfo::new(0, 10, 0.5, 5); + assert_eq!( + (info.throughput(), info.failure_rate(), info.status()), + (10, 1.0, HealthStatus::Unhealthy) + ); + } + + #[test] + fn failure_threshold_boundaries() { + // At threshold + let info = HealthInfo::new(5, 5, 0.5, 5); + assert_eq!(info.status(), HealthStatus::Unhealthy); + + // Below threshold + let info = HealthInfo::new(6, 4, 0.5, 5); + assert_eq!(info.status(), HealthStatus::Healthy); + } + + #[test] + fn min_throughput_boundaries() { + // Below min throughput - healthy despite high failure rate + let info = HealthInfo::new(0, 3, 0.5, 5); + assert_eq!(info.status(), HealthStatus::Healthy); + + // At min throughput - unhealthy with high failure rate + let info = HealthInfo::new(1, 4, 0.5, 5); + assert_eq!(info.status(), HealthStatus::Unhealthy); + } + + #[test] + fn edge_cases() { + // Saturating add + let info = HealthInfo::new(u32::MAX, 1, 0.5, 5); + assert_eq!(info.throughput(), u32::MAX); + + // Zero threshold + let info = HealthInfo::new(1, 1, 0.0, 0); + assert_eq!(info.status(), HealthStatus::Unhealthy); + } + } +} diff --git a/crates/seatbelt/src/circuit_breaker/layer.rs b/crates/seatbelt/src/circuit_breaker/layer.rs new file mode 100644 index 00000000..9001e56a --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/layer.rs @@ -0,0 +1,736 @@ +// Copyright (c) Microsoft Corporation. + +use std::marker::PhantomData; +use std::time::Duration; + +use opentelemetry::StringValue; + +use super::constants::{ + DEFAULT_BREAK_DURATION, DEFAULT_FAILURE_THRESHOLD, DEFAULT_MIN_THROUGHPUT, + DEFAULT_SAMPLING_DURATION, +}; +use super::{ + CircuitBreaker, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, + OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RejectedInput, + RejectedInputArgs, ShouldRecover, +}; +use crate::circuit_breaker::engine::probing::ProbesOptions; +use crate::service::Layer; +use crate::{EnableIf, NotSet, Recovery, RecoveryInfo, SeatbeltOptions, Set}; + +/// Builder for configuring circuit breaker resilience middleware. +/// +/// This type is created by calling [`CircuitBreaker::layer`] and uses the +/// typestate pattern to enforce that required properties are configured before the circuit breaker +/// middleware can be built: +/// +/// - [`recovery`][CircuitBreakerLayer::recovery]: Required to determine if an output represents a failure +/// - [`rejected_input`][CircuitBreakerLayer::rejected_input]: Required to specify the output when the circuit is open and inputs are rejected +/// +/// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. +#[derive(Debug)] +pub struct CircuitBreakerLayer { + options: SeatbeltOptions, + recovery: Option>, + rejected_input: Option>, + on_opened: Option>, + on_closed: Option>, + on_probing: Option>, + partition_key: Option>, + enable_if: EnableIf, + strategy_name: StringValue, + failure_threshold: f32, + min_throughput: u32, + sampling_duration: Duration, + break_duration: Duration, + half_open_mode: HalfOpenMode, + _state: PhantomData Out>, +} + +impl CircuitBreakerLayer { + #[must_use] + pub(crate) fn new(name: StringValue, options: &SeatbeltOptions) -> Self { + Self { + options: options.clone(), + recovery: None, + rejected_input: None, + on_opened: None, + on_closed: None, + on_probing: None, + partition_key: None, + enable_if: EnableIf::always(), + strategy_name: name, + failure_threshold: DEFAULT_FAILURE_THRESHOLD, + min_throughput: DEFAULT_MIN_THROUGHPUT, + sampling_duration: DEFAULT_SAMPLING_DURATION, + break_duration: DEFAULT_BREAK_DURATION, + half_open_mode: HalfOpenMode::reliable(None), + _state: PhantomData, + } + } +} + +impl + CircuitBreakerLayer, RecoveryState, RejectedInputState> +{ + /// Sets the error to return when the circuit breaker is open for Result-returning services. + /// + /// When the circuit is open, requests are immediately rejected and this function + /// is called to generate the error that should be returned to the caller. + /// The error is automatically wrapped in a `Result::Err`. + /// + /// This is a convenience method for Result-returning services that allows you to + /// provide a meaningful error when the circuit breaker prevents a request from + /// reaching the underlying service. + /// + /// # Arguments + /// + /// * `error_producer` - Function that generates the error to return when the circuit is open + #[must_use] + pub fn rejected_input_error( + self, + error_producer: impl Fn(In, RejectedInputArgs) -> E + Send + Sync + 'static, + ) -> CircuitBreakerLayer, RecoveryState, Set> { + self.into_state::() + .rejected_input(move |input, args| Err(error_producer(input, args))) + .into_state() + } +} + +impl + CircuitBreakerLayer +{ + /// Sets the recovery classification function. + /// + /// This function determines whether a specific output represents a failure + /// by examining the output and returning a [`RecoveryInfo`] classification. + /// + /// The function receives the output and [`RecoveryArgs`][crate::circuit_breaker::RecoveryArgs] + /// with context about the circuit breaker state. + /// + /// # Arguments + /// + /// * `recover_fn` - Function that takes a reference to the output and + /// [`RecoveryArgs`][crate::circuit_breaker::RecoveryArgs] containing circuit breaker context, + /// and returns a [`RecoveryInfo`] decision + #[must_use] + pub fn recovery_with( + mut self, + recover_fn: impl Fn(&Out, crate::circuit_breaker::RecoveryArgs) -> RecoveryInfo + + Send + + Sync + + 'static, + ) -> CircuitBreakerLayer { + self.recovery = Some(ShouldRecover::new(recover_fn)); + self.into_state::() + } + + /// Automatically sets the recovery classification function for types that implement [`Recovery`]. + /// + /// This is a convenience method that uses the [`Recovery`] trait to determine + /// whether an output represents a failure. For types that implement [`Recovery`], + /// this provides a simple way to enable circuit breaker behavior without manually + /// implementing a recovery classification function. + /// + /// This is equivalent to calling [`recovery_with`][CircuitBreakerLayer::recovery_with] with + /// `|output, _args| output.recovery()`. + /// + /// # Type Requirements + /// + /// This method is only available when the output type `Out` implements [`Recovery`]. + #[must_use] + pub fn recovery(self) -> CircuitBreakerLayer + where + Out: Recovery, + { + self.recovery_with(|out, _args| out.recovery()) + } + + /// Sets the output to return when the circuit breaker is open. + /// + /// When the circuit is open, requests are immediately rejected and this function + /// is called to generate the output that should be returned to the caller. + /// + /// This allows you to provide a meaningful error message or fallback value + /// when the circuit breaker prevents a request from reaching the underlying service. + /// + /// # Arguments + /// + /// * `rejected_fn` - Function that generates the output to return when the circuit is open + #[must_use] + pub fn rejected_input( + mut self, + rejected_fn: impl Fn(In, RejectedInputArgs) -> Out + Send + Sync + 'static, + ) -> CircuitBreakerLayer { + self.rejected_input = Some(RejectedInput::new(rejected_fn)); + self.into_state::() + } + + /// Sets the failure threshold for the circuit breaker. + /// + /// The circuit breaker will open when the failure rate exceeds this threshold + /// over the sampling duration. The value should be between 0.0 and 1.0, where + /// 0.1 represents a 10% failure threshold. Values greater than 1.0 will be clamped to 1.0. + /// + /// **Default**: 0.1 (10% failure rate) + /// + /// # Arguments + /// + /// * `threshold` - The failure threshold (0.0 to 1.0, values > 1.0 are clamped) + #[must_use] + pub fn failure_threshold(mut self, threshold: f32) -> Self { + self.failure_threshold = threshold.min(1.0); + self + } + + /// Sets the minimum throughput required before the circuit breaker can open. + /// + /// The circuit breaker will only consider opening if at least this many requests + /// have been processed during the sampling duration. This prevents the circuit + /// from opening due to a small number of failures when overall traffic is low. + /// + /// **Default**: 100 requests + /// + /// # Arguments + /// + /// * `throughput` - The minimum number of requests required + #[must_use] + pub fn min_throughput(mut self, throughput: u32) -> Self { + self.min_throughput = throughput; + self + } + + /// Sets the sampling duration for calculating failure rates. + /// + /// The circuit breaker calculates failure rates over this time window. + /// Only requests within this duration are considered when determining + /// whether the failure rate exceeds the threshold. + /// + /// **Default**: 30 seconds + /// + /// > **Note**: The sampling duration cannot be lower than 1 second. If value is less + /// > than 1 second, it will be clamped to 1 second. + /// + /// # Arguments + /// + /// * `duration` - The time window for sampling failures + #[must_use] + pub fn sampling_duration(mut self, duration: Duration) -> Self { + self.sampling_duration = duration; + self + } + + /// Sets the break duration for how long the circuit stays open. + /// + /// When the circuit breaker opens due to failures, it will remain open + /// for this duration before transitioning to half-open state to test + /// if the underlying service has recovered. + /// + /// **Default**: 5 seconds + /// + /// # Arguments + /// + /// * `duration` - How long the circuit stays open after breaking + #[must_use] + pub fn break_duration(mut self, duration: Duration) -> Self { + self.break_duration = duration; + self + } + + /// Sets the callback to be invoked when the circuit breaker opens. + /// + /// This callback is called whenever the circuit breaker transitions from + /// closed to open state due to exceeding the failure threshold. + /// + /// **Default**: No callback + /// + /// # Arguments + /// + /// * `callback` - Function that takes a reference to the output and + /// [`OnOpenedArgs`] containing circuit breaker context + #[must_use] + pub fn on_opened( + mut self, + callback: impl Fn(&Out, OnOpenedArgs) + Send + Sync + 'static, + ) -> Self { + self.on_opened = Some(OnOpened::new(callback)); + self + } + + /// Sets the callback to be invoked when the circuit breaker closes. + /// + /// This callback is called whenever the circuit breaker transitions from + /// half-open state to closed state after successful recovery. + /// + /// **Default**: No callback + /// + /// # Arguments + /// + /// * `callback` - Function that takes a reference to the output and + /// [`OnClosedArgs`] containing circuit breaker context + #[must_use] + pub fn on_closed( + mut self, + callback: impl Fn(&Out, OnClosedArgs) + Send + Sync + 'static, + ) -> Self { + self.on_closed = Some(OnClosed::new(callback)); + self + } + + /// Sets the callback to be invoked when the circuit breaker is probing. + /// + /// This callback is called when the circuit breaker is in half-open state + /// and is testing whether the underlying service has recovered. + /// + /// **Default**: No callback + /// + /// # Arguments + /// + /// * `callback` - Function that takes a mutable reference to the input and + /// [`OnProbingArgs`] containing circuit breaker context + #[must_use] + pub fn on_probing( + mut self, + callback: impl Fn(&mut In, OnProbingArgs) + Send + Sync + 'static, + ) -> Self { + self.on_probing = Some(OnProbing::new(callback)); + self + } + + /// Sets the partition key provider function. + /// + /// The partitioning key is used to maintain separate circuit breaker states + /// for different inputs. The idea is to extract the partition key from the input + /// so that requests with the same key share the same circuit breaker state. + /// + /// **Default**: Single global circuit (no partitioning) - all requests share the same circuit breaker state + /// + /// If no partition key provider is set, a default key is used, meaning all requests + /// share the same circuit breaker state. + /// + /// The typical scenario is HTTP request where the partition key could be derived from + /// the combination of scheme and authority (host and port). This allows separate + /// circuit breaker states for different backend services. + /// + /// # Arguments + /// + /// * `key_provider` - Function that takes a reference to the input and returns + /// a [`PartitionKey`] identifying the partition for circuit breaker state + /// + /// # Example + /// + /// ```rust + /// # use seatbelt::circuit_breaker::{CircuitBreakerLayer, PartitionKey}; + /// // Example HTTP request structure + /// struct HttpRequest { + /// scheme: String, + /// host: String, + /// port: u16, + /// path: String, + /// } + /// # fn example(circuit_breaker_layer: CircuitBreakerLayer) { + /// // Configure circuit breaker with a partition key based on a scheme, host and port. + /// let layer = circuit_breaker_layer.partition_key(|request: &HttpRequest| { + /// let partition = format!("{}://{}:{}", request.scheme, request.host, request.port); + /// PartitionKey::from(partition) + /// }); + /// + /// // This ensures that: + /// // - Requests to https://api.service1.com share one circuit breaker state + /// // - Requests to https://api.service2.com:8080 share another circuit breaker state + /// // - Requests to http://localhost:3000 share yet another circuit breaker state + /// # } + /// ``` + /// + /// # Telemetry + /// + /// The values used to create partition keys are included in telemetry data (logs and metrics) + /// for observability purposes. **Important**: Ensure that the values from which partition keys + /// are created do not contain any sensitive data such as authentication tokens, personal + /// identifiable information (PII), or other confidential data. + #[must_use] + pub fn partition_key( + mut self, + key_provider: impl Fn(&In) -> PartitionKey + Send + Sync + 'static, + ) -> Self { + self.partition_key = Some(PartionKeyProvider::new(key_provider)); + self + } + + /// Sets the half-open mode for the circuit breaker. + /// + /// This determines how the circuit breaker behaves when transitioning from half-open + /// to a closed state. + /// + /// **Default**: [`HalfOpenMode::reliable`] + #[must_use] + pub fn half_open_mode(mut self, mode: HalfOpenMode) -> Self { + self.half_open_mode = mode; + self + } + + /// Optionally enables the circuit breaker middleware based on a condition. + /// + /// When disabled, requests pass through without circuit breaker protection. + /// This call replaces any previous condition. + /// + /// **Default**: Always enabled + /// + /// # Arguments + /// + /// * `is_enabled` - Function that takes a reference to the input and returns + /// `true` if circuit breaker protection should be enabled for this request + #[must_use] + pub fn enable_if(mut self, is_enabled: impl Fn(&In) -> bool + Send + Sync + 'static) -> Self { + self.enable_if = EnableIf::new(is_enabled); + self + } + + /// Enables the circuit breaker middleware unconditionally. + /// + /// All requests will have circuit breaker protection applied. + /// This call replaces any previous condition. + /// + /// **Note**: This is the default behavior - circuit breaker is enabled by default. + #[must_use] + pub fn enable_always(mut self) -> Self { + self.enable_if = EnableIf::always(); + self + } + + /// Disables the circuit breaker middleware completely. + /// + /// All requests will pass through without circuit breaker protection. + /// This call replaces any previous condition. + /// + /// **Note**: This overrides the default enabled behavior. + #[must_use] + pub fn disable(mut self) -> Self { + self.enable_if = EnableIf::never(); + self + } +} + +impl Layer for CircuitBreakerLayer { + type Service = CircuitBreaker; + + fn layer(&self, inner: S) -> Self::Service { + CircuitBreaker { + inner, + clock: self.options.get_clock().clone(), + recovery: self + .recovery + .clone() + .expect("recovery must be set in Ready state"), + rejected_input: self + .rejected_input + .clone() + .expect("rejected_input must be set in Ready state"), + enable_if: self.enable_if.clone(), + engines: self.engines(), + on_opened: self.on_opened.clone(), + on_closed: self.on_closed.clone(), + on_probing: self.on_probing.clone(), + partition_key: self.partition_key.clone(), + } + } +} + +impl + CircuitBreakerLayer +{ + fn probes_options(&self) -> ProbesOptions { + self.half_open_mode + // we will use break duration as the sampling duration for probes + .to_options(self.break_duration, self.failure_threshold) + } + + fn engines(&self) -> Engines { + Engines::new( + super::engine::EngineOptions { + break_duration: self.break_duration, + health_metrics_builder: HealthMetricsBuilder::new( + self.sampling_duration, + self.failure_threshold, + self.min_throughput, + ), + probes: self.probes_options(), + }, + self.options.get_clock().clone(), + self.strategy_name.clone(), + self.options.get_pipeline_name().clone().into(), + self.options.create_resilience_event_counter(), + ) + } + + fn into_state(self) -> CircuitBreakerLayer { + CircuitBreakerLayer { + options: self.options, + recovery: self.recovery, + rejected_input: self.rejected_input, + on_opened: self.on_opened, + on_closed: self.on_closed, + on_probing: self.on_probing, + partition_key: self.partition_key, + enable_if: self.enable_if, + strategy_name: self.strategy_name, + failure_threshold: self.failure_threshold, + min_throughput: self.min_throughput, + sampling_duration: self.sampling_duration, + break_duration: self.break_duration, + half_open_mode: self.half_open_mode, + _state: PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + + use layered::Execute; + use tick::Clock; + + use super::*; + use crate::circuit_breaker::RecoveryArgs; + use crate::circuit_breaker::engine::probing::ProbeOptions; + use crate::testing::RecoverableType; + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn new_creates_correct_initial_state() { + let options = create_test_options(); + let layer: CircuitBreakerLayer<_, _, NotSet, NotSet> = + CircuitBreakerLayer::new(StringValue::from("test_breaker"), &options); + + assert!(layer.recovery.is_none()); + assert!(layer.rejected_input.is_none()); + assert_eq!(layer.strategy_name, StringValue::from("test_breaker")); + assert!(layer.enable_if.call(&"test_input".to_string())); + assert_eq!(layer.failure_threshold, 0.1); + assert_eq!(layer.min_throughput, 100); + assert_eq!(layer.sampling_duration, Duration::from_secs(30)); + } + + #[test] + fn recovery_sets_correctly() { + let options = create_test_options(); + let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + + let layer: CircuitBreakerLayer<_, _, Set, NotSet> = layer.recovery_with(|output, _args| { + if output.contains("error") { + RecoveryInfo::retry() + } else { + RecoveryInfo::never() + } + }); + + let result = layer.recovery.as_ref().unwrap().call( + &"error message".to_string(), + RecoveryArgs { + partition_key: &PartitionKey::default(), + clock: &Clock::new_frozen(), + }, + ); + assert_eq!(result, RecoveryInfo::retry()); + + let result = layer.recovery.as_ref().unwrap().call( + &"success".to_string(), + RecoveryArgs { + partition_key: &PartitionKey::default(), + clock: &Clock::new_frozen(), + }, + ); + assert_eq!(result, RecoveryInfo::never()); + } + + #[test] + fn recovery_auto_sets_correctly() { + let options = SeatbeltOptions::::new(Clock::new_frozen()); + let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + + let layer: CircuitBreakerLayer<_, _, Set, NotSet> = layer.recovery(); + + let result = layer.recovery.as_ref().unwrap().call( + &RecoverableType::from(RecoveryInfo::retry()), + RecoveryArgs { + partition_key: &PartitionKey::default(), + clock: &Clock::new_frozen(), + }, + ); + assert_eq!(result, RecoveryInfo::retry()); + + let result = layer.recovery.as_ref().unwrap().call( + &RecoverableType::from(RecoveryInfo::never()), + RecoveryArgs { + partition_key: &PartitionKey::default(), + clock: &Clock::new_frozen(), + }, + ); + assert_eq!(result, RecoveryInfo::never()); + } + + #[test] + fn rejected_input_sets_correctly() { + let options = create_test_options(); + let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + + let layer: CircuitBreakerLayer<_, _, NotSet, Set> = + layer.rejected_input(|_, _| "rejected".to_string()); + + let result = layer.rejected_input.as_ref().unwrap().call( + "test".to_string(), + RejectedInputArgs { + partition_key: &PartitionKey::default(), + }, + ); + assert_eq!(result, "rejected"); + } + + #[test] + fn enable_disable_conditions_work() { + let layer = create_ready_layer().enable_if(|input| input.contains("enable")); + + assert!(layer.enable_if.call(&"enable_test".to_string())); + assert!(!layer.enable_if.call(&"disable_test".to_string())); + + let layer = layer.disable(); + assert!(!layer.enable_if.call(&"anything".to_string())); + + let layer = layer.enable_always(); + assert!(layer.enable_if.call(&"anything".to_string())); + } + + #[test] + fn layer_builds_service_when_ready() { + let layer = create_ready_layer(); + let _service = layer.layer(Execute::new(|input: String| async move { input })); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn failure_threshold_sets_correctly() { + let layer = create_ready_layer(); + + // Test setting a valid threshold + let layer = layer.failure_threshold(0.25); + assert_eq!(layer.failure_threshold, 0.25); + + // Test clamping values greater than 1.0 + let layer = layer.failure_threshold(1.5); + assert_eq!(layer.failure_threshold, 1.0); + + // Test edge cases + let layer = layer.failure_threshold(0.0); + assert_eq!(layer.failure_threshold, 0.0); + + let layer = layer.failure_threshold(1.0); + assert_eq!(layer.failure_threshold, 1.0); + } + + #[test] + fn min_throughput_sets_correctly() { + let layer = create_ready_layer(); + + // Test setting different throughput values + let layer = layer.min_throughput(50); + assert_eq!(layer.min_throughput, 50); + + let layer = layer.min_throughput(1000); + assert_eq!(layer.min_throughput, 1000); + + let layer = layer.min_throughput(0); + assert_eq!(layer.min_throughput, 0); + } + + #[test] + fn sampling_duration_sets_correctly() { + let layer = create_ready_layer(); + + // Test setting different durations + let layer = layer.sampling_duration(Duration::from_secs(10)); + assert_eq!(layer.sampling_duration, Duration::from_secs(10)); + + let layer = layer.sampling_duration(Duration::from_secs(60)); + assert_eq!(layer.sampling_duration, Duration::from_secs(60)); + + let layer = layer.sampling_duration(Duration::from_millis(500)); + assert_eq!(layer.sampling_duration, Duration::from_millis(500)); + } + + #[test] + fn break_duration_sets_correctly() { + let layer = create_ready_layer(); + + // Test setting different break durations + let layer = layer.break_duration(Duration::from_secs(5)); + assert_eq!(layer.break_duration, Duration::from_secs(5)); + + let layer = layer.break_duration(Duration::from_secs(120)); + assert_eq!(layer.break_duration, Duration::from_secs(120)); + + let layer = layer.break_duration(Duration::from_millis(2000)); + assert_eq!(layer.break_duration, Duration::from_millis(2000)); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + fn default_values_are_correct() { + let options = create_test_options(); + let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + + assert_eq!(layer.failure_threshold, DEFAULT_FAILURE_THRESHOLD); + assert_eq!(layer.min_throughput, DEFAULT_MIN_THROUGHPUT); + assert_eq!(layer.sampling_duration, DEFAULT_SAMPLING_DURATION); + assert_eq!(layer.break_duration, DEFAULT_BREAK_DURATION); + assert_eq!(layer.half_open_mode, HalfOpenMode::reliable(None)); + } + + #[test] + #[expect(clippy::float_cmp, reason = "Test")] + pub fn half_open_mode_ok() { + let layer = create_ready_layer().half_open_mode(HalfOpenMode::quick()); + assert_eq!(layer.half_open_mode, HalfOpenMode::quick()); + + let probes = layer + .break_duration(Duration::from_secs(234)) + .failure_threshold(0.52) + .half_open_mode(HalfOpenMode::reliable(None)) + .probes_options(); + + // access the last probe which should be the health probe + let options = probes.probes().last().unwrap(); + + match options { + ProbeOptions::HealthProbe(options) => { + assert_eq!(options.stage_duration(), Duration::from_secs(234)); + assert_eq!(options.failure_threshold(), 0.52); + } + ProbeOptions::SingleProbe { .. } => panic!("Expected HealthProbe"), + } + } + + #[test] + fn static_assertions() { + static_assertions::assert_impl_all!(CircuitBreakerLayer: Layer); + static_assertions::assert_not_impl_all!(CircuitBreakerLayer: Layer); + static_assertions::assert_not_impl_all!(CircuitBreakerLayer: Layer); + static_assertions::assert_impl_all!(CircuitBreakerLayer: Debug); + } + + fn create_test_options() -> SeatbeltOptions { + SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + } + + fn create_ready_layer() -> CircuitBreakerLayer { + CircuitBreakerLayer::new(StringValue::from("test"), &create_test_options()) + .recovery_with(|output, _args| { + if output.contains("error") { + RecoveryInfo::retry() + } else { + RecoveryInfo::never() + } + }) + .rejected_input(|_, _| "circuit is open".to_string()) + } +} diff --git a/crates/seatbelt/src/circuit_breaker/mod.rs b/crates/seatbelt/src/circuit_breaker/mod.rs new file mode 100644 index 00000000..897c0c8a --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/mod.rs @@ -0,0 +1,334 @@ +// Copyright (c) Microsoft Corporation. + +//! Circuit breaker resilience middleware for preventing cascading failures. +//! +//! This module provides automatic circuit breaking capabilities with configurable failure +//! thresholds, break durations, and comprehensive telemetry. The primary types are: +//! +//! - [`CircuitBreaker`] is the middleware that wraps an inner service and monitors failure rates +//! - [`CircuitBreakerLayer`] is used to configure and construct the circuit breaker middleware +//! +//! A circuit breaker monitors the success and failure rates of operations and can temporarily +//! block requests when the failure rate exceeds a configured threshold. This prevents cascading failures +//! and gives downstream services time to recover. +//! +//! # Quick Start +//! +//! ```rust +//! # use layered::{Execute, Service, Stack}; +//! # use tick::Clock; +//! # use seatbelt::circuit_breaker::CircuitBreaker; +//! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! # async fn example(clock: Clock) -> Result<(), Box> { +//! let options = SeatbeltOptions::new(&clock); +//! +//! let stack = ( +//! CircuitBreaker::layer("circuit_breaker", &options) +//! // Required: determine if output indicates a failure by using recovery metadata +//! .recovery_with(|result: &Result, _| match result { +//! Ok(_) => RecoveryInfo::never(), +//! Err(_) => RecoveryInfo::retry(), +//! }) +//! // Required: provide output when the input is rejected on an open circuit +//! .rejected_input_error(|input, args| "service unavailable".to_string()), +//! Execute::new(my_operation), +//! ); +//! +//! let service = stack.build(); +//! let result = service.execute("input".to_string()).await; +//! # let _result = result; +//! # Ok(()) +//! # } +//! # async fn my_operation(input: String) -> Result { Ok(input) } +//! ``` +//! +//! # Configuration +//! +//! The [`CircuitBreakerLayer`] uses a type state pattern to enforce that all required properties +//! are configured before the layer can be built. This compile-time safety ensures that you cannot +//! accidentally create a circuit breaker layer without properly specifying recovery logic and +//! rejected input handling. The properties that must be configured are: +//! +//! - [`recovery`][CircuitBreakerLayer::recovery]: Detects the recovery classification for output. +//! This is used to determine if an operation succeeded or failed. +//! - [`rejected_input`][CircuitBreakerLayer::rejected_input]: Provide the output to return when the +//! circuit is open and execution is being rejected. +//! +//! Each circuit breaker layer requires an identifier for telemetry purposes. This identifier +//! should use `snake_case` naming convention to maintain consistency across the telemetry. +//! +//! # Thread Safety +//! +//! The [`CircuitBreaker`] type is thread-safe and implements both `Send` and `Sync` as enforced +//! by the `Service` trait it implements. This allows circuit breaker middleware to be safely +//! shared across multiple threads and used in concurrent environments. +//! +//! # Circuit Breaker States and Transitions +//! +//! The circuit breaker operates in three states: +//! +//! - **Closed**: Normal operation. Requests pass through and failures are tracked. +//! - **Open**: The circuit is broken. Requests are immediately rejected without calling +//! the underlying service. +//! - **Half-Open**: Testing if the service has recovered. A limited number of probing requests are +//! allowed through to assess the health of the underlying service. +//! +//! ```text +//! ┌────────┐ Failure threshold exceeded ┌──────────┐ +//! │ Closed │ ────────────────────────────────────▶│ Open │ +//! └────────┘ └──────────┘ +//! ▲ │ +//! │ │ +//! │ ┌────────────────┐ │ +//! └────────────│ Half-Open │◀──────────────────┘ +//! Probing └────────────────┘ Break duration +//! successful elapsed +//! ``` +//! +//! ## Closed State +//! +//! The circuit starts in the closed state and operates normally: +//! +//! - All requests pass through to the underlying service +//! - Failures are tracked and evaluated against the failure threshold +//! - When the failure threshold is exceeded, transitions to **Open** +//! - You can observe transitions into the closed state by providing +//! the [`on_closed`][CircuitBreakerLayer::on_closed] callback. +//! +//! ## Open State +//! +//! When the circuit is open: +//! +//! - Requests are immediately rejected with the output provided by [`rejected_input`][CircuitBreakerLayer::rejected_input] +//! - No calls are made to the underlying service +//! - After the break duration elapsed, transitions to **Half-Open** +//! - You can observe transitions into the open state by providing +//! the [`on_opened`][CircuitBreakerLayer::on_opened] callback. +//! +//! ## Half-Open State +//! +//! The circuit enters a testing phase: +//! +//! - A limited number of probing requests are allowed through +//! - Success rate is carefully monitored +//! - If sufficient successful probing requests occur, transitions back to **Closed** +//! - If failures continue, the circuit stays in the Half-Open state until the underlying service recovers. +//! Half-open state respects the break duration before allowing more probing requests. +//! - You can observe when the circuit is probing in half-open state by providing +//! the [`on_probing`][CircuitBreakerLayer::on_probing] callback. +//! - You can configure the probing behavior and the sensitivity of how quickly the circuit +//! closes again after successful probes by using [`half_open_mode`][CircuitBreakerLayer::half_open_mode] +//! +//! # Recovery Classification +//! +//! The circuit breaker uses [`RecoveryInfo`][crate::RecoveryInfo] to classify operation results. The following +//! recovery kinds are classified as failures that contribute to tripping the circuit: +//! +//! - [`RecoveryKind::Retry`][crate::RecoveryKind::Retry] +//! - [`RecoveryKind::Unavailable`][crate::RecoveryKind::Unavailable] +//! +//! # Partitioning +//! +//! Circuit breakers can maintain separate circuit states for different logical groups of requests +//! by providing a [`partition_key`][CircuitBreakerLayer::partition_key] function. This allows +//! the creation of multiple independent circuits based on the input properties. +//! +//! For example, a typical scenario where partitioning is useful is HTTP request where the partition key +//! is extracted from the request scheme, host, and port. This allows isolation of circuit states +//! for different downstream endpoints. +//! +//! > **Note**: Each unique partition key creates a separate circuit state. Be mindful of memory usage +//! > with high-cardinality partition keys. +//! +//! # Defaults +//! +//! The circuit breaker middleware uses the following default values when optional configuration +//! is not provided: +//! +//! | Parameter | Default Value | Description | Configured By | +//! |-----------|---------------|-------------|---------------| +//! | Failure threshold | `0.1` (10%) | Circuit opens when failure rate exceeds this percentage | [`failure_threshold`][CircuitBreakerLayer::failure_threshold] | +//! | Minimum throughput | `100` requests | Minimum request volume required before circuit can open | [`min_throughput`][CircuitBreakerLayer::min_throughput] | +//! | Sampling duration | `30` seconds | Time window for calculating failure rates | [`sampling_duration`][CircuitBreakerLayer::sampling_duration] | +//! | Break duration | `5` seconds | Duration circuit remains open before testing recovery | [`break_duration`][CircuitBreakerLayer::break_duration] | +//! | Partitioning | Single global circuit | All requests share the same circuit breaker state | [`partition_key`][CircuitBreakerLayer::partition_key] | +//! | Half-open mode | `Reliable` | Gradual recovery with increasing probe percentages | [`half_open_mode`][CircuitBreakerLayer::half_open_mode] | +//! | Enable condition | Always enabled | Circuit breaker protection is applied to all requests | [`enable_if`][CircuitBreakerLayer::enable_if], [`enable_always`][CircuitBreakerLayer::enable_always], [`disable`][CircuitBreakerLayer::disable] | +//! +//! These defaults provide a reasonable starting point for most use cases, offering a balance +//! between resilience and responsiveness to service recovery. +//! +//! # Telemetry +//! +//! ## Metrics +//! +//! - **Metric**: `resilience.event` (counter) +//! - **When**: Emitted when circuit state transitions occur and when requests are rejected +//! - **Attributes**: +//! - `resilience.pipeline.name`: Pipeline identifier from [`SeatbeltOptions::pipeline_name`] +//! - `resilience.strategy.name`: Circuit breaker identifier from [`CircuitBreaker::layer`] +//! - `resilience.event.name`: One of: +//! - `circuit_opened`: When the circuit transitions to open state due to failure threshold being exceeded +//! - `circuit_closed`: When the circuit transitions to closed state after successful probing +//! - `circuit_rejected`: When a request is rejected due to the circuit being in open state +//! - `circuit_probe`: When a probing request is executed in half-open state +//! - `resilience.circuit_breaker.state`: Current circuit state (`closed`, `open`, or `half_open`) +//! - `resilience.circuit_breaker.probe.result`: Result of probe execution (`success` or `failure`, only present for probe events) +//! +//! Additional structured logging events are emitted with detailed health metrics (failure rate, throughput) for circuit state transitions. +//! +//! # Examples +//! +//! ## Basic Usage +//! +//! This example demonstrates the basic usage of configuring and using circuit breaker middleware. +//! +//! ```rust +//! # use layered::{Execute, Service, Stack}; +//! # use tick::Clock; +//! # use seatbelt::circuit_breaker::CircuitBreaker; +//! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! # async fn example(clock: Clock) -> Result<(), Box> { +//! // Define common options for resilience middleware. The clock is runtime-specific and +//! // must be provided. See its documentation for details. +//! let options = SeatbeltOptions::new(&clock).pipeline_name("example"); +//! +//! let stack = ( +//! CircuitBreaker::layer("my_breaker", &options) +//! // Required: determine if output indicates failure +//! .recovery_with(|result: &Result, _args| match result { +//! // These are demonstrative, real code will have more meaningful recovery detection +//! Ok(_) => RecoveryInfo::never(), +//! Err(msg) if msg.contains("transient") => RecoveryInfo::retry(), +//! Err(_) => RecoveryInfo::never(), +//! }) +//! // Required: provide output when circuit is open +//! .rejected_input_error(|_, _| "service unavailable".to_string()), +//! Execute::new(execute_unreliable_operation), +//! ); +//! +//! // Build the service +//! let service = stack.build(); +//! +//! // Execute the service +//! let result = service.execute("test input".to_string()).await; +//! # let _result = result; +//! # Ok(()) +//! # } +//! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } +//! ``` +//! +//! ## Advanced Usage +//! +//! This example demonstrates advanced usage of the circuit breaker middleware, including custom +//! failure thresholds, sampling durations, break durations, and state change callbacks. +//! +//! ```rust +//! # use std::time::Duration; +//! # use layered::{Execute, Service, Stack}; +//! # use tick::Clock; +//! # use seatbelt::circuit_breaker::{CircuitBreaker, PartitionKey, HalfOpenMode}; +//! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! # async fn example(clock: Clock) -> Result<(), Box> { +//! // Define common options for resilience middleware. +//! let options = SeatbeltOptions::new(&clock).pipeline_name("advanced_example"); +//! +//! let stack = ( +//! CircuitBreaker::layer("advanced_breaker", &options) +//! // Required: determine if output indicates failure +//! .recovery_with(|result: &Result, _args| match result { +//! Err(msg) if msg.contains("rate_limit") => RecoveryInfo::unavailable(), +//! Err(msg) if msg.contains("timeout") => RecoveryInfo::retry(), +//! Err(msg) if msg.contains("server_error") => RecoveryInfo::retry(), +//! Err(_) => RecoveryInfo::never(), // Client errors don't count as failures +//! Ok(_) => RecoveryInfo::never(), +//! }) +//! // Required: provide output when circuit is open +//! .rejected_input_error(|_input, _args| { +//! "service temporarily unavailable due to exceeding failure threshold".to_string() +//! }) +//! // Optional configuration +//! .half_open_mode(HalfOpenMode::reliable(None)) // close the circuit gradually (default) +//! .failure_threshold(0.05) // Trip at 5% failure threshold (less sensitive than default 10%) +//! .min_throughput(50) // Require minimum 50 requests before considering circuit open +//! .sampling_duration(Duration::from_secs(60)) // Evaluate failures over 60-second window +//! .break_duration(Duration::from_secs(30)) // Stay open for 30 seconds before testing +//! // You can provide your own partitioning logic if needed. The default is a single global +//! // circuit. By partitioning, you can have separate circuits for different inputs. +//! .partition_key(|input| PartitionKey::from(detect_partition(input))) +//! // State change callbacks for monitoring and alerting +//! .on_opened(|output, _args| { +//! println!("circuit breaker OPENED due to failure: {:?}", output); +//! // In real code, this would trigger alerts, metrics, logging, etc. +//! }) +//! .on_closed(|output, _args| { +//! println!("circuit breaker CLOSED, service recovered: {:?}", output); +//! // In real code, this would log recovery, update dashboards, etc. +//! }) +//! .on_probing(|input, _args| { +//! println!("circuit breaker PROBING with input: {:?}", input); +//! // Optionally modify input for probing requests +//! }), +//! Execute::new(execute_unreliable_operation), +//! ); +//! +//! // Build and execute the service +//! let service = stack.build(); +//! let result = service.execute("test_timeout".to_string()).await; +//! # let _result = result; +//! # Ok(()) +//! # } +//! # fn detect_partition(input: &String) -> String { input.to_string() } +//! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } +//! ``` +//! +//! ## Incomplete Configuration +//! +//! This example demonstrates what happens when the `CircuitBreakerLayer` is not fully configured. +//! The code below will not compile because the circuit breaker layer is missing required configuration. +//! +//! ```compile_fail +//! # use seatbelt::circuit_breaker::CircuitBreaker; +//! # use seatbelt::SeatbeltOptions; +//! # use layered::Execute; +//! # fn example(service_options: SeatbeltOptions>) { +//! let stack = ( +//! CircuitBreaker::layer("test", &service_options), // Missing required configuration! +//! Execute::new(|input| async move { Ok(input) }) +//! ); +//! +//! // This will fail to compile +//! let service = stack.build(); +//! # } +//! ``` +//! +//! For more comprehensive examples, see the [examples directory](https://github.com/microsoft/oxidizer/tree/main/crates/seatbelt/examples). + +mod args; +mod callbacks; +mod layer; +mod service; +#[doc(inline)] +pub use args::{OnClosedArgs, OnOpenedArgs, OnProbingArgs, RecoveryArgs, RejectedInputArgs}; +pub(super) use callbacks::*; +#[doc(inline)] +pub use layer::CircuitBreakerLayer; +#[doc(inline)] +pub use service::CircuitBreaker; + +mod execution_result; +pub(super) use execution_result::ExecutionResult; + +mod health; +pub(super) use health::*; + +mod constants; +mod engine; +mod telemetry; +pub(super) use engine::*; + +mod partition_key; +pub use partition_key::PartitionKey; + +mod half_open_mode; +pub use half_open_mode::HalfOpenMode; diff --git a/crates/seatbelt/src/circuit_breaker/partition_key.rs b/crates/seatbelt/src/circuit_breaker/partition_key.rs new file mode 100644 index 00000000..3a46606a --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/partition_key.rs @@ -0,0 +1,162 @@ +// Copyright (c) Microsoft Corporation. + +use std::borrow::Cow; +use std::collections::hash_map::DefaultHasher; +use std::fmt::Display; +use std::hash::Hasher; + +/// Key that identifies a partition for which a separate circuit breaker instance is maintained. +/// +/// Currently, it supports either integer or string keys. For maximum performance, prefer using integer keys +/// or static string keys (i.e. `&'static str`). +/// +/// # Examples +/// +/// ## Creation from a number +/// +/// ```rust +/// use seatbelt::circuit_breaker::PartitionKey; +/// +/// let key = PartitionKey::from(42_u64); +/// assert_eq!(key.to_string(), "42"); +/// ``` +/// +/// ## Creation from HTTP request authority and scheme +/// +/// ```rust +/// use seatbelt::circuit_breaker::PartitionKey; +/// +/// // Simulate extracting authority and scheme from an HTTP request +/// let scheme = "https"; +/// let authority = "api.example.com"; +/// let partition_value = format!("{}://{}", scheme, authority); +/// +/// let key = PartitionKey::from(partition_value); +/// assert_eq!(key.to_string(), "https://api.example.com"); +/// +/// // For better performance, use hashing. Note that you must provide a display label +/// // for the hashed key. +/// let hashed_key = PartitionKey::hashed(&(scheme, authority), "scheme_and_authority"); +/// assert_eq!(hashed_key.to_string(), "scheme_and_authority"); +/// ``` +/// +/// # Telemetry +/// +/// The values used to create partition keys are included in telemetry data (logs and metrics) +/// for observability purposes. **Important**: Ensure that the values from which partition keys +/// are created do not contain any sensitive data such as authentication tokens, personal +/// identifiable information (PII), or other confidential data. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PartitionKey(PartitionKeyValue); + +impl PartitionKey { + pub(crate) const fn default() -> Self { + Self(PartitionKeyValue::String(Cow::Borrowed("default"))) + } + + /// Create a partition key by hashing the given value. + /// + /// The value must implement the `Hash` trait. This is useful for creating partition + /// keys from complex types. The resulting partition key will be based on the hash of + /// the value. You must provide a `label` that will be used for display and telemetry. + pub fn hashed(value: &T, label: &'static str) -> Self { + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + Self(PartitionKeyValue::Hashed(hasher.finish(), label)) + } +} + +impl Display for PartitionKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.0 { + PartitionKeyValue::Number(n) => write!(f, "{n}"), + PartitionKeyValue::String(s) => f.write_str(s), + PartitionKeyValue::Hashed(_, label) => f.write_str(label), + } + } +} + +impl From for PartitionKey { + fn from(value: u64) -> Self { + Self(PartitionKeyValue::Number(value)) + } +} + +impl From<&'static str> for PartitionKey { + fn from(value: &'static str) -> Self { + Self(PartitionKeyValue::String(Cow::Borrowed(value))) + } +} + +impl From for PartitionKey { + fn from(value: String) -> Self { + Self(PartitionKeyValue::String(Cow::Owned(value))) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum PartitionKeyValue { + Number(u64), + Hashed(u64, &'static str), + String(Cow<'static, str>), +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + use std::hash::Hash; + + use static_assertions::assert_impl_all; + + use super::*; + + assert_impl_all!(PartitionKey: Send, Sync, Unpin, Clone, Hash, Display, Debug, PartialEq, Eq); + + #[test] + fn from_u64_and_display() { + let k = PartitionKey::from(42u64); + assert_eq!(k.to_string(), "42"); + assert_eq!(k, PartitionKey::from(42u64)); + } + + #[test] + fn from_static_str_and_string() { + let a: PartitionKey = "hello".into(); + let b: PartitionKey = String::from("hello").into(); + assert_eq!(a.to_string(), "hello"); + assert_eq!(b.to_string(), "hello"); + assert_eq!(a, b); + } + + #[test] + fn hashed_matches_manual_hasher() { + let value = "some value"; + let pk = PartitionKey::hashed(&value, "my_label"); + + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + let expected = hasher.finish(); + + match &pk.0 { + PartitionKeyValue::Hashed(n, _) => { + assert_eq!(*n, expected); + } + _ => panic!("Expected Inner::Hashed variant"), + } + + assert_eq!(pk.to_string(), "my_label"); + } + + #[test] + fn partitionkey_hash_consistent() { + let k1 = PartitionKey::from(123u64); + let k2 = PartitionKey::from(123u64); + + let mut h1 = DefaultHasher::new(); + k1.hash(&mut h1); + let mut h2 = DefaultHasher::new(); + k2.hash(&mut h2); + + assert_eq!(h1.finish(), h2.finish()); + } +} diff --git a/crates/seatbelt/src/circuit_breaker/service.rs b/crates/seatbelt/src/circuit_breaker/service.rs new file mode 100644 index 00000000..8eebf816 --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/service.rs @@ -0,0 +1,493 @@ +// Copyright (c) Microsoft Corporation. + +use std::ops::ControlFlow; + +use layered::Service; +use tick::Clock; + +use super::{ + CircuitBreakerLayer, CircuitEngine, Engines, EnterCircuitResult, ExecutionMode, + ExecutionResult, ExitCircuitResult, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, + OnProbingArgs, PartionKeyProvider, PartitionKey, RecoveryArgs, RejectedInput, + RejectedInputArgs, ShouldRecover, +}; +use crate::{EnableIf, NotSet}; + +/// Applies circuit breaker logic to prevent cascading failures. +/// +/// `CircuitBreaker` wraps an inner [`Service`] and monitors the success and failure rates +/// of operations. When the failure rate exceeds a configured threshold, the circuit breaker opens +/// and temporarily blocks requests to give the downstream service time to recover. +/// +/// This middleware is designed to be used across services, applications, and libraries +/// to prevent cascading failures in distributed systems. +/// +/// `CircuitBreaker` is configured by calling [`CircuitBreaker::layer`] and using the +/// builder methods on the returned [`CircuitBreakerLayer`] instance. +/// +/// For comprehensive examples and usage patterns, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. +#[derive(Debug)] +pub struct CircuitBreaker { + pub(super) inner: S, + pub(super) clock: Clock, + pub(super) recovery: ShouldRecover, + pub(super) rejected_input: RejectedInput, + pub(super) enable_if: EnableIf, + pub(super) engines: Engines, + pub(super) partition_key: Option>, + pub(super) on_opened: Option>, + pub(super) on_closed: Option>, + pub(super) on_probing: Option>, +} + +impl CircuitBreaker { + /// Creates a new circuit breaker layer with the specified name and options. + /// + /// Returns a [`CircuitBreakerLayer`] that must be configured with required parameters + /// before it can be used to build a circuit breaker service. + pub fn layer( + name: impl Into>, + options: &crate::SeatbeltOptions, + ) -> CircuitBreakerLayer { + CircuitBreakerLayer::new(name.into().into(), options) + } +} + +impl Service for CircuitBreaker +where + In: Send, + S: Service, +{ + type Out = Out; + + async fn execute(&self, input: In) -> Self::Out { + // Check if a circuit breaker is enabled for this input + if !self.enable_if.call(&input) { + return self.inner.execute(input).await; + } + + // Determine the partition key for this input + let partition_key = self + .partition_key + .as_ref() + .map_or_else(PartitionKey::default, |partition_key| { + partition_key.call(&input) + }); + + // Retrieve the engine for this partition + let engine = self.engines.get_engine(&partition_key); + + // Before + let (input, mode) = match self.before_execute(engine.as_ref(), input, &partition_key) { + ControlFlow::Continue(input) => input, + ControlFlow::Break(output) => return output, + }; + + // Execute the inner service + let output = self.inner.execute(input).await; + + // After + self.after_execute(engine.as_ref(), &output, mode, &partition_key); + + output + } +} + +impl CircuitBreaker { + #[inline] + fn before_execute( + &self, + engine: &impl CircuitEngine, + mut input: In, + partition_key: &PartitionKey, + ) -> ControlFlow { + // Try to enter the circuit + match engine.enter() { + EnterCircuitResult::Accepted { mode } => { + match mode { + // regular execution, do nothing special + ExecutionMode::Normal => ControlFlow::Continue((input, ExecutionMode::Normal)), + // This is a probing execution that happens when the circuit is half-open. + // Invoke the on_probing callback if configured. + ExecutionMode::Probe => { + if let Some(on_probing) = &self.on_probing { + on_probing.call(&mut input, OnProbingArgs { partition_key }); + } + + ControlFlow::Continue((input, ExecutionMode::Probe)) + } + } + } + // Circuit is open, return rejected input output + EnterCircuitResult::Rejected => ControlFlow::Break( + self.rejected_input + .call(input, RejectedInputArgs { partition_key }), + ), + } + } + + fn after_execute( + &self, + engine: &impl CircuitEngine, + output: &Out, + mode: ExecutionMode, + partition_key: &PartitionKey, + ) { + let recovery = self.recovery.call( + output, + RecoveryArgs { + partition_key, + clock: &self.clock, + }, + ); + + // Evaluate the execution result based on recovery decision + let execution_result = ExecutionResult::from_recovery(&recovery); + + // Exit the circuit and handle state transitions + match engine.exit(execution_result, mode) { + ExitCircuitResult::Unchanged | ExitCircuitResult::Reopened => { + // we explicitly do nothing here + } + ExitCircuitResult::Opened(_health) => { + if let Some(on_opened) = &self.on_opened { + on_opened.call(output, OnOpenedArgs { partition_key }); + } + } + ExitCircuitResult::Closed(stats) => { + if let Some(on_closed) = &self.on_closed { + on_closed.call( + output, + OnClosedArgs { + partition_key, + open_duration: stats.opened_duration(self.clock.instant()), + }, + ); + } + } + } + } +} + +#[cfg(test)] +#[cfg(not(miri))] +mod tests { + use std::sync::Arc; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::time::{Duration, Instant}; + + use layered::Execute; + use oxidizer_rt::Builtins; + use tick::ClockControl; + + use super::*; + use crate::circuit_breaker::constants::DEFAULT_BREAK_DURATION; + use crate::circuit_breaker::{EngineFake, HalfOpenMode, HealthInfo, Stats}; + use crate::service::Layer; + use crate::{RecoveryInfo, SeatbeltOptions, Set}; + + #[test] + fn layer_ensure_defaults() { + let options = SeatbeltOptions::::new(Clock::new_frozen()) + .pipeline_name("test_pipeline"); + let layer: CircuitBreakerLayer = + CircuitBreaker::layer("test_breaker", &options); + let layer = layer + .recovery_with(|_, _| RecoveryInfo::never()) + .rejected_input(|_, _| "rejected".to_string()); + + let breaker = layer.layer(Execute::new(|v: String| async move { v })); + + assert!(breaker.enable_if.call(&"str".to_string())); + } + + #[oxidizer_rt::test] + async fn circuit_breaker_disabled_no_inner_calls(state: oxidizer_rt::Builtins) { + let service = create_ready_circuit_breaker_layer(state.clock()) + .disable() + .layer(Execute::new(move |v: String| async move { v })); + + let result = service.execute("test".to_string()).await; + + assert_eq!(result, "test"); + } + + #[oxidizer_rt::test] + async fn passthrough_behavior(_state: oxidizer_rt::Builtins) { + let clock = Clock::new_frozen(); + let service = create_ready_circuit_breaker_layer(&clock) + .layer(Execute::new(move |v: String| async move { v })); + + let result = service.execute("test".to_string()).await; + + assert_eq!(result, "test"); + } + + #[test] + fn before_execute_accepted() { + let service = create_ready_circuit_breaker_layer(&Clock::new_frozen()) + .on_probing(|_, _| panic!("should not be called")) + .layer(Execute::new(move |v: String| async move { v })); + + let engine = EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Unchanged, + ); + + let result = service + .before_execute(&engine, "test".to_string(), &PartitionKey::default()) + .continue_value() + .unwrap(); + assert_eq!(result, ("test".to_string(), ExecutionMode::Normal)); + } + + #[test] + fn before_execute_accepted_with_probing() { + let probing_called = Arc::new(AtomicBool::new(false)); + let probing_called_clone = Arc::clone(&probing_called); + + let service = create_ready_circuit_breaker_layer(&Clock::new_frozen()) + .on_probing(move |value, _| { + assert_eq!(value, "test"); + probing_called.store(true, std::sync::atomic::Ordering::SeqCst); + }) + .layer(Execute::new(move |v: String| async move { v })); + + let engine = EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Probe, + }, + ExitCircuitResult::Unchanged, + ); + + let result = service + .before_execute(&engine, "test".to_string(), &PartitionKey::default()) + .continue_value() + .unwrap(); + assert_eq!(result, ("test".to_string(), ExecutionMode::Probe)); + assert!(probing_called_clone.load(std::sync::atomic::Ordering::SeqCst)); + } + + #[test] + fn before_execute_rejected() { + let service = create_ready_circuit_breaker_layer(&Clock::new_frozen()) + .rejected_input(|_, _| "rejected".to_string()) + .layer(Execute::new(move |v: String| async move { v })); + + let engine = EngineFake::new(EnterCircuitResult::Rejected, ExitCircuitResult::Unchanged); + + let result = service + .before_execute(&engine, "test".to_string(), &PartitionKey::default()) + .break_value() + .unwrap(); + assert_eq!(result, "rejected"); + } + + #[test] + fn after_execute_unchanged() { + let service = create_ready_circuit_breaker_layer(&Clock::new_frozen()) + .on_opened(|_, _| panic!("should not be called")) + .on_closed(|_, _| panic!("should not be called")) + .layer(Execute::new(move |v: String| async move { v })); + + let engine = EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Unchanged, + ); + + // This should not panic, indicating no callbacks were invoked + service.after_execute( + &engine, + &"success".to_string(), + ExecutionMode::Normal, + &PartitionKey::default(), + ); + } + + #[test] + fn after_execute_reopened() { + let service = create_ready_circuit_breaker_layer(&Clock::new_frozen()) + .on_opened(|_, _| panic!("should not be called")) + .on_closed(|_, _| panic!("should not be called")) + .layer(Execute::new(move |v: String| async move { v })); + + let engine = EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Reopened, + ); + + // This should not panic, indicating no callbacks were invoked + service.after_execute( + &engine, + &"success".to_string(), + ExecutionMode::Normal, + &PartitionKey::default(), + ); + } + + #[test] + fn after_execute_opened() { + let opened_called = Arc::new(AtomicBool::new(false)); + let opened_called_clone = Arc::clone(&opened_called); + + let service = create_ready_circuit_breaker_layer(&Clock::new_frozen()) + .on_opened(move |output, _| { + assert_eq!(output, "error_response"); + opened_called.store(true, Ordering::SeqCst); + }) + .on_closed(|_, _| panic!("on_closed should not be called")) + .layer(Execute::new(move |v: String| async move { v })); + + let engine = EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Opened(HealthInfo::new(1, 1, 1.0, 1)), + ); + + service.after_execute( + &engine, + &"error_response".to_string(), + ExecutionMode::Normal, + &PartitionKey::default(), + ); + assert!(opened_called_clone.load(Ordering::SeqCst)); + } + + #[test] + fn after_execute_closed() { + let closed_called = Arc::new(AtomicBool::new(false)); + let closed_called_clone = Arc::clone(&closed_called); + + let service = create_ready_circuit_breaker_layer(&Clock::new_frozen()) + .on_opened(|_, _| panic!("on_opened should not be called")) + .on_closed(move |output, _| { + assert_eq!(output, "success_response"); + closed_called.store(true, Ordering::SeqCst); + }) + .layer(Execute::new(move |v: String| async move { v })); + + let engine = EngineFake::new( + EnterCircuitResult::Accepted { + mode: ExecutionMode::Normal, + }, + ExitCircuitResult::Closed(Stats::new(Instant::now())), + ); + + service.after_execute( + &engine, + &"success_response".to_string(), + ExecutionMode::Normal, + &PartitionKey::default(), + ); + assert!(closed_called_clone.load(Ordering::SeqCst)); + } + + #[oxidizer_rt::test] + async fn execute_end_to_end_with_callbacks(_state: Builtins) { + let probing_called = Arc::new(AtomicBool::new(false)); + let opened_called = Arc::new(AtomicBool::new(false)); + let closed_called = Arc::new(AtomicBool::new(false)); + + let probing_called_clone = Arc::clone(&probing_called); + let opened_called_clone = Arc::clone(&opened_called); + let closed_called_clone = Arc::clone(&closed_called); + + let clock_control = ClockControl::new(); + + // Create a service that transforms input and can trigger different circuit states + let service = create_ready_circuit_breaker_layer(&clock_control.to_clock()) + .min_throughput(5) + .half_open_mode(HalfOpenMode::quick()) + .on_probing(move |input, _| { + assert_eq!(input, "probe_input"); + probing_called.store(true, Ordering::SeqCst); + }) + .on_opened(move |output, _| { + assert_eq!(output, "error_output"); + opened_called.store(true, Ordering::SeqCst); + }) + .on_closed(move |output, args| { + assert_eq!(output, "probe_output"); + assert!(args.open_duration() > Duration::ZERO); + closed_called.store(true, Ordering::SeqCst); + }) + .layer(Execute::new(move |input: String| async move { + // Transform input to simulate different scenarios + match input.as_str() { + "probe_input" => "probe_output".to_string(), + "success_input" => "success_output".to_string(), + "error_input" => "error_output".to_string(), + _ => input, + } + })); + + // break the circuit first by simulating failures + for _ in 0..5 { + let result = service.execute("error_input".to_string()).await; + assert_eq!(result, "error_output"); + } + + // rejected input + let result = service.execute("success_input".to_string()).await; + assert_eq!(result, "circuit is open"); + assert!(opened_called_clone.load(Ordering::SeqCst)); + assert!(!closed_called_clone.load(Ordering::SeqCst)); + + // send probe and close the circuit + clock_control.advance(DEFAULT_BREAK_DURATION); + let result = service.execute("probe_input".to_string()).await; + assert_eq!(result, "probe_output"); + assert!(probing_called_clone.load(Ordering::SeqCst)); + assert!(closed_called_clone.load(Ordering::SeqCst)); + + // normal execution should pass through + let result = service.execute("success_input".to_string()).await; + assert_eq!(result, "success_output"); + } + + #[oxidizer_rt::test] + async fn different_partitions_ensure_isolated(builtins: Builtins) { + let service = create_ready_circuit_breaker_layer(builtins.clock()) + .partition_key(|input| PartitionKey::from(input.clone())) + .min_throughput(3) + .recovery_with(|_, _| RecoveryInfo::retry()) + .rejected_input(|_, args| format!("circuit is open, partition: {}", args.partition_key)) + .layer(Execute::new(|input: String| async move { input })); + + // break the circuit for partition "A" + for _ in 0..3 { + let result = service.execute("A".to_string()).await; + assert_eq!(result, "A"); + } + + let result = service.execute("A".to_string()).await; + assert_eq!(result, "circuit is open, partition: A"); + + // Execute on partition "B" should pass through + let result = service.execute("B".to_string()).await; + assert_eq!(result, "B"); + } + + fn create_ready_circuit_breaker_layer( + clock: &Clock, + ) -> CircuitBreakerLayer { + let options = + SeatbeltOptions::::new(clock.clone()).pipeline_name("test_pipeline"); + CircuitBreaker::layer("test_breaker", &options) + .recovery_with(|output, _| { + if output.contains("error") { + RecoveryInfo::retry() + } else { + RecoveryInfo::never() + } + }) + .rejected_input(|_, _| "circuit is open".to_string()) + } +} diff --git a/crates/seatbelt/src/circuit_breaker/telemetry.rs b/crates/seatbelt/src/circuit_breaker/telemetry.rs new file mode 100644 index 00000000..47238709 --- /dev/null +++ b/crates/seatbelt/src/circuit_breaker/telemetry.rs @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. + +pub(super) const CIRCUIT_OPENED_EVENT_NAME: &str = "circuit_opened"; +pub(super) const CIRCUIT_CLOSED_EVENT_NAME: &str = "circuit_closed"; +pub(super) const CIRCUIT_REJECTED_EVENT_NAME: &str = "circuit_rejected"; +pub(super) const CIRCUIT_PROBE_EVENT_NAME: &str = "circuit_probe"; +pub(super) const CIRCUIT_STATE: &str = "resilience.circuit_breaker.state"; +pub(super) const CIRCUIT_PROBE_RESULT: &str = "resilience.circuit_breaker.probe.result"; +pub(super) const CIRCUIT_PARTITION: &str = "resilience.circuit_breaker.partition"; diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs new file mode 100644 index 00000000..4542bf43 --- /dev/null +++ b/crates/seatbelt/src/lib.rs @@ -0,0 +1,146 @@ +// Copyright (c) Microsoft Corporation. + +#![expect( + rustdoc::broken_intra_doc_links, + reason = "Too ugly to make 'live links' possible with the combination of features" +)] + +//! Resilience and fault handling for applications and libraries. +//! +//! This crate helps applications handle transient faults gracefully through composable +//! resilience patterns. It provides resilience middleware for building robust distributed systems +//! that can automatically handle timeouts, retries, and other failure scenarios. +//! +//! # Runtime Agnostic Design +//! +//! The seatbelt crate is designed to be **runtime agnostic** and works seamlessly across any +//! async runtime. This flexibility allows you to use the same resilience patterns across +//! different projects and migrate between runtimes without changing your resilience patterns. +//! +//! # Core Types +//! +//! - [`RecoveryInfo`]: Classifies errors as recoverable (transient) or non-recoverable (permanent). +//! - [`Recovery`]: A trait for types that can determine their recoverability. +//! +//! # Quick Start +//! +//! Add resilience to your services with just a few lines of code. **Retry** handles transient failures +//! and **Timeout** prevents operations from hanging indefinitely: +//! +//! ```rust +//! # #[cfg(all(feature = "retry", feature = "timeout"))] +//! # { +//! # use std::time::Duration; +//! # use oxidizer_rt::Builtins; +//! # use layered::{Execute, Service, Stack}; +//! use seatbelt::retry::Retry; +//! use seatbelt::timeout::Timeout; +//! use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! +//! # #[oxidizer_rt::main] +//! # async fn main(state: Builtins) { +//! # let clock = state.clock().clone(); +//! let options = SeatbeltOptions::new(&clock); +//! let service = ( +//! // Retry middleware: Automatically retries failed operations +//! Retry::layer("retry", &options) +//! .clone_input() +//! .recovery_with(|output: &String, _| match output.as_str() { +//! "temporary_error" => RecoveryInfo::retry(), +//! "operation timed out" => RecoveryInfo::retry(), +//! _ => RecoveryInfo::never(), +//! }), +//! // Timeout middleware: Cancels operations that take too long +//! Timeout::layer("timeout", &options) +//! .timeout_output(|_| "operation timed out".to_string()) +//! .timeout(Duration::from_secs(30)), +//! // Your core business logic +//! Execute::new(my_string_operation), +//! ) +//! .build(); +//! +//! let result = service.execute("input data".to_string()).await; +//! # } +//! # async fn my_string_operation(input: String) -> String { +//! # // Simulate processing that transforms the input string +//! # format!("processed: {}", input) +//! # } +//! # } +//! ``` +//! +//! > **Note**: Resilience middleware requires [`Clock`][tick::Clock] from the [`tick`] crate for timing +//! > operations like delays, timeouts, and backoff calculations. The clock is passed through +//! > [`SeatbeltOptions`] when creating middleware layers. +//! +//! See [Built-in Middlewares](#built-in-middleware) for more details. +//! +//! # Recovery Metadata +//! +//! Error types can implement [`Recovery`] to provide additional metadata about their retry characteristics. +//! This enables callers to use a unified, streamlined approach when determining whether to retry an +//! operation, regardless of the underlying error type or source. +//! +//! # Built-in Middleware +//! +//! This crate provides built-in resilience middleware that you can use out of the box. See the documentation +//! for each module for details on how to use them. +//! +//! - [`timeout`]: Cancels long-running operations. +//! - [`retry`]: Automatically retries failed operations with configurable backoff strategies. +//! - [`circuit_breaker`]: Prevents cascading failures by stopping requests to unhealthy services. +//! +//! ## Features +//! +//! This crate supports several optional features that can be enabled to extend functionality: +//! +//! - `options`: Enables common APIs for building resilience middleware, including [`SeatbeltOptions`]. +//! Requires [`tick`] for timing operations. +//! - `service`: Re-exports common types for building middleware from [`layered`] crate. +//! - `telemetry`: Enables telemetry and observability features using OpenTelemetry for monitoring +//! resilience operations. +//! - `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. +//! - `timeout`: Enables the [`timeout`] middleware for canceling long-running operations. +//! - `retry`: Enables the [`retry`] middleware for automatically retrying failed operations with +//! configurable backoff strategies, jitter, and recovery classification. +//! - `circuit-breaker`: Enables the [`circuit_breaker`] middleware for preventing cascading failures. + +#[doc(inline)] +pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; + +#[cfg(any(feature = "options", test))] +pub(crate) mod options; + +#[cfg(any(feature = "options", test))] +pub(crate) use options::EnableIf; +#[cfg(any(feature = "options", test))] +pub(crate) use options::define_fn_wrapper; + +#[cfg(any(feature = "retry", test))] +pub(crate) use crate::options::MaxAttempts; +#[cfg(any(feature = "options", test))] +pub use crate::options::{Attempt, Backoff, NotSet, SeatbeltOptions, Set}; + +#[cfg(any(feature = "telemetry", test))] +pub mod telemetry; + +#[cfg(any(feature = "timeout", test))] +pub mod timeout; + +#[cfg(any(feature = "retry", test))] +pub mod retry; + +#[cfg(any(feature = "circuit-breaker", test))] +pub mod circuit_breaker; + +/// Re-exported types from the [`layered`] crate. +#[cfg(any(feature = "service", test))] +pub mod service { + #[doc(inline)] + pub use layered::{Layer, Service, Stack}; +} + +#[cfg(any(feature = "retry", feature = "circuit-breaker", test))] +mod rnd; + +#[cfg(test)] +pub mod testing; diff --git a/crates/seatbelt/src/options/attempt.rs b/crates/seatbelt/src/options/attempt.rs new file mode 100644 index 00000000..d90d6dc6 --- /dev/null +++ b/crates/seatbelt/src/options/attempt.rs @@ -0,0 +1,262 @@ +// Copyright (c) Microsoft Corporation. + +use std::fmt::Display; + +/// Represents a single attempt in a retry operation. +/// +/// This struct tracks the current attempt index, and it provides methods to check if this is the +/// first or last attempt. +/// +/// The default attempt has: +/// - `attempt`: 0 (first attempt, 0-based indexing) +/// - `is_last`: true (indicating this is both the first and last attempt) +/// +/// This represents a single-shot operation with no retries, where the first +/// attempt is also the final attempt. +/// +/// # Examples +/// +/// ``` +/// use seatbelt::Attempt; +/// +/// // Create the first attempt (attempt 0) +/// let attempt = Attempt::new(0, false); +/// assert!(attempt.is_first()); +/// assert!(!attempt.is_last()); +/// assert_eq!(attempt.index(), 0); +/// +/// // Create the last attempt (attempt 2) +/// let last_attempt = Attempt::new(2, true); +/// assert!(!last_attempt.is_first()); +/// assert!(last_attempt.is_last()); +/// assert_eq!(last_attempt.index(), 2); +/// +/// // Use the default attempt (single-shot operation) +/// let default_attempt = Attempt::default(); +/// assert_eq!(default_attempt.index(), 0); +/// assert!(default_attempt.is_first()); +/// assert!(default_attempt.is_last()); +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Attempt { + index: u32, + is_last: bool, +} + +impl Default for Attempt { + fn default() -> Self { + Self::new(0, true) + } +} + +impl Attempt { + /// Creates a new attempt with the given attempt index and maximum attempts. + /// + /// # Examples + /// + /// ``` + /// use seatbelt::Attempt; + /// + /// let attempt = Attempt::new(0, false); + /// assert_eq!(attempt.index(), 0); + /// ``` + #[must_use] + pub fn new(index: u32, is_last: bool) -> Self { + Self { index, is_last } + } + + /// Returns true if this is the first attempt (attempt 0). + /// + /// # Examples + /// + /// ``` + /// use seatbelt::Attempt; + /// + /// let first_attempt = Attempt::new(0, false); + /// assert!(first_attempt.is_first()); + /// + /// let second_attempt = Attempt::new(1, false); + /// assert!(!second_attempt.is_first()); + /// ``` + #[must_use] + pub fn is_first(self) -> bool { + self.index == 0 + } + + /// Returns true if this is the last allowed attempt. + /// + /// # Examples + /// + /// ``` + /// use seatbelt::Attempt; + /// + /// let not_last = Attempt::new(1, false); + /// assert!(!not_last.is_last()); + /// + /// let last = Attempt::new(1, true); + /// assert!(last.is_last()); + /// ``` + #[must_use] + pub fn is_last(self) -> bool { + self.is_last + } + + /// Returns the current attempt index (0-based). + /// + /// # Examples + /// + /// ``` + /// use seatbelt::Attempt; + /// + /// let attempt = Attempt::new(3, false); + /// assert_eq!(attempt.index(), 3); + /// ``` + #[must_use] + pub fn index(self) -> u32 { + self.index + } + + #[cfg_attr(test, mutants::skip)] // causes test timeouts + #[cfg(any(feature = "retry", test))] + pub(crate) fn increment(self, max_attempts: MaxAttempts) -> Option { + let next = self.index.saturating_add(1); + + match max_attempts { + MaxAttempts::Finite(index) => { + // If we've reached or exceeded the maximum number of attempts, return None. + if next >= index { + return None; + } + + let is_last = next == index.saturating_sub(1); + Some(Self::new(next, is_last)) + } + MaxAttempts::Infinite => Some(Self::new(next, false)), + } + } +} + +impl Display for Attempt { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.index.fmt(f) + } +} + +/// Represents the maximum number of retry attempts allowed. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg(any(feature = "retry", test))] +#[non_exhaustive] +pub(crate) enum MaxAttempts { + /// A finite number of retry attempts. + Finite(u32), + + /// An infinite number of retry attempts. + #[cfg(any(feature = "retry", test))] // currently, only used with retry feature + Infinite, +} + +#[cfg(any(feature = "retry", test))] +impl MaxAttempts { + #[cfg(any(feature = "retry", test))] + pub fn first_attempt(self) -> Attempt { + Attempt::new(0, matches!(self, Self::Finite(1))) + } +} + +#[cfg(any(feature = "retry", test))] +impl From for MaxAttempts { + fn from(value: u32) -> Self { + Self::Finite(value) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_with_zero_is_first_and_not_last() { + let a = Attempt::new(0, false); + assert_eq!(a.index(), 0); + assert!(a.is_first()); + assert!(!a.is_last()); + } + + #[test] + fn new_when_equal_to_max_is_last() { + let a = Attempt::new(5, true); + assert!(a.is_last()); + assert!(!a.is_first()); + } + + #[test] + fn new_with_zero_max_is_both_first_and_last() { + let a = Attempt::new(0, true); + assert!(a.is_first()); + assert!(a.is_last()); + } + + #[test] + fn increment_correct_behavior() { + let max_attempts = MaxAttempts::Finite(2); + let a = Attempt::new(0, false); + assert_eq!(a.index(), 0); + assert!(!a.is_last()); + + let a = a.increment(max_attempts).unwrap(); + assert_eq!(a.index(), 1); + assert!(a.is_last()); + + let a = a.increment(max_attempts); + assert!(a.is_none()); + } + + #[test] + fn increment_with_infinite_preserves_number() { + let a = Attempt::new(u32::MAX, false); + let next = a.increment(MaxAttempts::Infinite).unwrap(); + assert!(!next.is_last()); + assert_eq!(next.index(), u32::MAX); + } + + #[test] + fn display_shows_index() { + let a = Attempt::new(42, false); + assert_eq!(format!("{a}"), "42"); + } + + #[test] + fn from_u32_to_max_attempts() { + let finite: MaxAttempts = 5u32.into(); + assert_eq!(finite, MaxAttempts::Finite(5)); + + let infinite: MaxAttempts = u32::MAX.into(); + assert_eq!(infinite, MaxAttempts::Finite(u32::MAX)); + } + + #[test] + fn first_attempt_returns_correct_attempt() { + let max_finite = MaxAttempts::Finite(3); + let first = max_finite.first_attempt(); + assert_eq!(first.index(), 0); + assert!(!first.is_last()); + + let max_one = MaxAttempts::Finite(1); + let first_one = max_one.first_attempt(); + assert_eq!(first_one.index(), 0); + assert!(first_one.is_last()); + + let max_infinite = MaxAttempts::Infinite; + let first_infinite = max_infinite.first_attempt(); + assert_eq!(first_infinite.index(), 0); + assert!(!first_infinite.is_last()); + } + + #[test] + fn default_ok() { + let default = Attempt::default(); + assert_eq!(default.index(), 0); + assert!(default.is_first()); + assert!(default.is_last()); + } +} diff --git a/crates/seatbelt/src/options/backoff.rs b/crates/seatbelt/src/options/backoff.rs new file mode 100644 index 00000000..748488cb --- /dev/null +++ b/crates/seatbelt/src/options/backoff.rs @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. + +/// Defines the backoff strategy used by resilience middleware for retry operations. +/// +/// Backoff strategies control how delays between retry attempts are calculated, providing +/// different approaches to spacing out retries to avoid overwhelming failing systems while +/// balancing responsiveness and resource utilization. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Backoff { + /// Constant backoff strategy that maintains consistent delays between attempts. + /// + /// **Example with 2s base delay:** `2s, 2s, 2s, 2s, ...` + Constant, + + /// Linear backoff strategy that increases delays proportionally with attempt count. + /// + /// **Example with 2s base delay:** `2s, 4s, 6s, 8s, 10s, ...` + Linear, + + /// Exponential backoff strategy that doubles delays with each attempt. + /// + /// **Example with 2s base delay:** `2s, 4s, 8s, 16s, 32s, ...` + Exponential, +} diff --git a/crates/seatbelt/src/options/define_fn_wrapper.rs b/crates/seatbelt/src/options/define_fn_wrapper.rs new file mode 100644 index 00000000..dbfd6451 --- /dev/null +++ b/crates/seatbelt/src/options/define_fn_wrapper.rs @@ -0,0 +1,162 @@ +// Copyright (c) Microsoft Corporation. + +/// A macro to generate `Fn` like wrapper types with consistent patterns. +/// +/// This macro generates a type that wraps a function in an `Arc`, +/// providing `Clone`, `Debug`, and convenient constructor methods. We need this to allow storing +/// user-provided functions (e.g., predicates) in a thread-safe, cloneable way. +/// +/// # Syntax +/// +/// ```rust,ignore +/// define_fn_wrapper!(TypeName(Fn(args) -> ReturnType)); +/// ``` +/// +/// # Example +/// +/// ```rust,ignore +/// define_fn_wrapper!(ShouldRetry(Fn(&Res, ShouldRetryArgs) -> Recovery)); +/// ``` +/// +/// This generates a `ShouldRetry` struct with methods: +/// - `new(predicate: F) -> Self` where `F: Fn(...) + Send + Sync + 'static` +/// - `call(&self, args...) -> ReturnType` to invoke the wrapped function +/// - `Clone` and `Debug` implementations +macro_rules! define_fn_wrapper { + // Match pattern: Name(Fn(param_name: param_type, ...) -> return_type) + ($name:ident<$($generics:ident),*>(Fn($($param_name:ident: $param_ty:ty),*) -> $return_ty:ty)) => { + pub(crate) struct $name<$($generics),*>(std::sync::Arc $return_ty + Send + Sync>); + + impl<$($generics),*> $name<$($generics),*> { + pub(crate) fn new(predicate: F) -> Self + where + F: Fn($($param_ty),*) -> $return_ty + Send + Sync + 'static, + { + Self(std::sync::Arc::new(predicate)) + } + + pub(crate) fn call(&self, $($param_name: $param_ty),*) -> $return_ty { + (self.0)($($param_name),*) + } + } + + impl<$($generics),*> Clone for $name<$($generics),*> { + fn clone(&self) -> Self { + Self(self.0.clone()) + } + } + + impl<$($generics),*> std::fmt::Debug for $name<$($generics),*> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct(stringify!($name)).finish() + } + } + }; + + // Match pattern without return type (defaults to unit) + ($name:ident<$($generics:ident),*>(Fn($($param_name:ident: $param_ty:ty),*))) => { + $crate::define_fn_wrapper!($name<$($generics),*>(Fn($($param_name: $param_ty),*) -> ())); + }; + + // Alternative match for simple cases without explicit parameter names + // For two parameters + ($name:ident<$($generics:ident),*>(Fn($param1:ty, $param2:ty) -> $return_ty:ty)) => { + $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1, arg2: $param2) -> $return_ty)); + }; + + // For two parameters without return type + ($name:ident<$($generics:ident),*>(Fn($param1:ty, $param2:ty))) => { + $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1, arg2: $param2) -> ())); + }; + + // For one parameter + ($name:ident<$($generics:ident),*>(Fn($param1:ty) -> $return_ty:ty)) => { + $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1) -> $return_ty)); + }; + + // For one parameter without return type + ($name:ident<$($generics:ident),*>(Fn($param1:ty))) => { + $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1) -> ())); + }; + + // For zero parameters + ($name:ident<$($generics:ident),*>(Fn() -> $return_ty:ty)) => { + $crate::define_fn_wrapper!($name<$($generics),*>(Fn() -> $return_ty)); + }; + + // For zero parameters without return type + ($name:ident<$($generics:ident),*>(Fn())) => { + $crate::define_fn_wrapper!($name<$($generics),*>(Fn() -> ())); + }; + + // Match pattern without return type (defaults to unit) + ($name:ident(Fn($($param_name:ident: $param_ty:ty),*))) => { + $crate::define_fn_wrapper!($name(Fn($($param_name: $param_ty),*) -> ())); + }; + + // Alternative match for simple cases without explicit parameter names + // For two parameters + ($name:ident(Fn($param1:ty, $param2:ty) -> $return_ty:ty)) => { + $crate::define_fn_wrapper!($name(Fn(arg1: $param1, arg2: $param2) -> $return_ty)); + }; + + // For two parameters without return type + ($name:ident(Fn($param1:ty, $param2:ty))) => { + $crate::define_fn_wrapper!($name(Fn(arg1: $param1, arg2: $param2) -> ())); + }; + + // For one parameter + ($name:ident(Fn($param1:ty) -> $return_ty:ty)) => { + $crate::define_fn_wrapper!($name(Fn(arg1: $param1) -> $return_ty)); + }; + + // For one parameter without return type + ($name:ident(Fn($param1:ty))) => { + $crate::define_fn_wrapper!($name(Fn(arg1: $param1) -> ())); + }; + + // For zero parameters + ($name:ident(Fn() -> $return_ty:ty)) => { + $crate::define_fn_wrapper!($name(Fn() -> $return_ty)); + }; + + // For zero parameters without return type + ($name:ident(Fn())) => { + $crate::define_fn_wrapper!($name(Fn() -> ())); + }; +} + +pub(crate) use define_fn_wrapper; + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + + define_fn_wrapper!(InOut(Fn(&In) -> Out)); + + #[test] + fn static_assertions() { + static_assertions::assert_impl_all!(InOut: Send, Sync, Debug, Clone); + } + + #[test] + fn call_ok() { + let wrapper = InOut::new(|input: &String| input.clone()); + + let result = wrapper.call(&"Hello, World!".to_string()); + assert_eq!(result, "Hello, World!".to_string()); + + let wrapper = wrapper; + let result = wrapper.call(&"Hello, World!".to_string()); + assert_eq!(result, "Hello, World!".to_string()); + } + + #[test] + fn debug_ok() { + let wrapper = InOut::new(|input: &String| input.clone()); + + let debug_str = format!("{wrapper:?}"); + + assert_eq!(debug_str, "InOut"); + } +} diff --git a/crates/seatbelt/src/options/mod.rs b/crates/seatbelt/src/options/mod.rs new file mode 100644 index 00000000..af728227 --- /dev/null +++ b/crates/seatbelt/src/options/mod.rs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. + +/// A flag indicating that the required property is set. +#[non_exhaustive] +#[derive(Debug)] +pub struct Set; + +/// A flag indicating that the required property has not been set. +#[non_exhaustive] +#[derive(Debug)] +pub struct NotSet; + +crate::define_fn_wrapper!(EnableIf(Fn(&In) -> bool)); + +impl EnableIf { + /// Creates a new `EnableIf` instance that always returns `true`. + pub fn always() -> Self { + Self::new(|_| true) + } + + /// Creates a new `EnableIf` instance that always returns `false`. + pub fn never() -> Self { + Self::new(|_| false) + } +} + +mod seatbelt_options; +pub use seatbelt_options::SeatbeltOptions; + +mod define_fn_wrapper; +pub(crate) use define_fn_wrapper::define_fn_wrapper; + +mod attempt; +mod backoff; + +pub use attempt::Attempt; +#[cfg(any(feature = "retry", test))] +pub(crate) use attempt::MaxAttempts; +pub use backoff::Backoff; diff --git a/crates/seatbelt/src/options/seatbelt_options.rs b/crates/seatbelt/src/options/seatbelt_options.rs new file mode 100644 index 00000000..488a6b87 --- /dev/null +++ b/crates/seatbelt/src/options/seatbelt_options.rs @@ -0,0 +1,271 @@ +// Copyright (c) Microsoft Corporation. + +use std::borrow::Cow; + +use opentelemetry::metrics::*; +use tick::Clock; + +use crate::telemetry::metrics::*; + +pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; + +/// Shared options for resilience middleware pipelines. +/// +/// `SeatbeltOptions` bundles a clock and telemetry primitives (OpenTelemetry +/// meter) that resilience middleware uses to measure time, emit metrics, and +/// report events. Use a single instance to configure a whole resilience +/// pipeline (retries, timeouts, circuit breakers, hedging, rate limiting, +/// etc.). +/// +/// The [`pipeline_name`][`SeatbeltOptions::pipeline_name`] groups resilience middleware under one logical +/// parent for telemetry correlation. When you reuse the same name across all +/// policies in a pipeline, exported metrics and events will carry the same +/// pipeline attribute, making dashboards and analysis easier. +/// +/// You can also override the meter provider via [`meter_provider`](Self::meter_provider) +/// if you need a non-global provider (e.g., tests or custom SDK wiring). +/// +/// # Examples +/// +/// Basic usage: +/// +/// ```rust +/// # use opentelemetry::KeyValue; +/// # use opentelemetry::metrics::MeterProvider; +/// # use seatbelt::SeatbeltOptions; +/// # use seatbelt::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; +/// # use tick::Clock; +/// # fn example(clock: Clock, meter_provider: &dyn MeterProvider) { +/// let options = SeatbeltOptions::::new(&clock) +/// .pipeline_name("auth_pipeline") +/// .meter_provider(meter_provider); +/// +/// // Use the clock for timing operations +/// let start = options.get_clock().instant(); +/// +/// // Create an event reporter +/// let resilience_counter = options.create_resilience_event_counter(); +/// +/// // Report an event with required attributes +/// resilience_counter.add( +/// 1, +/// &[ +/// KeyValue::new(PIPELINE_NAME, options.get_pipeline_name().clone()), +/// KeyValue::new(STRATEGY_NAME, "my_strategy"), +/// KeyValue::new(EVENT_NAME, "my_event"), +/// ], +/// ); +/// # } +/// ``` +/// +/// # Authoring Resilience `MIddlewares` +/// +/// This crate’s primitives are designed to make writing custom resilience middleware +/// straightforward and observable. Pass a shared [`SeatbeltOptions`] to each +/// middleware that needs telemetry. You can keep a single instance (or derive child +/// scopes from it) to group related middleware under one logical parent so their +/// telemetry automatically shares correlation dimensions (e.g., the pipeline name). +/// +/// With `SeatbeltOptions` you can: +/// +/// - Share a common `Clock` for consistent timing and timeouts +/// - Create and use telemetry instruments via the OpenTelemetry `Meter` +/// - Set a logical parent (`pipeline_name`) that groups middleware together +/// +/// ## Middleware pattern +/// +/// Resilience middleware typically follows this pattern: +/// +/// 1. Accept a [`SeatbeltOptions`] in your layer/builder to configure observability +/// 2. Create instruments from the options (e.g., counters, histograms) +/// 3. Wrap the next service/layer and forward requests without hidden global state +/// 4. Report events at key decision points (retries, circuit breaker trips, timeouts, …) +/// 5. Use recovery metadata from error types to make informed retry/backoff decisions +/// +/// > Note: The canonical, in‑depth description of the Oxidizer middleware / service +/// > layering pattern lives in the [`layered`] crate documentation. Resilience +/// > middleware in this crate is expected to follow that same pattern (layer types +/// > implementing `Layer`, constructors taking the next service, no hidden global +/// > state, etc.). If you need to evolve the pattern itself, update it in +/// > `layered` and reference it here instead of diverging. +#[derive(Debug)] +#[non_exhaustive] +pub struct SeatbeltOptions { + clock: Clock, + pipeline_name: Cow<'static, str>, + meter: Meter, + _in: std::marker::PhantomData In>, + _out: std::marker::PhantomData Out>, +} + +impl SeatbeltOptions { + /// Create options with a clock and the global meter provider. + /// + /// Initializes with `pipeline_name = "default"` and a meter from the + /// global provider. Override the provider later via + /// [`meter_provider`](Self::meter_provider) if needed. + pub fn new(clock: impl AsRef) -> Self { + #[cfg(any(feature = "metrics", test))] + let meter = create_meter(opentelemetry::global::meter_provider().as_ref()); + + Self { + clock: clock.as_ref().clone(), + pipeline_name: Cow::Borrowed(DEFAULT_PIPELINE_NAME), + meter, + _in: std::marker::PhantomData, + _out: std::marker::PhantomData, + } + } + + /// Get the configured clock for timing operations. + /// + /// Middlewares use this to measure durations, track timeouts, and perform + /// other time-related operations from a consistent source. + #[must_use] + pub fn get_clock(&self) -> &Clock { + &self.clock + } + + /// Set the logical pipeline name used to group resilience middleware. + /// + /// Use the same `pipeline_name` across the policies (retry, timeout, + /// circuit breaker, etc.) that form one execution pipeline. The name is + /// attached to emitted metrics/events so they can be correlated. Prefer + /// `snake_case`, e.g., `user_auth`, `data_ingest`. + #[must_use] + pub fn pipeline_name(mut self, name: impl Into>) -> Self { + self.pipeline_name = name.into(); + self + } + + /// Get the configured pipeline name (`default` if not set). + #[must_use] + pub fn get_pipeline_name(&self) -> &Cow<'static, str> { + &self.pipeline_name + } + + /// Override the global meter provider with a custom one. + #[must_use] + pub fn meter_provider(self, provider: &dyn MeterProvider) -> Self { + let meter = create_meter(provider); + + Self { meter, ..self } + } + + /// Get the configured OpenTelemetry meter. + /// + /// Use this to create counters, histograms, and gauges for middleware + /// observability. The meter uses: + /// + /// - Name: `seatbelt` + /// - Version: `v0.1.0` + /// - Schema URL: `https://opentelemetry.io/schemas/1.47.0` + #[must_use] + pub fn get_meter(&self) -> &Meter { + &self.meter + } + + /// Creates standardized counter for resilience events. + /// + /// Returns a counter that can be used to report events occurring within + /// resilience middleware. + /// + /// # Required Attributes + /// + /// When reporting events, the following attributes MUST be added: + /// + /// - [`PIPELINE_NAME`][crate::telemetry::PIPELINE_NAME]: The name of the pipeline this middleware belongs to + /// - [`STRATEGY_NAME`][crate::telemetry::STRATEGY_NAME]: The name of the specific strategy/middleware + /// - [`EVENT_NAME`][crate::telemetry::EVENT_NAME]: The name of the event being reported + /// + /// # Examples + /// + /// ```rust + /// # use tick::Clock; + /// # use opentelemetry::KeyValue; + /// # use opentelemetry::metrics::Meter; + /// # use seatbelt::SeatbeltOptions; + /// # use seatbelt::telemetry::{PIPELINE_NAME, STRATEGY_NAME, EVENT_NAME}; + /// # fn example(seatbelt_options: &SeatbeltOptions<(), ()>) { + /// let counter = seatbelt_options.create_resilience_event_counter(); + /// + /// counter.add( + /// 1, + /// &[ + /// KeyValue::new(PIPELINE_NAME, "my_pipeline"), + /// KeyValue::new(STRATEGY_NAME, "my_strategy"), + /// KeyValue::new(EVENT_NAME, "my_event"), + /// ], + /// ); + /// # } + /// ``` + #[must_use] + pub fn create_resilience_event_counter(&self) -> Counter { + create_resilience_event_counter(self.get_meter()) + } +} + +impl Clone for SeatbeltOptions { + fn clone(&self) -> Self { + Self { + clock: self.clock.clone(), + pipeline_name: self.pipeline_name.clone(), + meter: self.meter.clone(), + _in: std::marker::PhantomData, + _out: std::marker::PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use opentelemetry_sdk::metrics::{InMemoryMetricExporter, SdkMeterProvider}; + + use super::*; + + #[test] + fn test_new_with_clock_sets_default_pipeline_name() { + let clock = tick::Clock::new_frozen(); + let options = SeatbeltOptions::<(), ()>::new(clock); + assert_eq!(options.get_pipeline_name().as_ref(), DEFAULT_PIPELINE_NAME); + // Ensure clock reference behaves (timestamp monotonic relative behaviour not required, just accessible) + let _ = options.get_clock().system_time(); + } + + #[test] + fn test_pipeline_name_with_custom_value_sets_name_and_is_owned() { + let clock = tick::Clock::new_frozen(); + let options = + SeatbeltOptions::<(), ()>::new(clock).pipeline_name(String::from("custom_pipeline")); + assert_eq!(options.get_pipeline_name().as_ref(), "custom_pipeline"); + assert!(matches!(options.get_pipeline_name(), Cow::Owned(_))); + } + + #[cfg(not(miri))] + #[test] + fn test_create_event_reporter_with_multiple_clones_accumulates_events() { + let clock = tick::Clock::new_frozen(); + let (provider, exporter) = test_meter_provider(); + + let options = SeatbeltOptions::<(), ()>::new(clock).meter_provider(&provider); + let c1 = create_resilience_event_counter(options.get_meter()); + let c2 = create_resilience_event_counter(options.get_meter()); + c1.add(1, &[]); + c2.add(2, &[]); + + provider.force_flush().unwrap(); + let metrics = exporter.get_finished_metrics().unwrap(); + let dump = format!("{metrics:?}"); + assert!(dump.contains("resilience.event")); + // Basic sanity that total of 3 was recorded somewhere in debug output. + assert!(dump.contains('3')); + } + + fn test_meter_provider() -> (SdkMeterProvider, InMemoryMetricExporter) { + let exporter = InMemoryMetricExporter::default(); + let provider = SdkMeterProvider::builder() + .with_periodic_exporter(exporter.clone()) + .build(); + (provider, exporter) + } +} diff --git a/crates/seatbelt/src/retry/args.rs b/crates/seatbelt/src/retry/args.rs new file mode 100644 index 00000000..3e25e4c8 --- /dev/null +++ b/crates/seatbelt/src/retry/args.rs @@ -0,0 +1,158 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use tick::Clock; + +use crate::{Attempt, RecoveryInfo}; + +/// Arguments for the [`clone_input_with`][super::RetryLayer::clone_input_with] callback function. +/// +/// Provides context for input cloning operations. +#[derive(Debug)] +pub struct CloneArgs { + pub(super) attempt: Attempt, + pub(super) previous_recovery: Option, +} + +impl CloneArgs { + /// Returns the current attempt information. + #[must_use] + pub fn attempt(&self) -> Attempt { + self.attempt + } + + /// Returns the recovery information from the previous attempt, if any. + #[must_use] + pub fn previous_recovery(&self) -> Option<&RecoveryInfo> { + self.previous_recovery.as_ref() + } +} + +/// Arguments for the [`recovery_with`][super::RetryLayer::recovery_with] callback function. +/// +/// Provides context for recovery classification. +#[derive(Debug)] +pub struct RecoveryArgs<'a> { + pub(super) attempt: Attempt, + pub(super) clock: &'a Clock, +} + +impl RecoveryArgs<'_> { + /// Returns the current attempt information. + #[must_use] + pub fn attempt(&self) -> Attempt { + self.attempt + } + + /// Returns the clock used for time-related operations. + #[must_use] + pub fn clock(&self) -> &Clock { + self.clock + } +} + +/// Arguments for the [`on_retry`][super::RetryLayer::on_retry] callback function. +/// +/// Provides context for retry notifications. +#[derive(Debug)] +pub struct OnRetryArgs { + pub(super) attempt: Attempt, + pub(super) retry_delay: Duration, + pub(super) recovery: RecoveryInfo, +} + +impl OnRetryArgs { + /// Returns the current attempt information. + #[must_use] + pub fn attempt(&self) -> Attempt { + self.attempt + } + + /// Returns the delay before the next retry attempt. + #[must_use] + pub fn retry_delay(&self) -> Duration { + self.retry_delay + } + + /// Returns the recovery information that triggered this retry. + #[must_use] + pub fn recovery(&self) -> &RecoveryInfo { + &self.recovery + } +} + +/// Arguments for the [`restore_input`][super::RetryLayer::restore_input] callback function. +/// +/// Provides context for input restoration when cloning is unavailable. +#[derive(Debug)] +pub struct RestoreInputArgs { + pub(super) attempt: Attempt, + pub(super) recovery: RecoveryInfo, +} + +impl RestoreInputArgs { + /// Returns the current attempt information. + #[must_use] + pub fn attempt(&self) -> Attempt { + self.attempt + } + + /// Returns the recovery information that triggered this restoration attempt. + #[must_use] + pub fn recovery(&self) -> &RecoveryInfo { + &self.recovery + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn should_recover_args() { + let clock = Clock::new_frozen(); + + let args = RecoveryArgs { + attempt: Attempt::new(3, true), + clock: &clock, + }; + + assert_eq!(args.attempt(), Attempt::new(3, true)); + } + + #[test] + fn on_retry_args() { + let args = OnRetryArgs { + attempt: Attempt::new(2, false), + retry_delay: Duration::from_secs(5), + recovery: RecoveryInfo::retry(), + }; + + assert_eq!(args.attempt(), Attempt::new(2, false)); + assert_eq!(args.retry_delay(), Duration::from_secs(5)); + assert_eq!(*args.recovery(), RecoveryInfo::retry()); + } + + #[test] + fn clone_args() { + let args = CloneArgs { + attempt: Attempt::new(1, false), + previous_recovery: Some(RecoveryInfo::retry()), + }; + + assert_eq!(args.attempt(), Attempt::new(1, false)); + assert_eq!(args.previous_recovery(), Some(&RecoveryInfo::retry())); + } + + #[test] + fn restore_input_args() { + let args = RestoreInputArgs { + attempt: Attempt::new(2, true), + recovery: RecoveryInfo::retry(), + }; + + assert_eq!(args.attempt(), Attempt::new(2, true)); + assert_eq!(*args.recovery(), RecoveryInfo::retry()); + } +} diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs new file mode 100644 index 00000000..3ae3148f --- /dev/null +++ b/crates/seatbelt/src/retry/backoff.rs @@ -0,0 +1,689 @@ +// Copyright (c) Microsoft Corporation. + +use std::cmp::min; +use std::time::Duration; + +use crate::Backoff; +use crate::retry::constants::{DEFAULT_BACKOFF, DEFAULT_BASE_DELAY, DEFAULT_USE_JITTER}; +use crate::rnd::Rnd; + +/// The factor used to determine the range of jitter applied to delays. +const JITTER_FACTOR: f64 = 0.5; + +/// The default factor used for exponential backoff calculations for cases where jitter is not applied. +const EXPONENTIAL_FACTOR: f64 = 2.0; + +// The delay generation follows the Polly V8 implementation: +// +// https://github.com/App-vNext/Polly/blob/452b34ee1e3a45ccce156a6980f60901a623ee67/src/Polly.Core/Retry/RetryHelper.cs#L3 +#[derive(Debug)] +pub(crate) struct DelayBackoff(pub(super) BackoffOptions); + +impl From for DelayBackoff { + fn from(props: BackoffOptions) -> Self { + Self(props) + } +} + +impl DelayBackoff { + pub fn delays(&self) -> impl Iterator { + DelaysIter { + props: self.0.clone(), + attempt: 0, + prev: 0.0, + } + } +} + +#[derive(Debug)] +struct DelaysIter { + props: BackoffOptions, + attempt: u32, + // The state that is required to compute the next delay when using + // decorrelated jitter backoff. + prev: f64, +} + +impl Iterator for DelaysIter { + type Item = Duration; + + fn next(&mut self) -> Option { + // zero base delay => always zero + if self.props.base_delay.is_zero() { + return Some(Duration::ZERO); + } + + let next_attempt = self.attempt.saturating_add(1); + let delay = match (self.props.backoff_type, self.props.use_jitter) { + (Backoff::Constant, false) => self.props.base_delay, + (Backoff::Constant, true) => apply_jitter(self.props.base_delay, &self.props.rnd), + (Backoff::Linear, _) => { + let delay = self.props.base_delay.saturating_mul(next_attempt); + if self.props.use_jitter { + apply_jitter(delay, &self.props.rnd) + } else { + delay + } + } + (Backoff::Exponential, false) => duration_mul_pow2(self.props.base_delay, self.attempt), + (Backoff::Exponential, true) => decorrelated_jitter_backoff_v2( + self.attempt, + self.props.base_delay, + &mut self.prev, + &self.props.rnd, + ), + }; + + self.attempt = next_attempt; + Some(clamp_to_max(delay, self.props.max_delay)) + } +} + +fn clamp_to_max(d: Duration, max: Option) -> Duration { + max.map_or(d, |m| min(d, m)) +} + +fn duration_mul_pow2(base: Duration, attempt: u32) -> Duration { + let factor = EXPONENTIAL_FACTOR.powi(i32::try_from(attempt).unwrap_or(i32::MAX)); + secs_to_duration_saturating(base.as_secs_f64() * factor) +} + +/// Adds a symmetric, uniform jitter around the given delay. +/// +/// - Jitter is in both directions and relative to `delay` (centered on it). +/// - With `JITTER_FACTOR = 0.5`, the result lies in `[0.75*delay, 1.25*delay]`. +/// - Randomness comes from [`Rnd`]; conversion saturates on overflow and clamps at zero. +#[inline] +fn apply_jitter(delay: Duration, rnd: &Rnd) -> Duration { + let ms = delay.as_secs_f64() * 1000.0; + let offset = (ms * JITTER_FACTOR) / 2.0; + let random_delay = (ms * JITTER_FACTOR).mul_add(rnd.next_f64(), -offset); + let new_ms = ms + random_delay; + + secs_to_duration_saturating(new_ms / 1000.0) +} + +/// Decorrelated-jitter backoff (v2): smooth exponential growth with bounded randomization. +/// +/// Decorrelated Jitter V2 spreads retries evenly while preserving exponential backoff +/// (with a configurable first-retry median), reducing synchronized spikes and tail-latency +/// compared to naïve “±random” jitter. +/// +/// What does "decorrelated" mean? +/// +/// - Successive delays are not a direct function of the immediately previous +/// delay. Instead, each step samples a random phase (`t = attempt + U[0,1)`) +/// and advances a smooth curve; we only take the delta from the previous +/// position on that curve. This weakens correlation between consecutive +/// samples and reduces synchronization across many callers. +/// +/// What does "v2" mean? +/// +/// - It refers to the second-generation formulation from +/// `Polly.Contrib.WaitAndRetry` (linked below). Compared to the earlier +/// (v1) "decorrelated jitter" popularized in the AWS blog post, v2 uses a +/// closed-form function combining exponential growth with a `tanh(sqrt(p*t))` +/// taper to achieve monotonic expected growth, reduced tail latencies, and a +/// tighter distribution—while still remaining decorrelated. +/// +/// References +/// - [Polly V8 implementation](https://github.com/App-vNext/Polly/blob/8ba1e3ba295542cbc937d0555fadfa0d23b5c568/src/Polly.Core/Retry/RetryHelper.cs#L96) +/// - [Polly V7 implementation](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry/blob/7596d2dacf22d88bbd814bc49c28424fb6e921e9/src/Polly.Contrib.WaitAndRetry/Backoff.DecorrelatedJitterV2.cs#L22) +/// - [Polly.Contrib.WaitAndRetry repo](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry) +#[inline] +fn decorrelated_jitter_backoff_v2( + attempt: u32, + base_delay: Duration, + prev: &mut f64, + rnd: &Rnd, +) -> Duration { + // The original author/credit for this jitter formula is @george-polevoy . + // Jitter formula used with permission as described at https://github.com/App-vNext/Polly/issues/530#issuecomment-526555979 + // Minor adaptations (pFactor = 4.0 and rpScalingFactor = 1 / 1.4d) by @reisenberger, to scale the formula output for easier parameterization to users. + + // A factor used within the formula to help smooth the first calculated delay. + const P_FACTOR: f64 = 4.0; + + // A factor used to scale the median values of the retry times generated by the formula to be _near_ whole seconds, to aid Polly user comprehension. + // This factor allows the median values to fall approximately at 1, 2, 4 etc seconds, instead of 1.4, 2.8, 5.6, 11.2. + const RP_SCALING: f64 = 1.0 / 1.4; + + let target_secs_first_delay = base_delay.as_secs_f64(); + + let t = f64::from(attempt) + rnd.next_f64(); + let next = t.exp2() * (P_FACTOR * t).sqrt().tanh(); + + if !next.is_finite() { + *prev = next; + return Duration::MAX; + } + + let formula_intrinsic_value = next - *prev; + *prev = next; + + secs_to_duration_saturating(formula_intrinsic_value * RP_SCALING * target_secs_first_delay) +} + +fn secs_to_duration_saturating(secs: f64) -> Duration { + if secs <= 0.0 { + return Duration::ZERO; + } + + Duration::try_from_secs_f64(secs).unwrap_or(Duration::MAX) +} + +#[derive(Debug, Clone)] +pub(super) struct BackoffOptions { + pub backoff_type: Backoff, + pub base_delay: Duration, + pub max_delay: Option, + pub use_jitter: bool, + pub rnd: Rnd, +} + +impl Default for BackoffOptions { + fn default() -> Self { + Self { + backoff_type: DEFAULT_BACKOFF, + base_delay: DEFAULT_BASE_DELAY, + max_delay: None, + use_jitter: DEFAULT_USE_JITTER, + rnd: Rnd::default(), + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Mutex; + + use super::*; + + #[test] + fn default_props() { + let props = BackoffOptions::default(); + + assert_eq!(props.backoff_type, Backoff::Exponential); + assert_eq!(props.base_delay, Duration::from_secs(2)); + assert_eq!(props.max_delay, None); + assert!(props.use_jitter); + } + + #[test] + fn smoke_constant_no_jitter() { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Constant, + base_delay: Duration::from_millis(200), + max_delay: None, + use_jitter: false, + rnd: Rnd::default(), + }); + let v: Vec<_> = backoff.delays().take(3).collect(); + assert_eq!(v, vec![Duration::from_millis(200); 3]); + } + + #[test] + fn smoke_linear_no_jitter() { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Linear, + base_delay: Duration::from_millis(100), + max_delay: None, + use_jitter: false, + rnd: Rnd::default(), + }); + + let v: Vec<_> = backoff.delays().take(4).collect(); + assert_eq!( + v, + vec![ + Duration::from_millis(100), + Duration::from_millis(200), + Duration::from_millis(300), + Duration::from_millis(400), + ] + ); + } + + #[test] + fn smoke_exponential_cap() { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_millis(100), + max_delay: Some(Duration::from_secs(1)), + use_jitter: false, + rnd: Rnd::default(), + }); + + // 100ms, 200ms, 400ms, 800ms, then clamped at 1s + let v: Vec<_> = backoff.delays().take(6).collect(); + assert_eq!(v[0], Duration::from_millis(100)); + assert_eq!(v[1], Duration::from_millis(200)); + assert_eq!(v[2], Duration::from_millis(400)); + assert_eq!(v[3], Duration::from_millis(800)); + assert_eq!(v[4], Duration::from_secs(1)); + assert_eq!(v[5], Duration::from_secs(1)); + } + + #[test] + fn zero_base_delay_always_zero() { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::ZERO, + max_delay: None, + use_jitter: true, + rnd: Rnd::default(), + }); + let v: Vec<_> = backoff.delays().take(5).collect(); + assert!(v.iter().all(|d| *d == Duration::ZERO)); + } + + #[test] + fn constant_with_jitter() { + // Test with fixed random values to verify jitter behavior + let rnd = Rnd::new_fixed(0.0); + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Constant, + base_delay: Duration::from_secs(1), + max_delay: None, + use_jitter: true, + rnd, + }); + + let v: Vec<_> = backoff.delays().take(3).collect(); + // With random value 0.0, jitter should give us 0.75 seconds + assert_eq!(v[0], Duration::from_millis(750)); + assert_eq!(v[1], Duration::from_millis(750)); + assert_eq!(v[2], Duration::from_millis(750)); + } + + #[test] + fn constant_with_different_jitter_values() { + // Test with random value 0.4 + let rnd = Rnd::new_fixed(0.4); + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Constant, + base_delay: Duration::from_secs(1), + max_delay: None, + use_jitter: true, + rnd, + }); + + let delay = backoff.delays().next().unwrap(); + // With random value 0.4, jitter should give us 0.95 seconds + assert_eq!(delay, Duration::from_millis(950)); + + // Test with random value 1.0 + let rnd = Rnd::new_fixed(1.0); + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Constant, + base_delay: Duration::from_secs(1), + max_delay: None, + use_jitter: true, + rnd, + }); + + let delay = backoff.delays().next().unwrap(); + // With random value 1.0, jitter should give us 1.25 seconds + assert_eq!(delay, Duration::from_millis(1250)); + } + + #[test] + fn linear_with_jitter() { + // Test with fixed random value 0.5 + let rnd = Rnd::new_fixed(0.5); + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Linear, + base_delay: Duration::from_secs(1), + max_delay: None, + use_jitter: true, + rnd, + }); + + let v: Vec<_> = backoff.delays().take(3).collect(); + // attempt 0: base_delay * 1 = 1s, with jitter 0.5 should be exactly 1s + // attempt 1: base_delay * 2 = 2s, with jitter 0.5 should be exactly 2s + // attempt 2: base_delay * 3 = 3s, with jitter 0.5 should be exactly 3s + assert_eq!(v[0], Duration::from_secs(1)); + assert_eq!(v[1], Duration::from_secs(2)); + assert_eq!(v[2], Duration::from_secs(3)); + } + + #[test] + fn linear_with_different_jitter_values() { + // Test linear with various jitter values for attempt 2 (3rd delay) + let test_cases = [ + (0.0, 2250), // 3s * (1 + 0.5 * (0.0 - 0.5)) = 2.25s + (0.4, 2850), // 3s * (1 + 0.5 * (0.4 - 0.5)) = 2.85s + (0.6, 3150), // 3s * (1 + 0.5 * (0.6 - 0.5)) = 3.15s + (1.0, 3750), // 3s * (1 + 0.5 * (1.0 - 0.5)) = 3.75s + ]; + + for (random_val, expected_ms) in test_cases { + let rnd = Rnd::new_fixed(random_val); + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Linear, + base_delay: Duration::from_secs(1), + max_delay: None, + use_jitter: true, + rnd, + }); + + let v: Vec<_> = backoff.delays().take(3).collect(); + assert_eq!(v[2], Duration::from_millis(expected_ms)); + } + } + + #[test] + fn exponential_no_jitter() { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_secs(1), + max_delay: None, + use_jitter: false, + rnd: Rnd::default(), + }); + + let v: Vec<_> = backoff.delays().take(3).collect(); + assert_eq!( + v, + vec![ + Duration::from_secs(1), // 2^0 = 1 + Duration::from_secs(2), // 2^1 = 2 + Duration::from_secs(4), // 2^2 = 4 + ] + ); + } + + #[test] + fn max_delay_respected_all_types() { + let max_delay = Duration::from_secs(1); + + // Test constant with large base delay + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Constant, + base_delay: Duration::from_secs(10), + max_delay: Some(max_delay), + use_jitter: false, + rnd: Rnd::default(), + }); + let v: Vec<_> = backoff.delays().take(3).collect(); + assert!(v.iter().all(|d| *d == max_delay)); + + // Test linear with large multiplier + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Linear, + base_delay: Duration::from_secs(10), + max_delay: Some(max_delay), + use_jitter: false, + rnd: Rnd::default(), + }); + let v: Vec<_> = backoff.delays().take(3).collect(); + assert!(v.iter().all(|d| *d == max_delay)); + + // Test exponential with large base delay + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_secs(10), + max_delay: Some(max_delay), + use_jitter: false, + rnd: Rnd::default(), + }); + let v: Vec<_> = backoff.delays().take(3).collect(); + assert!(v.iter().all(|d| *d == max_delay)); + } + + #[test] + fn max_delay_with_jitter() { + let rnd = Rnd::new_fixed(0.5); + + let max_delay = Duration::from_secs(1); + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Linear, + base_delay: Duration::from_secs(10), + max_delay: Some(max_delay), + use_jitter: true, + rnd, + }); + + let v: Vec<_> = backoff.delays().take(3).collect(); + assert!(v.iter().all(|d| *d == max_delay)); + } + + #[test] + fn delay_less_than_max_delay_respected() { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Constant, + base_delay: Duration::from_secs(1), + max_delay: Some(Duration::from_secs(2)), + use_jitter: false, + rnd: Rnd::default(), + }); + + let v: Vec<_> = backoff.delays().take(3).collect(); + assert!(v.iter().all(|d| *d == Duration::from_secs(1))); + } + + #[test] + fn exponential_overflow_returns_max_duration() { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_secs(86400), // 1 day + max_delay: None, + use_jitter: false, + rnd: Rnd::default(), + }); + + // Large attempt should cause overflow and return Duration::MAX + let v: Vec<_> = backoff.delays().skip(1000).take(1).collect(); + assert_eq!(v[0], Duration::MAX); + } + + #[test] + fn exponential_overflow_with_max_delay() { + let max_delay = Duration::from_secs(172_800); // 2 days + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_secs(86400), // 1 day + max_delay: Some(max_delay), + use_jitter: false, + rnd: Rnd::default(), + }); + + // Large attempt should cause overflow but be clamped to max_delay + let v: Vec<_> = backoff.delays().skip(1000).take(1).collect(); + assert_eq!(v[0], max_delay); + } + + #[test] + fn exponential_with_jitter_is_positive() { + let test_attempts = [1, 2, 3, 4, 10, 100, 1000, 1024, 1025]; + + for attempt in test_attempts { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_secs(2), + max_delay: None, + use_jitter: true, + rnd: Rnd::default(), + }); + + let delays: Vec<_> = backoff.delays().skip(attempt).take(2).collect(); + assert!( + delays[0] > Duration::ZERO, + "Attempt {attempt}: first delay should be positive" + ); + assert!( + delays[1] > Duration::ZERO, + "Attempt {attempt}: second delay should be positive" + ); + } + } + + #[test] + fn exponential_with_jitter_respects_max_delay() { + let test_attempts = [1, 2, 3, 4, 10, 100, 1000, 1024, 1025]; + let max_delay = Duration::from_secs(30); + + for attempt in test_attempts { + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_secs(2), + max_delay: Some(max_delay), + use_jitter: true, + rnd: Rnd::default(), + }); + + let delays: Vec<_> = backoff.delays().skip(attempt).take(2).collect(); + assert!( + delays[0] > Duration::ZERO, + "Attempt {attempt}: first delay should be positive" + ); + assert!( + delays[0] <= max_delay, + "Attempt {attempt}: first delay should not exceed max" + ); + assert!( + delays[1] > Duration::ZERO, + "Attempt {attempt}: second delay should be positive" + ); + assert!( + delays[1] <= max_delay, + "Attempt {attempt}: second delay should not exceed max" + ); + } + } + + #[test] + fn exponential_with_jitter_reproducible_with_fixed_values() { + let rnd1 = Rnd::new_fixed(0.5); + let backoff1 = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_millis(7800), // 7.8 seconds + max_delay: None, + use_jitter: true, + rnd: rnd1, + }); + + let rnd2 = Rnd::new_fixed(0.5); + let backoff2 = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_millis(7800), // 7.8 seconds + max_delay: None, + use_jitter: true, + rnd: rnd2, + }); + + let delays1: Vec<_> = backoff1.delays().take(10).collect(); + let delays2: Vec<_> = backoff2.delays().take(10).collect(); + + assert_eq!(delays1, delays2); + assert!(delays1.iter().all(|d| *d > Duration::ZERO)); + } + + #[test] + fn exponential_with_jitter_different_values_different_results() { + let rnd1 = Rnd::new_fixed(0.2); + let backoff1 = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_millis(7800), // 7.8 seconds + max_delay: None, + use_jitter: true, + rnd: rnd1, + }); + + let rnd2 = Rnd::new_fixed(0.8); + let backoff2 = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_millis(7800), // 7.8 seconds + max_delay: None, + use_jitter: true, + rnd: rnd2, + }); + + let delays1: Vec<_> = backoff1.delays().take(10).collect(); + let delays2: Vec<_> = backoff2.delays().take(10).collect(); + + assert_ne!(delays1, delays2); + assert!(delays1.iter().all(|d| *d > Duration::ZERO)); + assert!(delays2.iter().all(|d| *d > Duration::ZERO)); + } + + // This test checks that the exponential backoff with jitter produces the same sequence of delays + // as Polly v8: + // + // https://github.com/App-vNext/Polly/blob/452b34ee1e3a45ccce156a6980f60901a623ee67/test/Polly.Core.Tests/Retry/RetryHelperTests.cs#L254 + #[test] + fn exponential_with_jitter_compatibility_with_polly_v8() { + let random_values = Mutex::new( + [ + 0.726_243_269_967_959_8, + 0.817_325_359_590_968_7, + 0.768_022_689_394_663_4, + 0.558_161_191_436_537_2, + 0.206_033_154_021_032_7, + 0.558_884_794_618_415_1, + 0.906_027_066_011_925_7, + 0.442_177_873_310_715_84, + 0.977_549_753_141_379_8, + 0.273_704_457_689_870_34, + ] + .into_iter(), + ); + + let delays_ms = [ + 8_626, 10_830, 18_396, 27_703, 37_213, 159_824, 405_539, 300_743, 1_839_611, 639_970, + ]; + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_millis(7800), // 7.8 seconds + max_delay: None, + use_jitter: true, + rnd: Rnd::new_function(move || random_values.lock().unwrap().next().unwrap()), + }); + + let computed: Vec<_> = backoff.delays().take(10).map(|v| v.as_millis()).collect(); + assert_eq!(computed, delays_ms); + } + + #[test] + fn exponential_without_jitter_ensure_expected_delays() { + let random_values = Mutex::new( + [ + 0.726_243_269_967_959_8, + 0.817_325_359_590_968_7, + 0.768_022_689_394_663_4, + 0.558_161_191_436_537_2, + 0.206_033_154_021_032_7, + 0.558_884_794_618_415_1, + 0.906_027_066_011_925_7, + 0.442_177_873_310_715_84, + 0.977_549_753_141_379_8, + 0.273_704_457_689_870_34, + ] + .into_iter(), + ); + + let delays_ms = [ + 7800, 15600, 31200, 62400, 124_800, 249_600, 499_200, 998_400, 1_996_800, 3_993_600, + ]; + + let backoff = DelayBackoff(BackoffOptions { + backoff_type: Backoff::Exponential, + base_delay: Duration::from_millis(7800), // 7.8 seconds + max_delay: None, + use_jitter: false, + rnd: Rnd::new_function(move || random_values.lock().unwrap().next().unwrap()), + }); + + let computed: Vec<_> = backoff.delays().take(10).map(|v| v.as_millis()).collect(); + assert_eq!(computed, delays_ms); + } +} diff --git a/crates/seatbelt/src/retry/callbacks.rs b/crates/seatbelt/src/retry/callbacks.rs new file mode 100644 index 00000000..c7f14330 --- /dev/null +++ b/crates/seatbelt/src/retry/callbacks.rs @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. + +use super::{CloneArgs, OnRetryArgs, RecoveryArgs, RestoreInputArgs}; +use crate::RecoveryInfo; + +crate::define_fn_wrapper!(CloneInput(Fn(&mut In, CloneArgs) -> Option)); +crate::define_fn_wrapper!(ShouldRecover(Fn(&Out, RecoveryArgs) -> RecoveryInfo)); +crate::define_fn_wrapper!(OnRetry(Fn(&Out, OnRetryArgs))); +crate::define_fn_wrapper!(RestoreInput(Fn(&mut Out, RestoreInputArgs) -> Option)); diff --git a/crates/seatbelt/src/retry/constants.rs b/crates/seatbelt/src/retry/constants.rs new file mode 100644 index 00000000..b73ed74b --- /dev/null +++ b/crates/seatbelt/src/retry/constants.rs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use crate::Backoff; + +/// Default backoff strategy: exponential backoff. +/// +/// Exponential backoff quickly reduces request pressure after failures and +/// naturally spaces out subsequent attempts. This is the commonly recommended +/// choice for transient faults in distributed systems and pairs well with jitter +/// to avoid thundering herds. +pub(super) const DEFAULT_BACKOFF: Backoff = Backoff::Exponential; + +/// Base delay for the backoff schedule; conservative 2 seconds by default. +/// +/// A 2s starting delay prevents aggressive retry storms during partial outages +/// while still enabling fast recovery for short-lived failures. Workloads with +/// different needs can override this via configuration. +pub(super) const DEFAULT_BASE_DELAY: Duration = Duration::from_secs(2); + +/// Enable jitter by default to desynchronize clients and reduce contention. +/// +/// Randomizing retry delays mitigates correlated bursts and improves tail +/// latency under contention. See [Exponential Backoff and Jitter](https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter) for details. +pub(super) const DEFAULT_USE_JITTER: bool = true; + +/// Default maximum retry attempts: 3. +/// +/// The default is inherited from Polly v8 which is a widely used resilience library +/// and also uses 3 retry attempts. +pub(super) const DEFAULT_RETRY_ATTEMPTS: u32 = 3; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs new file mode 100644 index 00000000..9df45da4 --- /dev/null +++ b/crates/seatbelt/src/retry/layer.rs @@ -0,0 +1,776 @@ +// Copyright (c) Microsoft Corporation. + +use std::marker::PhantomData; +use std::time::Duration; + +use opentelemetry::StringValue; + +use crate::retry::backoff::BackoffOptions; +use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; +use crate::retry::{ + CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, + Retry, ShouldRecover, +}; +use crate::service::Layer; +use crate::{Backoff, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, SeatbeltOptions, Set}; + +/// Builder for configuring retry resilience middleware. +/// +/// This type is created by calling [`Retry::layer`](crate::retry::Retry::layer) and uses the +/// typestate pattern to enforce that required properties are configured before the retry middleware can be built: +/// +/// - [`clone_input`][RetryLayer::clone_input]: Required to specify how to clone inputs for retry attempts +/// - [`recovery`][RetryLayer::recovery]: Required to determine if an output should trigger a retry +/// +/// For comprehensive examples, see the [retry module][crate::retry] documentation. +#[derive(Debug)] +pub struct RetryLayer { + options: SeatbeltOptions, + max_attempts: MaxAttempts, + backoff: BackoffOptions, + clone_input: Option>, + should_recover: Option>, + on_retry: Option>, + enable_if: EnableIf, + strategy_name: StringValue, + restore_input: Option>, + handle_unavailable: bool, + _state: PhantomData Out>, +} + +impl RetryLayer { + #[must_use] + pub(crate) fn new(name: StringValue, options: &SeatbeltOptions) -> Self { + Self { + options: options.clone(), + max_attempts: MaxAttempts::Finite(DEFAULT_RETRY_ATTEMPTS.saturating_add(1)), + backoff: BackoffOptions::default(), + clone_input: None, + should_recover: None, + on_retry: None, + enable_if: EnableIf::always(), + strategy_name: name, + restore_input: None, + handle_unavailable: false, + _state: PhantomData, + } + } +} + +impl RetryLayer { + /// Sets the maximum number of retry attempts. + /// + /// This specifies the maximum number of retry attempts in addition to the original call. + /// For example, if `max_retry_attempts` is 3, the operation will be attempted up to + /// 4 times total (1 original + 3 retries). + /// + /// **Default**: 3 retry attempts + #[must_use] + pub fn max_retry_attempts(mut self, max_attempts: u32) -> Self { + self.max_attempts = MaxAttempts::Finite(max_attempts.saturating_add(1)); + self + } + + /// Configures infinite retry attempts. + /// + /// This setting will cause the operation to be retried indefinitely until it succeeds + /// or the retry is aborted by other means (e.g., cancellation, timeout). + /// + /// **Warning**: Use with caution as this can cause infinite loops if the operation + /// consistently fails. + #[must_use] + pub fn infinite_retry_attempts(mut self) -> Self { + self.max_attempts = MaxAttempts::Infinite; + self + } + + /// Sets the backoff strategy for delay calculation. + /// + /// - [`Backoff::Constant`]: Same delay between all retries + /// - [`Backoff::Linear`]: Linearly increasing delay (`base_delay` × attempt) + /// - [`Backoff::Exponential`]: Exponentially increasing delay (`base_delay` × 2^attempt) + /// + /// **Default**: [`Backoff::Exponential`] + #[must_use] + pub fn backoff(mut self, backoff_type: Backoff) -> Self { + self.backoff.backoff_type = backoff_type; + self + } + + /// Sets the base delay used for backoff calculations. + /// + /// The meaning depends on the backoff strategy: + /// - **Constant**: The actual delay between retries + /// - **Linear**: Initial delay; subsequent delays are `base_delay × attempt_number` + /// - **Exponential**: Initial delay; subsequent delays grow exponentially + /// + /// **Default**: 2 seconds + #[must_use] + pub fn base_delay(mut self, delay: Duration) -> Self { + self.backoff.base_delay = delay; + self + } + + /// Sets the maximum allowed delay between retries. + /// + /// This caps the backoff calculation to prevent excessively long delays. + /// If not set, delays can grow indefinitely based on the backoff algorithm. + /// + /// **Default**: None (no limit) + #[must_use] + pub fn max_delay(mut self, max_delay: Duration) -> Self { + self.backoff.max_delay = Some(max_delay); + self + } + + /// Enables or disables jitter to reduce correlation between retries. + /// + /// Jitter adds randomization to delay calculations to prevent thundering herd + /// problems when multiple clients retry simultaneously. This is especially + /// important in distributed systems to avoid synchronized retry storms. + /// + /// **Default**: true (jitter enabled) + #[must_use] + pub fn use_jitter(mut self, use_jitter: bool) -> Self { + self.backoff.use_jitter = use_jitter; + self + } + + /// Sets the input cloning function. + /// + /// This function is called before retry attempts to clone the input. + /// Return `Some(cloned_input)` to proceed with retry, or `None` to abort + /// retry and return the last failed result. + /// + /// This is required because Rust's ownership model doesn't allow reusing + /// the same input value across multiple attempts. + /// + /// # Arguments + /// + /// * `clone_fn` - Function that takes a reference to the input and [`CloneArgs`] + /// containing context about the retry attempt, and returns an optional cloned input + #[must_use] + pub fn clone_input_with( + mut self, + clone_fn: impl Fn(&mut In, CloneArgs) -> Option + Send + Sync + 'static, + ) -> RetryLayer { + self.clone_input = Some(CloneInput::new(clone_fn)); + self.into_state::() + } + + /// Automatically sets the input cloning function for types that implement [`Clone`]. + /// + /// This is a convenience method that uses the standard [`Clone`] trait to clone + /// inputs for retry attempts. For types that implement [`Clone`], this provides + /// a simple way to enable retries without manually implementing a cloning function. + /// + /// This is equivalent to calling [`clone_input_with`][RetryLayer::clone_input_with] with + /// `|input, _args| Some(input.clone())`. + /// + /// # Type Requirements + /// + /// This method is only available when the input type `In` implements [`Clone`]. + #[must_use] + pub fn clone_input(self) -> RetryLayer + where + In: Clone, + { + self.clone_input_with(|input, _args| Some(input.clone())) + } + + /// Sets the recovery classification function. + /// + /// This function determines whether a specific output should trigger a retry + /// by examining the output and returning a [`RecoveryInfo`] classification. + /// + /// The function receives the output and [`RecoveryArgs`] with context + /// about the current attempt. + /// + /// # Arguments + /// + /// * `recover_fn` - Function that takes a reference to the output and + /// [`RecoveryArgs`] containing retry attempt context, and returns + /// a [`RecoveryInfo`] decision + #[must_use] + pub fn recovery_with( + mut self, + recover_fn: impl Fn(&Out, RecoveryArgs) -> RecoveryInfo + Send + Sync + 'static, + ) -> RetryLayer { + self.should_recover = Some(ShouldRecover::new(recover_fn)); + self.into_state::() + } + + /// Automatically sets the recovery classification function for types that implement [`Recovery`]. + /// + /// This is a convenience method that uses the [`Recovery`] trait to determine + /// whether an output should trigger a retry. For types that implement [`Recovery`], + /// this provides a simple way to enable intelligent retry behavior without manually + /// implementing a recovery classification function. + /// + /// This is equivalent to calling [`recovery`][RetryLayer::recovery] with + /// `|output, _args| output.recovery()`. + /// + /// # Type Requirements + /// + /// This method is only available when the output type `Out` implements [`Recovery`]. + #[must_use] + pub fn recovery(self) -> RetryLayer + where + Out: Recovery, + { + self.recovery_with(|out, _args| out.recovery()) + } + + /// Configures a callback invoked before each retry attempt. + /// + /// This callback is useful for logging, metrics, or other observability + /// purposes. It receives the output that triggered the retry and + /// [`OnRetryArgs`] with detailed retry information. + /// + /// The callback does not affect retry behavior - it's purely for observation. + /// + /// **Default**: None (no observability by default) + /// + /// # Arguments + /// + /// * `retry_fn` - Function that takes a reference to the output and + /// [`OnRetryArgs`] containing retry context information + #[must_use] + pub fn on_retry( + mut self, + retry_fn: impl Fn(&Out, OnRetryArgs) + Send + Sync + 'static, + ) -> Self { + self.on_retry = Some(OnRetry::new(retry_fn)); + self + } + + /// Optionally enables the retry middleware based on a condition. + /// + /// When disabled, requests pass through without retry protection. + /// This call replaces any previous condition. + /// + /// **Default**: Always enabled + /// + /// # Arguments + /// + /// * `is_enabled` - Function that takes a reference to the input and returns + /// `true` if retry protection should be enabled for this request + #[must_use] + pub fn enable_if(mut self, is_enabled: impl Fn(&In) -> bool + Send + Sync + 'static) -> Self { + self.enable_if = EnableIf::new(is_enabled); + self + } + + /// Enables the retry middleware unconditionally. + /// + /// All requests will have retry protection applied. + /// This call replaces any previous condition. + /// + /// **Note**: This is the default behavior - retry is enabled by default. + #[must_use] + pub fn enable_always(mut self) -> Self { + self.enable_if = EnableIf::always(); + self + } + + /// Disables the retry middleware completely. + /// + /// All requests will pass through without retry protection. + /// This call replaces any previous condition. + /// + /// **Note**: This overrides the default enabled behavior. + #[must_use] + pub fn disable(mut self) -> Self { + self.enable_if = EnableIf::never(); + self + } + + /// Configures whether the retry middleware should attempt to recover from unavailable services. + /// + /// When enabled, the retry middleware will treat [`RecoveryInfo::unavailable`] classifications + /// as recoverable conditions and attempt retries. When disabled (default), unavailable services + /// are treated as non-recoverable and cause immediate failure without retry attempts. + /// + /// This is particularly useful when you have access to multiple resources + /// or service endpoints. When one resource is unavailable, the retry + /// mechanism can attempt the operation against a different resource in subsequent + /// attempts, potentially allowing the operation to succeed despite the unavailability. + /// + /// **Default**: false (unavailable responses are not retried) + /// + /// # Arguments + /// + /// * `enable` - `true` to enable unavailable recovery, `false` to treat unavailable responses as permanent failures + /// + /// # Example + /// + /// ```rust + /// # use seatbelt::retry::Retry; + /// # use seatbelt::{Attempt, RecoveryInfo, SeatbeltOptions}; + /// # use tick::Clock; + /// # fn example() { + /// # let options = SeatbeltOptions::>::new(Clock::new_frozen()); + /// // Service with multiple endpoints that can route around unavailable services + /// let layer = Retry::layer("multi_endpoint_retry", &options) + /// .clone_input_with(|input: &mut String, args| { + /// let mut input = input.clone(); + /// update_endpoint(&mut input, args.attempt()); // Modify input to use a different endpoint + /// Some(input) + /// }) + /// .recovery_with(|result, _args| match result { + /// Ok(_) => RecoveryInfo::never(), + /// Err(msg) if msg.contains("service unavailable") => RecoveryInfo::unavailable(), + /// Err(_) => RecoveryInfo::retry(), + /// }) + /// .handle_unavailable(true); // Try different endpoints on unavailable + /// # } + /// # fn update_endpoint(_input : &mut String, _attempt: Attempt) {} + /// ``` + #[must_use] + pub fn handle_unavailable(mut self, enable: bool) -> Self { + self.handle_unavailable = enable; + self + } + + /// Sets the input restoration function. + /// + /// This function is called when the original input could not be cloned for a retry + /// attempt (i.e., when [`clone_input_with`][RetryLayer::clone_input_with] returns `None`). + /// The restore function receives the output from the failed attempt and can attempt + /// to extract and reconstruct the input for the next retry. + /// + /// This is particularly useful when a service is unavailable and the input was not actually + /// consumed by the operation. A common pattern is that error responses contain or reference + /// the original input that can be extracted for retry. For example, an HTTP request that + /// is rejected even before sending, because the remote service is known to be down. + /// + /// The restore function should return: + /// - `Some(Input)` to proceed with retry using the restored input + /// - `None` to abort retry and return the provided output + /// + /// This enables retry scenarios where input cloning is expensive or impossible, but + /// the input can be extracted from error responses or failure contexts. + /// + /// # Arguments + /// + /// * `restore_fn` - Function that takes the output and [`RestoreInputArgs`] containing + /// context about the retry attempt, and returns either a restored input and modified + /// output, or just the output to abort retry + /// + /// # Example + /// + /// ```rust + /// # use std::ops::ControlFlow; + /// # use seatbelt::retry::{Retry, RestoreInputArgs}; + /// # use seatbelt::{RecoveryInfo, SeatbeltOptions}; + /// # use tick::Clock; + /// # fn example() { + /// # let clock = Clock::new_frozen(); + /// # let options = SeatbeltOptions::new(&clock); + /// #[derive(Clone)] + /// struct HttpRequest { + /// url: String, + /// body: Vec, + /// } + /// + /// enum HttpResult { + /// Success(String), + /// ConnectionError { original_request: HttpRequest }, + /// ServerError(u16), + /// } + /// + /// let layer = Retry::layer("http_retry", &options) + /// .clone_input_with(|_request, _args| None) // Don't clone expensive request bodies + /// .restore_input(|result: &mut HttpResult, _args| { + /// match result { + /// // Extract the original request from the error for retry + /// HttpResult::ConnectionError { original_request } => { + /// let request = original_request.clone(); + /// *result = HttpResult::ServerError(0); + /// Some(request) + /// } + /// _ => None, + /// } + /// }) + /// .recovery_with(|result, _args| match result { + /// HttpResult::ConnectionError { .. } => RecoveryInfo::retry(), + /// _ => RecoveryInfo::never(), + /// }); + /// # } + /// ``` + #[must_use] + pub fn restore_input( + mut self, + restore_fn: impl Fn(&mut Out, RestoreInputArgs) -> Option + Send + Sync + 'static, + ) -> Self { + self.restore_input = Some(RestoreInput::new(restore_fn)); + self + } +} + +impl Layer for RetryLayer { + type Service = Retry; + + fn layer(&self, inner: S) -> Self::Service { + Retry { + inner, + clock: self.options.get_clock().clone(), + max_attempts: self.max_attempts, + backoff: self.backoff.clone().into(), + clone_input: self + .clone_input + .clone() + .expect("clone_input must be set in Ready state"), + should_recover: self + .should_recover + .clone() + .expect("should_recover must be set in Ready state"), + on_retry: self.on_retry.clone(), + enable_if: self.enable_if.clone(), + strategy_name: self.strategy_name.clone(), + pipeline_name: self.options.get_pipeline_name().clone().into(), + event_reporter: self.options.create_resilience_event_counter(), + restore_input: self.restore_input.clone(), + handle_unavailable: self.handle_unavailable, + } + } +} + +impl + RetryLayer, CloneInputState, RecoveryState> +{ + /// Sets a specialized input restoration callback that operates only on error cases. + /// + /// This is a convenience method for working with `Result` outputs, where you + /// only want to restore input when an error occurs. The callback receives a mutable reference + /// to the error and can extract the original input from it, while potentially modifying the + /// error for the next attempt. + /// + /// This method is particularly useful when: + /// - Your service returns `Result` where the error type contains recoverable request data + /// - You want to extract and restore input only from error cases, not successful responses + /// - You need to modify the error (e.g., to remove sensitive data) before the next retry + /// + /// # Parameters + /// + /// * `restore_fn` - A function that takes a mutable reference to the error and restoration + /// arguments, returning `Some(input)` if the input can be restored from the error, or + /// `None` if restoration is not possible or desired. + /// + /// # Example + /// + /// ```rust + /// # use tick::Clock; + /// # use seatbelt::retry::*; + /// # use seatbelt::{RecoveryInfo, SeatbeltOptions}; + /// # #[derive(Clone)] + /// # struct HttpRequest { url: String, body: Vec } + /// # struct HttpResponse { status: u16 } + /// # enum HttpError { + /// # ConnectionError { original_request: HttpRequest }, + /// # ServerError(u16), + /// # AuthError, + /// # } + /// # impl HttpError { + /// # fn try_restore_request(&mut self) -> Option { + /// # match self { + /// # HttpError::ConnectionError { original_request } => { + /// # Some(original_request.clone()) + /// # }, + /// # _ => None, + /// # } + /// # } + /// # } + /// # fn example(clock: Clock) { + /// # let options = SeatbeltOptions::>::new(&clock); + /// type HttpResult = Result; + /// + /// let layer = Retry::layer("http_retry", &options).restore_input_from_error( + /// |error: &mut HttpError, _args| { + /// // Only restore input from connection errors that contain the original request + /// error.try_restore_request() + /// }, + /// ); + /// # } + /// ``` + #[must_use] + pub fn restore_input_from_error( + self, + restore_fn: impl Fn(&mut Error, RestoreInputArgs) -> Option + Send + Sync + 'static, + ) -> Self { + self.restore_input(move |input, args| match input { + Ok(_) => None, + Err(e) => restore_fn(e, args), + }) + } +} + +impl RetryLayer { + fn into_state(self) -> RetryLayer { + RetryLayer { + options: self.options, + max_attempts: self.max_attempts, + backoff: self.backoff, + clone_input: self.clone_input, + should_recover: self.should_recover, + on_retry: self.on_retry, + enable_if: self.enable_if, + strategy_name: self.strategy_name, + restore_input: self.restore_input, + handle_unavailable: self.handle_unavailable, + _state: PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + use std::sync::Arc; + use std::sync::atomic::{AtomicU32, Ordering}; + + use layered::Execute; + use tick::Clock; + + use super::*; + use crate::Attempt; + use crate::testing::RecoverableType; + + #[test] + fn new_creates_correct_initial_state() { + let options = create_test_options(); + let layer: RetryLayer<_, _, NotSet, NotSet> = + RetryLayer::new(StringValue::from("test_retry"), &options); + + assert_eq!(layer.max_attempts, MaxAttempts::Finite(4)); // 3 retries + 1 original = 4 total + assert!(matches!(layer.backoff.backoff_type, Backoff::Exponential)); + assert_eq!(layer.backoff.base_delay, Duration::from_secs(2)); + assert!(layer.backoff.max_delay.is_none()); + assert!(layer.backoff.use_jitter); // Default is true + assert!(layer.clone_input.is_none()); + assert!(layer.should_recover.is_none()); + assert!(layer.on_retry.is_none()); + assert_eq!(layer.strategy_name, StringValue::from("test_retry")); + assert!(layer.enable_if.call(&"test_input".to_string())); + } + + #[test] + fn clone_input_sets_correctly() { + let options = create_test_options(); + let layer = RetryLayer::new(StringValue::from("test"), &options); + + let layer: RetryLayer<_, _, Set, NotSet> = + layer.clone_input_with(|input, _args| Some(input.clone())); + + let result = layer.clone_input.unwrap().call( + &mut "test".to_string(), + CloneArgs { + attempt: Attempt::new(0, false), + previous_recovery: None, + }, + ); + assert_eq!(result, Some("test".to_string())); + } + + #[test] + fn recovery_sets_correctly() { + let options = create_test_options(); + let layer = RetryLayer::new(StringValue::from("test"), &options); + + let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery_with(|output, _args| { + if output.contains("error") { + RecoveryInfo::retry() + } else { + RecoveryInfo::never() + } + }); + + let result = layer.should_recover.as_ref().unwrap().call( + &"error message".to_string(), + RecoveryArgs { + attempt: Attempt::new(1, false), + clock: options.get_clock(), + }, + ); + assert_eq!(result, RecoveryInfo::retry()); + + let result = layer.should_recover.as_ref().unwrap().call( + &"success".to_string(), + RecoveryArgs { + attempt: Attempt::new(1, false), + clock: options.get_clock(), + }, + ); + assert_eq!(result, RecoveryInfo::never()); + } + + #[test] + fn recovery_auto_sets_correctly() { + let options = SeatbeltOptions::::new(Clock::new_frozen()); + let layer = RetryLayer::new(StringValue::from("test"), &options); + + let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery(); + + let result = layer.should_recover.as_ref().unwrap().call( + &RecoverableType::from(RecoveryInfo::retry()), + RecoveryArgs { + attempt: Attempt::new(1, false), + clock: options.get_clock(), + }, + ); + assert_eq!(result, RecoveryInfo::retry()); + + let result = layer.should_recover.as_ref().unwrap().call( + &RecoverableType::from(RecoveryInfo::never()), + RecoveryArgs { + attempt: Attempt::new(1, false), + clock: options.get_clock(), + }, + ); + assert_eq!(result, RecoveryInfo::never()); + } + + #[test] + fn configuration_methods_work() { + let layer = create_ready_layer() + .max_retry_attempts(5) + .backoff(Backoff::Exponential) + .base_delay(Duration::from_millis(500)) + .max_delay(Duration::from_secs(30)) + .use_jitter(true); + + assert_eq!(layer.max_attempts, MaxAttempts::Finite(6)); + assert!(matches!(layer.backoff.backoff_type, Backoff::Exponential)); + assert_eq!(layer.backoff.base_delay, Duration::from_millis(500)); + assert_eq!(layer.backoff.max_delay, Some(Duration::from_secs(30))); + assert!(layer.backoff.use_jitter); + } + + #[test] + fn on_retry_works() { + let called = Arc::new(AtomicU32::new(0)); + let called_clone = Arc::clone(&called); + + let layer = create_ready_layer().on_retry(move |_output, _args| { + called_clone.fetch_add(1, Ordering::SeqCst); + }); + + layer.on_retry.unwrap().call( + &"output".to_string(), + OnRetryArgs { + retry_delay: Duration::ZERO, + attempt: Attempt::new(1, false), + recovery: RecoveryInfo::retry(), + }, + ); + + assert_eq!(called.load(Ordering::SeqCst), 1); + } + + #[test] + fn enable_disable_conditions_work() { + let layer = create_ready_layer().enable_if(|input| input.contains("enable")); + + assert!(layer.enable_if.call(&"enable_test".to_string())); + assert!(!layer.enable_if.call(&"disable_test".to_string())); + + let layer = layer.disable(); + assert!(!layer.enable_if.call(&"anything".to_string())); + + let layer = layer.enable_always(); + assert!(layer.enable_if.call(&"anything".to_string())); + } + + #[test] + fn layer_builds_service_when_ready() { + let layer = create_ready_layer(); + let _service = layer.layer(Execute::new(|input: String| async move { input })); + } + + #[test] + fn handle_unavailable_sets_correctly() { + let options = create_test_options(); + let layer = RetryLayer::new(StringValue::from("test"), &options); + + // Test default value + assert!(!layer.handle_unavailable); + + // Test enabling outage handling + let layer = layer.handle_unavailable(true); + assert!(layer.handle_unavailable); + + // Test disabling outage handling + let layer = layer.handle_unavailable(false); + assert!(!layer.handle_unavailable); + } + + #[test] + fn restore_input_sets_correctly() { + let options = create_test_options(); + let layer = RetryLayer::new(StringValue::from("test"), &options); + + let layer = layer.restore_input(|output: &mut String, _args| { + (output == "restore_me").then(|| { + *output = "modified_output".to_string(); + "restored_input".to_string() + }) + }); + + let mut test_output = "restore_me".to_string(); + let result = layer.restore_input.as_ref().unwrap().call( + &mut test_output, + RestoreInputArgs { + attempt: Attempt::new(1, false), + recovery: RecoveryInfo::retry(), + }, + ); + + match result { + Some(input) => { + assert_eq!(input, "restored_input"); + assert_eq!(test_output, "modified_output"); + } + None => panic!("Expected Some, got None"), + } + + let mut test_output2 = "no_restore".to_string(); + let result = layer.restore_input.as_ref().unwrap().call( + &mut test_output2, + RestoreInputArgs { + attempt: Attempt::new(1, false), + recovery: RecoveryInfo::retry(), + }, + ); + + match result { + None => { + assert_eq!(test_output2, "no_restore"); + } + Some(_) => panic!("Expected None, got Some"), + } + } + + #[test] + fn static_assertions() { + static_assertions::assert_impl_all!(RetryLayer: Layer); + static_assertions::assert_not_impl_all!(RetryLayer: Layer); + static_assertions::assert_not_impl_all!(RetryLayer: Layer); + static_assertions::assert_impl_all!(RetryLayer: Debug); + } + + fn create_test_options() -> SeatbeltOptions { + SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + } + + fn create_ready_layer() -> RetryLayer { + RetryLayer::new(StringValue::from("test"), &create_test_options()) + .clone_input_with(|input, _args| Some(input.clone())) + .recovery_with(|output, _args| { + if output.contains("error") { + RecoveryInfo::retry() + } else { + RecoveryInfo::never() + } + }) + } +} diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs new file mode 100644 index 00000000..daeaca2c --- /dev/null +++ b/crates/seatbelt/src/retry/mod.rs @@ -0,0 +1,234 @@ +// Copyright (c) Microsoft Corporation. + +//! Retry resilience middleware for services, applications, and libraries. +//! +//! This module provides automatic retry capabilities with configurable backoff strategies, +//! jitter, recovery classification, and comprehensive telemetry. The primary types are +//! [`Retry`] and [`RetryLayer`]: +//! +//! - [`Retry`] is the middleware that wraps an inner service and automatically retries failed operations +//! - [`RetryLayer`] is used to configure and construct the retry middleware +//! +//! # Quick Start +//! +//! ```rust +//! # use oxidizer_rt::Builtins; +//! # use layered::{Execute, Service, Stack}; +//! # use seatbelt::retry::Retry; +//! # use seatbelt::{Backoff, RecoveryInfo, SeatbeltOptions}; +//! # #[oxidizer_rt::test] +//! # async fn example(state: Builtins) -> Result<(), Box> { +//! # let clock = state.clock().clone(); +//! let options = SeatbeltOptions::new(&clock).pipeline_name("my_service"); +//! +//! let stack = ( +//! Retry::layer("retry", &options) +//! .clone_input_with(|args| Some(args.input().clone())) +//! .recovery_with(|result, _| match result { +//! Ok(_) => RecoveryInfo::never(), +//! Err(_) => RecoveryInfo::retry(), +//! }), +//! Execute::new(my_operation), +//! ); +//! +//! let service = stack.build(); +//! let result = service.execute("input".to_string()).await; +//! # let _result = result; +//! # Ok(()) +//! # } +//! # async fn my_operation(input: String) -> Result { Ok(input) } +//! ``` +//! +//! # Configuration +//! +//! The [`RetryLayer`] uses a type state pattern to enforce that all required properties are +//! configured before the layer can be built. This compile-time safety ensures that you cannot +//! accidentally create a retry layer without properly specifying input cloning and recovery logic: +//! +//! - [`clone_input_with`][RetryLayer::clone_input_with]: Required function to clone inputs for retries (Rust ownership requirement) +//! - [`recovery`][RetryLayer::recovery]: Required function to determine if an output should trigger a retry +//! +//! Each retry layer requires an identifier for telemetry purposes. This identifier should use +//! `snake_case` naming convention to maintain consistency across the codebase. +//! +//! # Thread Safety +//! +//! The [`Retry`] type is thread-safe and implements both `Send` and `Sync` as enforced by +//! the `Service` trait it implements. This allows retry middleware to be safely shared +//! across multiple threads and used in concurrent environments. +//! +//! # Retry Delay +//! +//! Retry delays are determined by the following priority order: +//! +//! 1. **Recovery Delay Override**: If [`RetryLayer::recovery_with`] returns +//! [`RecoveryInfo::retry().delay()`][crate::RecoveryInfo::delay] or [`RecoveryInfo::unavailable()`][crate::RecoveryInfo::unavailable] with a specific duration, this delay +//! is used directly. +//! +//! 2. **Backoff Strategy**: When no recovery delay is specified, delays are calculated using +//! the configured backoff strategy (Constant, Linear, or Exponential with default 2s base delay). +//! +//! # Defaults +//! +//! The retry middleware uses the following default values when optional configuration is not provided: +//! +//! | Parameter | Default Value | Description | Configured By | +//! |-----------|---------------|-------------|---------------| +//! | Max retry attempts | `3` (4 total) | Maximum retry attempts plus original call | [`max_retry_attempts`][RetryLayer::max_retry_attempts], [`infinite_retry_attempts`][RetryLayer::infinite_retry_attempts] | +//! | Base delay | `2` seconds | Base delay used for backoff calculations | [`base_delay`][RetryLayer::base_delay] | +//! | Backoff strategy | `Exponential` | Exponential backoff with base multiplier of 2 | [`backoff`][RetryLayer::backoff] | +//! | Jitter | `Enabled` | Adds randomness to delays to prevent thundering herds | [`use_jitter`][RetryLayer::use_jitter] | +//! | Max delay | `None` | No limit on maximum delay between retries | [`max_delay`][RetryLayer::max_delay] | +//! | Enable condition | Always enabled | Retry protection is applied to all requests | [`enable_if`][RetryLayer::enable_if], [`enable_always`][RetryLayer::enable_always], [`disable`][RetryLayer::disable] | +//! +//! These defaults provide a reasonable starting point for most use cases, offering a balance +//! between resilience and avoiding an excessive load on downstream services. +//! +//! # Telemetry +//! +//! ## Metrics +//! +//! - **Metric**: `resilience.event` (counter) +//! - **When**: Emitted for each attempt that should be retried (including the final retry attempt) +//! - **Attributes**: +//! - `resilience.pipeline.name`: Pipeline identifier from [`SeatbeltOptions::pipeline_name`] +//! - `resilience.strategy.name`: Timeout identifier from [`Retry::layer`] +//! - `resilience.event.name`: Always `retry` +//! - `resilience.attempt.index`: Attempt index (0-based) +//! - `resilience.attempt.is_last`: Boolean indicating if this is the last retry attempt +//! +//! # Examples +//! +//! ## Basic Usage +//! +//! This example demonstrates the basic usage of configuring and using retry middleware. +//! +//! ```rust +//! # use std::time::Duration; +//! # use oxidizer_rt::Builtins; +//! # use layered::{Execute, Service, Stack}; +//! # use seatbelt::retry::Retry; +//! # use seatbelt::{Backoff, RecoveryInfo, SeatbeltOptions}; +//! # #[oxidizer_rt::test] +//! # async fn example(state: Builtins) -> Result<(), Box> { +//! # let clock = state.clock().clone(); +//! // Define common options for resilience middleware. The clock is runtime-specific and +//! // must be provided. See its documentation for details. +//! let options = SeatbeltOptions::new(&clock).pipeline_name("example"); +//! +//! let stack = ( +//! Retry::layer("my_retry", &options) +//! // Required: how to clone inputs for retries +//! .clone_input_with(|args| Some(args.input().clone())) +//! // Required: determine if we should retry based on output +//! .recovery_with(|output, _args| match output { +//! // These are demonstrative, real code will have more meaningful recovery detection +//! Ok(_) => RecoveryInfo::never(), +//! Err(msg) if msg.contains("transient") => RecoveryInfo::retry(), +//! Err(_) => RecoveryInfo::never(), +//! }), +//! Execute::new(execute_unreliable_operation), +//! ); +//! +//! // Build the service +//! let service = stack.build(); +//! +//! // Execute the service +//! let result = service.execute("test input".to_string()).await; +//! # let _result = result; +//! # Ok(()) +//! # } +//! # async fn execute_unreliable_operation(input: String) -> Result> { Ok(input) } +//! ``` +//! +//! ## Advanced Usage +//! +//! This example demonstrates advanced usage of the retry middleware, including custom backoff +//! strategies, delay generators, and retry callbacks. +//! +//! ```rust +//! # use std::time::Duration; +//! # use oxidizer_rt::Builtins; +//! # use layered::{Execute, Stack}; +//! # use seatbelt::retry::Retry; +//! # use seatbelt::{RecoveryInfo, SeatbeltOptions, Backoff}; +//! # #[oxidizer_rt::test] +//! # async fn example(state: Builtins) -> Result<(), Box> { +//! # let clock = state.clock().clone(); +//! // Define common options for resilience middleware. +//! let options = SeatbeltOptions::new(&clock); +//! +//! let stack = ( +//! Retry::layer("advanced_retry", &options) +//! .clone_input_with(|args| Some(args.input().clone())) +//! .recovery_with(|output, _args| match output { +//! Err(msg) if msg.contains("rate_limit") => { +//! RecoveryInfo::retry().delay(Duration::from_secs(60)) +//! } +//! Err(msg) if msg.contains("timeout") => RecoveryInfo::retry(), +//! Err(_) => RecoveryInfo::never(), +//! Ok(_) => RecoveryInfo::never(), +//! }) +//! // Optional configuration +//! .max_retry_attempts(5) +//! .base_delay(Duration::from_millis(200)) +//! .backoff(Backoff::Exponential) +//! .jitter(true) +//! // You can extract the delay from the output, or return None to use the +//! // one provided by the retry middleware +//! .delay_generator(|_output, args| None) +//! // Callback called just before the next retry +//! .on_retry(|output, args| { +//! println!( +//! "retrying, attempt: {}, delay: {}ms", +//! args.attempt(), +//! args.retry_delay().as_millis(), +//! ); +//! }), +//! Execute::new(execute_unreliable_operation), +//! ); +//! +//! // Build and execute the service +//! let service = stack.build(); +//! let result = service.execute("test_timeout".to_string()).await; +//! # let _result = result; +//! # Ok(()) +//! # } +//! # async fn execute_unreliable_operation(input: String) -> Result> { Ok(input) } +//! ``` +//! +//! ## Incomplete Configuration +//! +//! This example demonstrates what happens when the `RetryLayer` is not fully configured. +//! The code below will not compile because the retry layer is missing required configuration. +//! +//! ```compile_fail +//! # use seatbelt::retry::Retry; +//! # use seatbelt::SeatbeltOptions; +//! # use tick::Clock; +//! # fn example(service_options: SeatbeltOptions) { +//! let stack = ( +//! Retry::layer("test", &service_options), // Missing required configuration! +//! Execute::new(|input| async move { input }) +//! ); +//! +//! // This will fail to compile +//! let service = stack.build(); +//! # } +//! ``` +//! +//! For more comprehensive examples, see the [examples directory](https://github.com/microsoft/oxidizer/tree/main/crates/seatbelt/examples). + +mod args; +mod backoff; +mod callbacks; +mod constants; +mod layer; +mod service; +mod telemetry; + +pub use args::{CloneArgs, OnRetryArgs, RecoveryArgs, RestoreInputArgs}; +pub(crate) use backoff::DelayBackoff; +pub(crate) use callbacks::{CloneInput, OnRetry, RestoreInput, ShouldRecover}; +pub use layer::RetryLayer; +pub use service::Retry; diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs new file mode 100644 index 00000000..3fdba052 --- /dev/null +++ b/crates/seatbelt/src/retry/service.rs @@ -0,0 +1,640 @@ +// Copyright (c) Microsoft Corporation. + +use std::ops::ControlFlow; +use std::time::Duration; + +use layered::Service; +use opentelemetry::StringValue; +use opentelemetry::metrics::Counter; +use tick::Clock; + +use crate::options::MaxAttempts; +use crate::retry::telemetry::{ATTEMPT_INDEX, ATTEMPT_NUMBER_IS_LAST, RETRY_EVENT}; +use crate::retry::{ + CloneArgs, CloneInput, DelayBackoff, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, + RestoreInputArgs, ShouldRecover, +}; +use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; +use crate::{Attempt, EnableIf, NotSet, RecoveryInfo, RecoveryKind}; + +/// Applies retry logic to service execution for transient error handling. +/// +/// `Retry` wraps an inner [`Service`] and automatically retries failed operations +/// based on configurable recovery classification, backoff strategies, and delay generation. +/// This middleware is designed to be used across services, applications, and libraries +/// to handle transient failures gracefully. +/// +/// This middleware requires input cloning capabilities and recovery classification to determine +/// retry eligibility. +/// +/// Retry is configured by calling [`Retry::layer`] and using the +/// builder methods on the returned [`RetryLayer`][crate::retry::RetryLayer] instance. +/// +/// For comprehensive examples and usage patterns, see the [retry module][crate::retry] documentation. +#[derive(Debug)] +#[expect(clippy::struct_field_names, reason = "Fields are named for clarity")] +pub struct Retry { + pub(super) inner: S, + pub(super) clock: Clock, + pub(super) max_attempts: MaxAttempts, + pub(super) backoff: DelayBackoff, + pub(super) clone_input: CloneInput, + pub(super) should_recover: ShouldRecover, + pub(super) on_retry: Option>, + pub(super) enable_if: EnableIf, + pub(super) strategy_name: StringValue, + pub(super) pipeline_name: StringValue, + pub(super) event_reporter: Counter, + pub(super) restore_input: Option>, + pub(super) handle_unavailable: bool, +} + +impl Retry { + /// Creates a new retry layer with the specified name and options. + /// + /// Returns a [`RetryLayer`][crate::retry::RetryLayer] that must be configured with required parameters + /// before it can be used to build a retry service. + pub fn layer( + name: impl Into>, + options: &crate::SeatbeltOptions, + ) -> crate::retry::RetryLayer { + crate::retry::RetryLayer::new(name.into().into(), options) + } +} + +impl Service for Retry +where + In: Send, + S: Service, +{ + type Out = Out; + + async fn execute(&self, mut input: In) -> Self::Out { + // Check if retry is enabled for this input + if !self.enable_if.call(&input) { + return self.inner.execute(input).await; + } + + let mut attempt = self.max_attempts.first_attempt(); + let mut delays = self.backoff.delays(); + let mut previous_recovery = None; + + loop { + match self + .execute_attempt(input, attempt, &mut delays, previous_recovery) + .await + { + ControlFlow::Continue((next_input, next_attempt, recovery)) => { + input = next_input; + attempt = next_attempt; + previous_recovery = Some(recovery); + } + ControlFlow::Break(out) => return out, + } + } + } +} + +impl Retry +where + In: Send, + S: Service, +{ + async fn execute_attempt( + &self, + mut input: In, + attempt: Attempt, + delays: &mut impl Iterator, + previous_recovery: Option, + ) -> ControlFlow { + let (original_input, attempt_input) = match self.clone_input.call( + &mut input, + CloneArgs { + attempt, + previous_recovery, + }, + ) { + Some(cloned) => (Some(input), cloned), + None => (None, input), + }; + + // Execute the operation + let out = self.inner.execute(attempt_input).await; + + // Check if we should recover from this output + let recovery = self.should_recover.call( + &out, + RecoveryArgs { + attempt, + clock: &self.clock, + }, + ); + + // Detect if we can recover from output + let recovery_kind = match recovery.kind() { + RecoveryKind::Unavailable => { + if self.handle_unavailable { + RecoverableKind::Retry + } else { + return ControlFlow::Break(out); + } + } + RecoveryKind::Retry => RecoverableKind::Retry, + // Handle future variants - treat unknown variants as non-recoverable + RecoveryKind::Never | RecoveryKind::Unknown | _ => return ControlFlow::Break(out), + }; + + // If no more attempts left, report telemetry, and return the last output + let Some(next_attempt) = attempt.increment(self.max_attempts) else { + self.emit_attempt_telemetry(attempt, Duration::ZERO); + return ControlFlow::Break(out); + }; + + // Always get the next delay, even if we won't use it. This is because we want to + // advance the backoff strategy (e.g., exponential backoff), so the next retry uses the + // correct delay when it's not explicitly overridden by the recovery. + let retry_delay = delays.next().unwrap_or(Duration::ZERO); + + // Use the recovery delay if provided, otherwise use the backoff delay + let retry_delay = recovery.get_delay().unwrap_or(retry_delay); + + // At this point, we know that the output is recoverable and that we have more attempts left. + // Determine the delay before the next attempt based on the recovery kind. + let flow_control = match recovery_kind { + RecoverableKind::Retry => self.finalize_retryable_attempt( + original_input, + out, + attempt, + next_attempt, + retry_delay, + recovery, + ), + }; + + // Only ever delay if we have a next attempt + if matches!(flow_control, ControlFlow::Continue(_)) { + self.clock.delay(retry_delay).await; + } + + flow_control + } +} + +enum RecoverableKind { + Retry, +} + +impl Retry { + fn emit_attempt_telemetry(&self, attempt: Attempt, retry_delay: Duration) { + tracing::event!( + name: "seatbelt.retry", + tracing::Level::WARN, + pipeline.name = self.pipeline_name.as_str(), + strategy.name = self.strategy_name.as_str(), + resilience.attempt.index = attempt.index(), + resilience.attempt.is_last = attempt.is_last(), + resilience.retry.delay = retry_delay.as_secs_f32(), + ); + + self.event_reporter.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, RETRY_EVENT), + opentelemetry::KeyValue::new(ATTEMPT_INDEX, i64::from(attempt.index())), + opentelemetry::KeyValue::new(ATTEMPT_NUMBER_IS_LAST, attempt.is_last()), + ], + ); + } + + #[inline] + fn finalize_retryable_attempt( + &self, + mut original_input: Option, + mut out: Out, + attempt: Attempt, + next_attempt: Attempt, + retry_delay: Duration, + recovery: RecoveryInfo, + ) -> ControlFlow { + // we emit attempt telemetry even if the next attempt does not happen + self.emit_attempt_telemetry(attempt, retry_delay); + + // If we have a restore input callback, we can use it to restore the input for the next attempt if + // the original input was not clonable. + if original_input.is_none() + && let Some(restore) = &self.restore_input + && let Some(input) = restore.call( + &mut out, + RestoreInputArgs { + attempt, + recovery: recovery.clone(), + }, + ) + { + original_input = Some(input); + } + + match original_input { + Some(input) => { + // Only invoke on-retry if there will be next attempt + if let Some(ref on_retry) = self.on_retry { + on_retry.call( + &out, + OnRetryArgs { + attempt, + retry_delay, + recovery: recovery.clone(), + }, + ); + } + ControlFlow::Continue((input, next_attempt, recovery)) + } + None => ControlFlow::Break(out), + } + } +} + +#[cfg(not(miri))] // Oxidizer runtime does not support Miri. +#[cfg(test)] +mod tests { + use std::sync::atomic::{AtomicU32, Ordering}; + use std::sync::{Arc, Mutex}; + + use layered::Execute; + use opentelemetry::KeyValue; + use tick::ClockControl; + + use super::*; + use crate::options::Backoff; + use crate::retry::RetryLayer; + use crate::service::Layer; + use crate::testing::MetricTester; + use crate::{SeatbeltOptions, Set}; + + #[test] + fn layer_ensure_defaults() { + let options = SeatbeltOptions::::new(Clock::new_frozen()) + .pipeline_name("test_pipeline"); + let layer: RetryLayer = + Retry::layer("test_retry", &options); + let layer = layer + .recovery_with(|_, _| RecoveryInfo::never()) + .clone_input(); + + let retry = layer.layer(Execute::new(|v: String| async move { v })); + + assert_eq!(retry.pipeline_name.to_string(), "test_pipeline"); + assert_eq!(retry.strategy_name.to_string(), "test_retry"); + assert_eq!(retry.max_attempts, MaxAttempts::Finite(4)); + assert_eq!(retry.backoff.0.base_delay, Duration::from_secs(2)); + assert_eq!(retry.backoff.0.backoff_type, Backoff::Exponential); + assert!(retry.backoff.0.use_jitter); + assert!(retry.on_retry.is_none()); + assert!(retry.enable_if.call(&"str".to_string())); + } + + #[oxidizer_rt::test] + async fn retry_disabled_no_inner_calls(state: oxidizer_rt::Builtins) { + let counter = Arc::new(AtomicU32::new(0)); + let counter_clone = std::sync::Arc::clone(&counter); + + let service = create_ready_retry_layer(state.clock(), RecoveryInfo::retry()) + .clone_input_with(move |input, _args| { + counter_clone.fetch_add(1, Ordering::SeqCst); + Some(input.clone()) + }) + .disable() + .layer(Execute::new(move |v: String| async move { v })); + + let result = service.execute("test".to_string()).await; + + assert_eq!(result, "test"); + assert_eq!(counter.load(Ordering::SeqCst), 0); + } + + #[oxidizer_rt::test] + async fn uncloneable_recovery_called(state: oxidizer_rt::Builtins) { + let counter = Arc::new(AtomicU32::new(0)); + let counter_clone = std::sync::Arc::clone(&counter); + + let service = create_ready_retry_layer(state.clock(), RecoveryInfo::retry()) + .clone_input_with(move |_input, _args| None) + .recovery_with(move |_input, _args| { + counter_clone.fetch_add(1, Ordering::SeqCst); + RecoveryInfo::retry() + }) + .layer(Execute::new(move |v: String| async move { v })); + + let result = service.execute("test".to_string()).await; + + assert_eq!(result, "test"); + assert_eq!(counter.load(Ordering::SeqCst), 1); + } + + #[oxidizer_rt::test] + async fn no_recovery_ensure_no_additional_retries(state: oxidizer_rt::Builtins) { + let counter = Arc::new(AtomicU32::new(0)); + let counter_clone = std::sync::Arc::clone(&counter); + + let service = create_ready_retry_layer(state.clock(), RecoveryInfo::retry()) + .clone_input_with(move |input, _args| { + counter_clone.fetch_add(1, Ordering::SeqCst); + Some(input.clone()) + }) + .recovery_with(move |_input, _args| RecoveryInfo::never()) + .layer(Execute::new(move |v: String| async move { v })); + + let result = service.execute("test".to_string()).await; + + assert_eq!(result, "test"); + assert_eq!(counter.load(Ordering::SeqCst), 1); + } + + #[oxidizer_rt::test] + async fn retry_recovery_ensure_retries_exhausted(_state: oxidizer_rt::Builtins) { + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let counter = Arc::new(AtomicU32::new(0)); + let counter_clone = std::sync::Arc::clone(&counter); + + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) + .clone_input_with(move |input, _args| { + counter_clone.fetch_add(1, Ordering::SeqCst); + Some(input.clone()) + }) + .recovery_with(move |_input, _args| RecoveryInfo::retry()) + .max_retry_attempts(4) + .layer(Execute::new(move |v: String| async move { v })); + + let result = service.execute("test".to_string()).await; + + assert_eq!(result, "test"); + assert_eq!(counter.load(Ordering::SeqCst), 5); + } + + #[oxidizer_rt::test] + async fn retry_recovery_ensure_correct_delays(_state: oxidizer_rt::Builtins) { + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let delays = Arc::new(Mutex::new(vec![])); + let delays_clone = Arc::clone(&delays); + + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) + .clone_input_with(move |input, _args| Some(input.clone())) + .use_jitter(false) + .backoff(Backoff::Linear) + .recovery_with(move |_input, _args| RecoveryInfo::retry()) + .max_retry_attempts(4) + .on_retry(move |_output, args| { + delays_clone.lock().unwrap().push(args.retry_delay()); + }) + .layer(Execute::new(move |v: String| async move { v })); + + let _result = service.execute("test".to_string()).await; + + assert_eq!( + delays.lock().unwrap().to_vec(), + vec![ + Duration::from_secs(2), + Duration::from_secs(4), + Duration::from_secs(6), + Duration::from_secs(8), + ] + ); + } + + #[oxidizer_rt::test] + async fn retry_recovery_ensure_correct_attempts(_state: oxidizer_rt::Builtins) { + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let attempts = Arc::new(Mutex::new(vec![])); + let attempts_clone = Arc::clone(&attempts); + + let attempts_for_clone = Arc::new(Mutex::new(vec![])); + let attempts_for_clone_clone = Arc::clone(&attempts_for_clone); + + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) + .clone_input_with(move |input, args| { + attempts_for_clone_clone + .lock() + .unwrap() + .push(args.attempt()); + Some(input.clone()) + }) + .recovery_with(move |_input, _args| RecoveryInfo::retry()) + .max_retry_attempts(4) + .on_retry(move |_output, args| { + attempts_clone.lock().unwrap().push(args.attempt()); + }) + .layer(Execute::new(move |v: String| async move { v })); + + let _result = service.execute("test".to_string()).await; + + assert_eq!( + attempts_for_clone.lock().unwrap().to_vec(), + vec![ + Attempt::new(0, false), + Attempt::new(1, false), + Attempt::new(2, false), + Attempt::new(3, false), + Attempt::new(4, true), + ] + ); + + assert_eq!( + attempts.lock().unwrap().to_vec(), + vec![ + Attempt::new(0, false), + Attempt::new(1, false), + Attempt::new(2, false), + Attempt::new(3, false), + ] + ); + } + + #[oxidizer_rt::test] + async fn restore_input_integration_test(_state: oxidizer_rt::Builtins) { + use std::sync::Arc; + use std::sync::atomic::{AtomicU32, Ordering}; + + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let call_count = Arc::new(AtomicU32::new(0)); + let call_count_clone = std::sync::Arc::clone(&call_count); + let restore_count = Arc::new(AtomicU32::new(0)); + let restore_count_clone = std::sync::Arc::clone(&restore_count); + + // Create a service that fails on first attempt but can restore input + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) + .clone_input_with(|_input, _args| None) // Don't clone - force restore path + .restore_input(move |output: &mut String, _args| { + restore_count_clone.fetch_add(1, Ordering::SeqCst); + output.contains("error:").then(|| { + let input = output.replace("error:", ""); + *output = "restored".to_string(); + input + }) + }) + .recovery_with(|output, _args| { + if output.contains("error:") { + RecoveryInfo::retry() + } else { + RecoveryInfo::never() + } + }) + .max_retry_attempts(2) + .layer(Execute::new(move |input: String| { + let count = call_count_clone.fetch_add(1, Ordering::SeqCst); + async move { + if count == 0 { + // First call fails with input stored in error + format!("error:{input}") + } else { + // Subsequent calls succeed + format!("success:{input}") + } + } + })); + + let result = service.execute("test_input".to_string()).await; + + // Verify the restore path was used and retry succeeded + assert_eq!(result, "success:test_input"); + assert_eq!(call_count.load(Ordering::SeqCst), 2); // Original + 1 retry + assert_eq!(restore_count.load(Ordering::SeqCst), 1); // Restore called once + } + + #[oxidizer_rt::test] + async fn outage_handling_disabled_no_retries(_state: oxidizer_rt::Builtins) { + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let call_count = Arc::new(AtomicU32::new(0)); + let call_count_clone = Arc::clone(&call_count); + + // Create a service that returns outage (handle_unavailable disabled by default) + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) + .clone_input_with(move |input, _args| { + call_count_clone.fetch_add(1, Ordering::SeqCst); + Some(input.clone()) + }) + .recovery_with(|_output, _args| RecoveryInfo::unavailable()) + .layer(Execute::new(move |v: String| async move { v })); + + let result = service.execute("test".to_string()).await; + + // Should not retry when outage handling is disabled + assert_eq!(result, "test"); + assert_eq!(call_count.load(Ordering::SeqCst), 1); // Only original call, no retries + } + + #[oxidizer_rt::test] + async fn outage_handling_enabled_with_retries(_state: oxidizer_rt::Builtins) { + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let call_count = Arc::new(AtomicU32::new(0)); + let call_count_clone = std::sync::Arc::clone(&call_count); + + // Create a service that returns outage initially, then succeeds + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) + .clone_input_with(move |input, _args| { + call_count_clone.fetch_add(1, Ordering::SeqCst); + Some(input.clone()) + }) + .recovery_with(|_output, args| { + // First attempt returns outage, subsequent attempts succeed + if args.attempt().index() == 0 { + RecoveryInfo::unavailable() + } else { + RecoveryInfo::never() + } + }) + .handle_unavailable(true) // Enable outage handling + .max_retry_attempts(2) + .layer(Execute::new(move |input: String| async move { + format!("processed_{input}") + })); + + let result = service.execute("test".to_string()).await; + + // Should retry when outage handling is enabled + assert_eq!(result, "processed_test"); + assert_eq!(call_count.load(Ordering::SeqCst), 2); // Original + 1 retry + } + + #[oxidizer_rt::test] + async fn outage_handling_with_recovery_hint(_state: oxidizer_rt::Builtins) { + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let delays = Arc::new(Mutex::new(vec![])); + let delays_clone = Arc::clone(&delays); + + // Create a service that returns outage with recovery hint + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) + .clone_input_with(move |input, _args| Some(input.clone())) + .recovery_with(|_output, args| { + if args.attempt().index() == 0 { + RecoveryInfo::unavailable().delay(Duration::from_secs(10)) // 10 second recovery hint + } else { + RecoveryInfo::never() + } + }) + .handle_unavailable(true) + .max_retry_attempts(1) + .on_retry(move |_output, args| { + delays_clone.lock().unwrap().push(args.retry_delay()); + }) + .layer(Execute::new(move |v: String| async move { v })); + + let _result = service.execute("test".to_string()).await; + + // Should use the recovery hint as the delay + assert_eq!( + delays.lock().unwrap().to_vec(), + vec![Duration::from_secs(10)] + ); + } + + #[oxidizer_rt::test] + async fn retries_exhausted_ensure_telemetry_reported(_state: oxidizer_rt::Builtins) { + let tester = MetricTester::new(); + let options = SeatbeltOptions::::new( + ClockControl::default().auto_advance_timers(true).to_clock(), + ) + .pipeline_name("test_pipeline") + .meter_provider(tester.meter_provider()); + + let service = create_ready_retry_layer_core(RecoveryInfo::retry(), &options) + .clone_input_with(move |input, _args| Some(input.clone())) + .max_retry_attempts(2) + .recovery_with(move |_input, _args| RecoveryInfo::retry()) + .layer(Execute::new(move |v: String| async move { v })); + + let _result = service.execute("test".to_string()).await; + + tester.assert_attributes( + &[ + KeyValue::new("resilience.attempt.index", 0), + KeyValue::new("resilience.attempt.index", 1), + KeyValue::new("resilience.attempt.is_last", false), + KeyValue::new("resilience.attempt.is_last", true), + KeyValue::new("resilience.pipeline.name", "test_pipeline"), + KeyValue::new("resilience.strategy.name", "test_retry"), + KeyValue::new("resilience.event.name", "retry"), + ], + Some(15), + ); + } + + fn create_ready_retry_layer( + clock: &Clock, + recover: RecoveryInfo, + ) -> RetryLayer { + let options = SeatbeltOptions::new(clock.clone()).pipeline_name("test_pipeline"); + create_ready_retry_layer_core(recover, &options) + } + + fn create_ready_retry_layer_core( + recover: RecoveryInfo, + seatbelt_options: &SeatbeltOptions, + ) -> RetryLayer { + Retry::layer("test_retry", seatbelt_options) + .recovery_with(move |_, _| recover.clone()) + .clone_input() + .max_delay(Duration::from_secs(9999)) // protect against infinite backoff + } +} diff --git a/crates/seatbelt/src/retry/telemetry.rs b/crates/seatbelt/src/retry/telemetry.rs new file mode 100644 index 00000000..7494f06f --- /dev/null +++ b/crates/seatbelt/src/retry/telemetry.rs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. + +/// The name of the retry event for telemetry reporting. +pub(super) const RETRY_EVENT: &str = "retry"; + +/// Attribute key for the retry attempt index. +pub(super) const ATTEMPT_INDEX: &str = "resilience.attempt.index"; + +/// Attribute key for whether this is the last retry attempt. +pub(super) const ATTEMPT_NUMBER_IS_LAST: &str = "resilience.attempt.is_last"; diff --git a/crates/seatbelt/src/rnd.rs b/crates/seatbelt/src/rnd.rs new file mode 100644 index 00000000..6c86189c --- /dev/null +++ b/crates/seatbelt/src/rnd.rs @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. + +use std::fmt::Debug; + +/// Non-cryptographic random number generator used in this crate. +/// +/// This RNG is **NOT cryptographically secure** and should only be used for +/// non-security-critical purposes such as load balancing, jitter, sampling, +/// and other scenarios where cryptographic guarantees are not required. +/// +/// The seatbelt create does not require cryptographic security for its +/// random number generation needs, so this type is provided as a lightweight +/// alternative to more complex RNG implementations. +#[derive(Clone, Default)] +pub(crate) enum Rnd { + #[default] + Real, + + #[cfg(test)] + Test(std::sync::Arc f64 + Send + Sync>), +} + +impl Debug for Rnd { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Real => write!(f, "Real"), + #[cfg(test)] + Self::Test(_) => write!(f, "Test"), + } + } +} + +impl Rnd { + #[cfg(test)] + pub fn new_fixed(value: f64) -> Self { + Self::Test(std::sync::Arc::new(move || value)) + } + + #[cfg(test)] + pub fn new_function(f: F) -> Self + where + F: Fn() -> f64 + Send + Sync + 'static, + { + Self::Test(std::sync::Arc::new(f)) + } + + pub fn next_f64(&self) -> f64 { + match self { + Self::Real => fastrand::f64(), + #[cfg(test)] + Self::Test(generator) => generator(), + } + } +} diff --git a/crates/seatbelt/src/telemetry/metrics.rs b/crates/seatbelt/src/telemetry/metrics.rs new file mode 100644 index 00000000..0f315aeb --- /dev/null +++ b/crates/seatbelt/src/telemetry/metrics.rs @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. + +use opentelemetry::InstrumentationScope; +use opentelemetry::metrics::{Counter, Meter, MeterProvider}; + +const METER_NAME: &str = "seatbelt"; +const VERSION: &str = "v0.1.0"; +const SCHEMA_URL: &str = "https://opentelemetry.io/schemas/1.47.0"; + +pub(crate) fn create_meter(meter_provider: &dyn MeterProvider) -> Meter { + meter_provider.meter_with_scope( + InstrumentationScope::builder(METER_NAME) + .with_version(VERSION) + .with_schema_url(SCHEMA_URL) + .build(), + ) +} + +pub(crate) fn create_resilience_event_counter(meter: &Meter) -> Counter { + meter + .u64_counter("resilience.event") + .with_description("Emitted upon the occurrence of a resilience event.") + .with_unit("u64") + .build() +} + +#[cfg(test)] +mod tests { + use opentelemetry_sdk::metrics::InMemoryMetricExporter; + + use super::*; + + #[test] + #[cfg(not(miri))] + fn assert_definitions() { + let exporter = InMemoryMetricExporter::default(); + let meter_provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder() + .with_periodic_exporter(exporter.clone()) + .build(); + + let meter = create_meter(&meter_provider); + let resilience_events = create_resilience_event_counter(&meter); + resilience_events.add(1, &[]); + + meter_provider.force_flush().unwrap(); + + let metrics = exporter.get_finished_metrics().unwrap(); + let str = format!("{metrics:?}"); + + assert!(str.contains("resilience.event")); + assert!(str.contains("u64")); + assert!(str.contains("seatbelt")); + assert!(str.contains("v0.1.0")); + assert!(str.contains("https://opentelemetry.io/schemas/1.47")); + } +} diff --git a/crates/seatbelt/src/telemetry/mod.rs b/crates/seatbelt/src/telemetry/mod.rs new file mode 100644 index 00000000..2f457c6d --- /dev/null +++ b/crates/seatbelt/src/telemetry/mod.rs @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. + +//! # Service Middleware Telemetry +//! +//! This module defines well‑known attribute / label keys used when emitting +//! telemetry (metrics, traces, logs) from middleware components such as retry, +//! timeout, bulkhead, circuit breaker, and fallback pipelines. +//! +//! The constants are stable keys you can attach to: +//! +//! - metrics (e.g. counters, histograms) +//! - tracing spans / events +//! - structured log records +//! +//! # Conventions +//! +//! Names follow the [OpenTelemetry naming guidelines](https://opentelemetry.io/docs/specs/semconv/general/naming/#general-naming-considerations). +//! +//! - Keys should be dot-separated (e.g., `pipeline.name`, `strategy.name`) +//! - Values should be concise and short, preferably using `snake_case` + +#[cfg(any(feature = "metrics", test))] +pub(crate) mod metrics; + +/// Key used to annotate the name of a resilience pipeline. +/// +/// Values reported under this dimension should be short and concise, preferably in `snake_case`. +/// Examples: `user_auth`, `data_processing`, `payment_flow`. +pub const PIPELINE_NAME: &str = "resilience.pipeline.name"; + +/// Key used to annotate the name of a resilience strategy. +/// +/// Values reported under this dimension should be short and concise, preferably in `snake_case`. +/// Examples: `retry`, `circuit_breaker`, `timeout`, `bulkhead`. +pub const STRATEGY_NAME: &str = "resilience.strategy.name"; + +/// Key used to annotate the specific resilience event being emitted. +/// +/// Values reported under this dimension should be short and concise, preferably in `snake_case`. +/// Examples: `retry`, `timeout`, `circuit_opened`. +pub const EVENT_NAME: &str = "resilience.event.name"; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pipeline_name_is_expected() { + assert_eq!(PIPELINE_NAME, "resilience.pipeline.name"); + } + + #[test] + fn test_strategy_name_is_expected() { + assert_eq!(STRATEGY_NAME, "resilience.strategy.name"); + } + + #[test] + fn test_event_name_is_expected() { + assert_eq!(EVENT_NAME, "resilience.event.name"); + } +} diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs new file mode 100644 index 00000000..45437988 --- /dev/null +++ b/crates/seatbelt/src/testing.rs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. + +use opentelemetry::KeyValue; +use opentelemetry_sdk::metrics::data::{AggregatedMetrics, Metric, MetricData}; +use opentelemetry_sdk::metrics::{InMemoryMetricExporter, SdkMeterProvider}; + +use crate::{Recovery, RecoveryInfo}; + +#[derive(Debug)] +pub struct MetricTester { + exporter: InMemoryMetricExporter, + provider: SdkMeterProvider, +} + +impl Default for MetricTester { + fn default() -> Self { + Self::new() + } +} + +impl MetricTester { + #[must_use] + pub fn new() -> Self { + let in_memory = InMemoryMetricExporter::default(); + + Self { + exporter: in_memory.clone(), + provider: SdkMeterProvider::builder() + .with_periodic_exporter(in_memory) + .build(), + } + } + + #[must_use] + pub fn meter_provider(&self) -> &SdkMeterProvider { + &self.provider + } + + #[must_use] + #[expect(clippy::missing_panics_doc, reason = "TODO")] + pub fn collect_attributes(&self) -> Vec { + self.provider.force_flush().unwrap(); + collect_attributes(&self.exporter) + } + + #[expect(clippy::missing_panics_doc, reason = "TODO")] + pub fn assert_attributes(&self, key_values: &[KeyValue], expected_length: Option) { + let attributes = self.collect_attributes(); + + if let Some(expected_length) = expected_length { + assert_eq!( + attributes.len(), + expected_length, + "expected {} attributes, got {}", + expected_length, + attributes.len() + ); + } + + for attr in key_values { + assert!( + attributes.contains(attr), + "attribute {attr:?} not found in collected attributes: {attributes:?}" + ); + } + } +} + +fn collect_attributes(exporter: &InMemoryMetricExporter) -> Vec { + exporter + .get_finished_metrics() + .unwrap() + .iter() + .flat_map(opentelemetry_sdk::metrics::data::ResourceMetrics::scope_metrics) + .flat_map(opentelemetry_sdk::metrics::data::ScopeMetrics::metrics) + .flat_map(collect_attributes_for_metric) + .collect() +} + +// bleh +fn collect_attributes_for_metric(metric: &Metric) -> impl Iterator { + match metric.data() { + AggregatedMetrics::F64(data) => match data { + MetricData::Gauge(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::Sum(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::Histogram(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::ExponentialHistogram(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + }, + AggregatedMetrics::U64(data) => match data { + MetricData::Gauge(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::Sum(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::Histogram(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::ExponentialHistogram(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + }, + AggregatedMetrics::I64(data) => match data { + MetricData::Gauge(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::Sum(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::Histogram(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + MetricData::ExponentialHistogram(data) => data + .data_points() + .flat_map(|v| v.attributes().cloned()) + .collect::>(), + }, + } + .into_iter() +} + +#[derive(Debug)] +pub struct RecoverableType(RecoveryInfo); + +impl Recovery for RecoverableType { + fn recovery(&self) -> RecoveryInfo { + self.0.clone() + } +} + +impl From for RecoverableType { + fn from(recovery: RecoveryInfo) -> Self { + Self(recovery) + } +} diff --git a/crates/seatbelt/src/timeout/args.rs b/crates/seatbelt/src/timeout/args.rs new file mode 100644 index 00000000..c65c2c0d --- /dev/null +++ b/crates/seatbelt/src/timeout/args.rs @@ -0,0 +1,75 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +/// Arguments passed to timeout callback functions. +/// +/// Contains information about the timeout event that can be used for logging, +/// metrics, or other side effects when a timeout occurs. +#[derive(Debug)] +#[non_exhaustive] +pub struct OnTimeoutArgs { + pub(super) timeout: Duration, +} + +impl OnTimeoutArgs { + /// Returns the timeout duration that was exceeded. + /// + /// This is the duration after which the operation was canceled. + #[must_use] + pub fn timeout(&self) -> Duration { + self.timeout + } +} + +/// Arguments passed to timeout override functions. +#[derive(Debug)] +#[non_exhaustive] +pub struct TimeoutOverrideArgs { + pub(super) default_timeout: Duration, +} + +impl TimeoutOverrideArgs { + /// Returns the default timeout duration configured for the middleware. + /// + /// This can be used as a base value when calculating dynamic timeouts, or to + /// explicitly reuse the default by returning `None` from the override closure. + #[must_use] + pub fn default_timeout(&self) -> Duration { + self.default_timeout + } +} + +/// Arguments passed to timeout output functions. +/// +/// Contains information about the timeout event that occurred, +/// which can be used to create appropriate timeout responses. +#[derive(Debug)] +pub struct TimeoutOutputArgs { + pub(super) timeout: Duration, +} + +impl TimeoutOutputArgs { + /// Returns the timeout duration that was exceeded. + /// + /// This is the duration after which the operation was canceled. + /// It can be used to provide detailed timeout information in the response. + #[must_use] + pub fn timeout(&self) -> Duration { + self.timeout + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_timeout_ok() { + let args = TimeoutOverrideArgs { + default_timeout: Duration::from_secs(5), + }; + + assert_eq!(args.default_timeout(), Duration::from_secs(5)); + } +} diff --git a/crates/seatbelt/src/timeout/callbacks.rs b/crates/seatbelt/src/timeout/callbacks.rs new file mode 100644 index 00000000..f714d1d1 --- /dev/null +++ b/crates/seatbelt/src/timeout/callbacks.rs @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. + +use std::time::Duration; + +use super::{OnTimeoutArgs, TimeoutOutputArgs, TimeoutOverrideArgs}; + +crate::define_fn_wrapper!(TimeoutOutput(Fn(TimeoutOutputArgs) -> Out)); +crate::define_fn_wrapper!(OnTimeout(Fn(&Out, OnTimeoutArgs))); +crate::define_fn_wrapper!(TimeoutOverride(Fn(&In, TimeoutOverrideArgs) -> Option)); diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs new file mode 100644 index 00000000..fefcf333 --- /dev/null +++ b/crates/seatbelt/src/timeout/layer.rs @@ -0,0 +1,429 @@ +// Copyright (c) Microsoft Corporation. + +use std::marker::PhantomData; +use std::time::Duration; + +use opentelemetry::StringValue; + +use crate::service::Layer; +use crate::timeout::{ + OnTimeout, OnTimeoutArgs, Timeout, TimeoutOutput as TimeoutOutputCallback, TimeoutOutputArgs, + TimeoutOverride, TimeoutOverrideArgs, +}; +use crate::{EnableIf, NotSet, SeatbeltOptions, Set}; + +/// Builder for configuring timeout resilience middleware. +/// +/// This type is created by calling [`Timeout::layer`](crate::timeout::Timeout::layer) and uses the +/// typestate pattern to enforce that required properties are configured before the timeout middleware can be built: +/// +/// - [`timeout_output`][TimeoutLayer::timeout_output]: Required to specify how to represent output values when a timeout occurs +/// - [`timeout`][TimeoutLayer::timeout]: Required to set the timeout duration for operations +/// +/// For comprehensive examples, see the [timeout module][crate::timeout] documentation. +#[derive(Debug)] +pub struct TimeoutLayer { + options: SeatbeltOptions, + timeout: Option, + timeout_output: Option>, + on_timeout: Option>, + enable_if: EnableIf, + strategy_name: StringValue, + timeout_override: Option>, + _state: PhantomData Out>, +} + +impl TimeoutLayer { + #[must_use] + pub(crate) fn new(name: StringValue, options: &SeatbeltOptions) -> Self { + Self { + timeout: None, + timeout_output: None, + on_timeout: None, + enable_if: EnableIf::always(), + strategy_name: name, + options: options.clone(), + timeout_override: None, + _state: PhantomData, + } + } +} + +impl TimeoutLayer, Timeout, TimeoutOutput> { + /// Configures the error value to return when a timeout occurs for Result types. + /// + /// This is a convenience method for Result types that creates an error value + /// when a timeout occurs instead of requiring you to specify the full Result. + /// The error function receives [`TimeoutOutputArgs`] containing timeout context. + /// + /// # Arguments + /// + /// * `timeout_error` - Function that takes [`TimeoutOutputArgs`] and returns + /// the error value to use when a timeout occurs + pub fn timeout_error( + self, + timeout_error: impl Fn(TimeoutOutputArgs) -> E + Send + Sync + 'static, + ) -> TimeoutLayer, Timeout, Set> { + self.into_state::() + .timeout_output(move |args| Err(timeout_error(args))) + .into_state() + } +} + +impl TimeoutLayer { + /// Sets the timeout duration. + /// + /// This specifies how long to wait before timing out an operation. + /// This call replaces any previous timeout value. + /// + /// # Arguments + /// + /// * `timeout` - The maximum duration to wait for the operation to complete + #[must_use] + pub fn timeout(mut self, timeout: Duration) -> TimeoutLayer { + self.timeout = Some(timeout); + self.into_state::() + } + + /// Sets the timeout result factory function. + /// + /// This function is called when a timeout occurs to create the output value + /// that will be returned instead of the original operation's result. + /// This call replaces any previous timeout output handler. + /// + /// # Arguments + /// + /// * `output` - Function that takes [`TimeoutOutputArgs`] containing timeout + /// context and returns the output value to use when a timeout occurs + #[must_use] + pub fn timeout_output( + mut self, + output: impl Fn(TimeoutOutputArgs) -> Out + Send + Sync + 'static, + ) -> TimeoutLayer { + self.timeout_output = Some(TimeoutOutputCallback::new(output)); + self.into_state::() + } + + /// Configures a callback invoked when a timeout occurs. + /// + /// This callback is useful for logging, metrics, or other observability + /// purposes. It receives the timeout output and [`OnTimeoutArgs`] with + /// detailed timeout information. + /// + /// The callback does not affect timeout behavior - it's purely for observation. + /// This call replaces any previous callback. + /// + /// **Default**: None (no observability by default) + /// + /// # Arguments + /// + /// * `on_timeout` - Function that takes a reference to the timeout output and + /// [`OnTimeoutArgs`] containing timeout context information + #[must_use] + pub fn on_timeout( + mut self, + on_timeout: impl Fn(&Out, OnTimeoutArgs) + Send + Sync + 'static, + ) -> Self { + self.on_timeout = Some(OnTimeout::new(on_timeout)); + self + } + + /// Overrides the default timeout on a per-request basis. + /// + /// Use this to compute a timeout dynamically from the input. Return `Some(Duration)` + /// to apply an override, or `None` to fall back to the default timeout configured via + /// [`timeout`][TimeoutLayer::timeout]. The function receives [`TimeoutOverrideArgs`], + /// which exposes the default via [`TimeoutOverrideArgs::default_timeout`]. + /// + /// This call replaces any previous timeout override. + /// + /// **Default**: None (uses default timeout for all requests) + /// + /// # Arguments + /// + /// * `timeout_override` - Function that takes a reference to the input and + /// [`TimeoutOverrideArgs`] containing the default timeout, and returns + /// an optional override duration + #[must_use] + pub fn timeout_override( + mut self, + timeout_override: impl Fn(&In, TimeoutOverrideArgs) -> Option + Send + Sync + 'static, + ) -> Self { + self.timeout_override = Some(TimeoutOverride::new(timeout_override)); + self + } + + /// Optionally enables the timeout middleware based on a condition. + /// + /// When disabled, requests pass through without timeout protection. + /// This call replaces any previous condition. + /// + /// **Default**: Always enabled + /// + /// # Arguments + /// + /// * `is_enabled` - Function that takes a reference to the input and returns + /// `true` if timeout protection should be enabled for this request + #[must_use] + pub fn enable_if(mut self, is_enabled: impl Fn(&In) -> bool + Send + Sync + 'static) -> Self { + self.enable_if = EnableIf::new(is_enabled); + self + } + + /// Enables the timeout middleware unconditionally. + /// + /// All requests will have timeout protection applied. + /// This call replaces any previous condition. + /// + /// **Note**: This is the default behavior - timeout is enabled by default. + #[must_use] + pub fn enable_always(mut self) -> Self { + self.enable_if = EnableIf::always(); + self + } + + /// Disables the timeout middleware completely. + /// + /// All requests will pass through without timeout protection. + /// This call replaces any previous condition. + /// + /// **Note**: This overrides the default enabled behavior. + #[must_use] + pub fn disable(mut self) -> Self { + self.enable_if = EnableIf::never(); + self + } +} + +impl Layer for TimeoutLayer { + type Service = Timeout; + + fn layer(&self, inner: S) -> Self::Service { + Timeout { + inner, + clock: self.options.get_clock().clone(), + timeout: self.timeout.expect("timeout must be set in Ready state"), + enable_if: self.enable_if.clone(), + on_timeout: self.on_timeout.clone(), + timeout_output: self + .timeout_output + .clone() + .expect("timeout_result must be set in Ready state"), + name: self.strategy_name.clone(), + pipeline_name: self.options.get_pipeline_name().clone().into(), + event_reporter: self.options.create_resilience_event_counter(), + timeout_override: self.timeout_override.clone(), + } + } +} + +impl TimeoutLayer { + fn into_state(self) -> TimeoutLayer { + TimeoutLayer { + timeout: self.timeout, + enable_if: self.enable_if, + timeout_output: self.timeout_output, + on_timeout: self.on_timeout, + strategy_name: self.strategy_name, + options: self.options, + timeout_override: self.timeout_override, + _state: PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + use std::sync::Arc; + use std::sync::atomic::{AtomicBool, Ordering}; + + use layered::Execute; + use tick::Clock; + + use super::*; + + #[test] + fn new_needs_timeout_output() { + let options = create_test_options(); + let layer: TimeoutLayer<_, _, NotSet, NotSet> = + TimeoutLayer::new(StringValue::from("test_timeout"), &options); + + assert!(layer.timeout.is_none()); + assert!(layer.timeout_output.is_none()); + assert!(layer.on_timeout.is_none()); + assert!(layer.timeout_override.is_none()); + assert_eq!(layer.strategy_name, StringValue::from("test_timeout")); + assert!(layer.enable_if.call(&"test_input".to_string())); + } + + #[test] + fn timeout_output_ensure_set_correctly() { + let options = create_test_options(); + let layer = TimeoutLayer::new(StringValue::from("test"), &options); + + let layer: TimeoutLayer<_, _, NotSet, Set> = + layer.timeout_output(|args| format!("timeout: {}", args.timeout().as_millis())); + let result = layer.timeout_output.unwrap().call(TimeoutOutputArgs { + timeout: Duration::from_millis(3), + }); + + assert_eq!(result, "timeout: 3"); + } + + #[test] + fn timeout_error_ensure_set_correctly() { + let options = create_test_options_result(); + let layer = TimeoutLayer::new(StringValue::from("test"), &options); + + let layer: TimeoutLayer<_, _, NotSet, Set> = + layer.timeout_error(|args| format!("timeout: {}", args.timeout().as_millis())); + let result = layer + .timeout_output + .unwrap() + .call(TimeoutOutputArgs { + timeout: Duration::from_millis(3), + }) + .unwrap_err(); + + assert_eq!(result, "timeout: 3"); + } + + #[test] + fn timeout_ensure_set_correctly() { + let layer: TimeoutLayer<_, _, Set, Set> = + TimeoutLayer::new(StringValue::from("test"), &create_test_options()) + .timeout_output(|_args| "timeout: ".to_string()) + .timeout(Duration::from_millis(3)); + + assert_eq!(layer.timeout.unwrap(), Duration::from_millis(3)); + } + + #[test] + fn on_timeout_ok() { + let called = Arc::new(AtomicBool::new(false)); + let called_clone = Arc::clone(&called); + + let layer: TimeoutLayer<_, _, Set, Set> = + create_ready_layer().on_timeout(move |_output, _args| { + called_clone.store(true, Ordering::SeqCst); + }); + + layer.on_timeout.unwrap().call( + &"output".to_string(), + OnTimeoutArgs { + timeout: Duration::from_millis(3), + }, + ); + + assert!(called.load(Ordering::SeqCst)); + } + + #[test] + fn timeout_override_ok() { + let layer: TimeoutLayer<_, _, Set, Set> = + create_ready_layer().timeout_override(|_input, _args| Some(Duration::from_secs(3))); + + let result = layer.timeout_override.unwrap().call( + &"a".to_string(), + TimeoutOverrideArgs { + default_timeout: Duration::from_millis(3), + }, + ); + + assert_eq!(result, Some(Duration::from_secs(3))); + } + + #[test] + fn enable_if_ok() { + let layer: TimeoutLayer<_, _, Set, Set> = + create_ready_layer().enable_if(|input| matches!(input.as_ref(), "enable")); + + assert!(layer.enable_if.call(&"enable".to_string())); + assert!(!layer.enable_if.call(&"disable".to_string())); + } + + #[test] + fn disable_ok() { + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer().disable(); + + assert!(!layer.enable_if.call(&"whatever".to_string())); + } + + #[test] + fn enable_ok() { + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer().disable().enable_always(); + + assert!(layer.enable_if.call(&"whatever".to_string())); + } + + #[test] + fn timeout_when_ready_ok() { + let layer: TimeoutLayer<_, _, Set, Set> = + create_ready_layer().timeout(Duration::from_secs(123)); + + assert_eq!(layer.timeout.unwrap(), Duration::from_secs(123)); + } + + #[test] + fn timeout_output_when_ready_ok() { + let layer: TimeoutLayer<_, _, Set, Set> = + create_ready_layer().timeout_output(|_args| "some new value".to_string()); + assert!(layer.timeout_output.is_some()); + let result = layer.timeout_output.unwrap().call(TimeoutOutputArgs { + timeout: Duration::from_secs(123), + }); + + assert_eq!(result, "some new value"); + } + + #[test] + fn timeout_error_when_ready_ok() { + let layer: TimeoutLayer<_, _, Set, Set> = + create_ready_layer_with_result().timeout_error(|_args| "some error value".to_string()); + assert!(layer.timeout_output.is_some()); + let result = layer + .timeout_output + .unwrap() + .call(TimeoutOutputArgs { + timeout: Duration::from_secs(123), + }) + .unwrap_err(); + + assert_eq!(result, "some error value"); + } + + #[test] + fn layer_ok() { + let _layered = + create_ready_layer().layer(Execute::new(|input: String| async move { input })); + } + + #[test] + fn static_assertions() { + static_assertions::assert_impl_all!(TimeoutLayer: Layer); + static_assertions::assert_not_impl_all!(TimeoutLayer: Layer); + static_assertions::assert_not_impl_all!(TimeoutLayer: Layer); + static_assertions::assert_impl_all!(TimeoutLayer: Debug); + } + + fn create_test_options() -> SeatbeltOptions { + SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + } + + fn create_test_options_result() -> SeatbeltOptions> { + SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + } + + fn create_ready_layer() -> TimeoutLayer { + TimeoutLayer::new(StringValue::from("test"), &create_test_options()) + .timeout_output(|_args| "timeout: ".to_string()) + .timeout(Duration::from_millis(3)) + } + + fn create_ready_layer_with_result() -> TimeoutLayer, Set, Set> { + TimeoutLayer::new(StringValue::from("test"), &create_test_options_result()) + .timeout_error(|_args| "timeout: ".to_string()) + .timeout(Duration::from_millis(3)) + } +} diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs new file mode 100644 index 00000000..e1a6743f --- /dev/null +++ b/crates/seatbelt/src/timeout/mod.rs @@ -0,0 +1,211 @@ +// Copyright (c) Microsoft Corporation. + +//! Timeout resilience middleware for services, applications, and libraries. +//! +//! This module provides timeout functionality to cancel long-running operations and prevent +//! services from hanging indefinitely when processing requests. The primary types are +//! [`Timeout`] and [`TimeoutLayer`]: +//! +//! - [`Timeout`] is the middleware that wraps an inner service and enforces timeout behavior +//! - [`TimeoutLayer`] is used to configure and construct the timeout middleware +//! +//! # Quick Start +//! +//! ```rust +//! # use std::time::Duration; +//! # use std::io; +//! # use oxidizer_rt::Builtins; +//! # use layered::{Execute, Service, Stack}; +//! # use seatbelt::timeout::Timeout; +//! # use seatbelt::SeatbeltOptions; +//! # #[oxidizer_rt::test] +//! # async fn example(state: Builtins) -> Result<(), Box> { +//! # let clock = state.clock().clone(); +//! let options = SeatbeltOptions::new(&clock).pipeline_name("my_service"); +//! +//! let stack = ( +//! Timeout::layer("timeout", &options) +//! .timeout_error(|_| io::Error::new(io::ErrorKind::TimedOut, "operation timed out")) +//! .timeout(Duration::from_secs(30)), +//! Execute::new(my_operation), +//! ); +//! +//! let service = stack.build(); +//! let result = service.execute("input".to_string()).await; +//! # Ok(()) +//! # } +//! # async fn my_operation(input: String) -> Result { Ok(input) } +//! ``` +//! +//! # Configuration +//! +//! The [`TimeoutLayer`] uses a type state pattern to enforce that all required properties are +//! configured before the layer can be built. This compile-time safety ensures that you cannot +//! accidentally create a timeout layer without properly specifying the timeout duration +//! and the timeout output generator: +//! +//! - [`timeout_output`][TimeoutLayer::timeout_output] or [`timeout_error`][TimeoutLayer::timeout_error]: Required function to generate output when timeout occurs +//! - [`timeout`][TimeoutLayer::timeout]: Required timeout duration for operations +//! +//! Each timeout layer requires an identifier for telemetry purposes. This identifier should use +//! `snake_case` naming convention to maintain consistency across the codebase. +//! +//! The default timeout is configured via [`TimeoutLayer::timeout`]. You can override that +//! per request with [`TimeoutLayer::timeout_override`]. +//! +//! # Defaults +//! +//! The timeout middleware uses the following default values when optional configuration is not provided: +//! +//! | Parameter | Default Value | Description | Configured By | +//! |-----------|---------------|-------------|---------------| +//! | Timeout duration | `None` (required) | Maximum duration to wait for operation completion | [`timeout`][TimeoutLayer::timeout] | +//! | Timeout output | `None` (required) | Output value to return when timeout occurs | [`timeout_output`][TimeoutLayer::timeout_output], [`timeout_error`][TimeoutLayer::timeout_error] | +//! | Timeout override | `None` | Uses default timeout for all requests | [`timeout_override`][TimeoutLayer::timeout_override] | +//! | On timeout callback | `None` | No observability by default | [`on_timeout`][TimeoutLayer::on_timeout] | +//! | Enable condition | Always enabled | Timeout protection is applied to all requests | [`enable_if`][TimeoutLayer::enable_if], [`enable_always`][TimeoutLayer::enable_always], [`disable`][TimeoutLayer::disable] | +//! +//! Unlike other middleware, timeout requires explicit configuration of both the timeout duration +//! and the output generator function, as there are no reasonable universal defaults for these values. +//! +//! # Thread Safety +//! +//! The [`Timeout`] type is thread-safe and implements both `Send` and `Sync` as enforced by +//! the `Service` trait it implements. This allows timeout middleware to be safely shared +//! across multiple threads and used in concurrent environments. +//! +//! # Telemetry +//! +//! ## Metrics +//! +//! - **Metric**: `resilience.event` (counter) +//! - **When**: Emitted when a timeout occurs +//! - **Attributes**: +//! - `resilience.pipeline.name`: Pipeline identifier from [`SeatbeltOptions::pipeline_name`] +//! - `resilience.strategy.name`: Timeout identifier from [`Timeout::layer`] +//! - `resilience.event.name`: Always `timeout` +//! +//! # Examples +//! +//! ## Basic Usage +//! +//! This example demonstrates the basic usage of configuring and using timeout middleware. +//! +//! ```rust +//! # use std::time::Duration; +//! # use oxidizer_rt::Builtins; +//! # use layered::{Execute, Service, Stack}; +//! # use tick::Clock; +//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::timeout::Timeout; +//! # #[oxidizer_rt::test] +//! # async fn example(state: Builtins) -> Result<(), Box> { +//! # let clock = state.clock().clone(); +//! // Define common options for resilience middleware. The clock is runtime-specific and +//! // must be provided. See its documentation for details. +//! let options = SeatbeltOptions::new(&clock); +//! +//! let stack = ( +//! Timeout::layer("my_timeout", &options) +//! // Required: timeout middleware needs to know what output to return when timeout occurs +//! .timeout_output(|args| { +//! format!("timeout error, duration: {}ms", args.timeout().as_millis()) +//! }) +//! // Required: timeout duration must be set +//! .timeout(Duration::from_secs(30)), +//! Execute::new(execute_unreliable_operation), +//! ); +//! +//! // Build the service +//! let service = stack.build(); +//! +//! // Execute the service +//! let result = service.execute("quick".to_string()).await; +//! # let _result = result; +//! # } +//! +//! # async fn execute_unreliable_operation(input: String) -> String { input } +//! ``` +//! +//! ## Advanced Usage +//! +//! This example demonstrates advanced usage of the timeout middleware, including working with +//! Result-based outputs, custom configurations, and timeout overrides. +//! +//! ```rust +//! # use std::time::Duration; +//! # use std::io; +//! # use oxidizer_rt::Builtins; +//! # use layered::{Execute, Service, Stack}; +//! # use tick::Clock; +//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::timeout::Timeout; +//! # #[oxidizer_rt::test] +//! # async fn example(state: Builtins) -> Result<(), Box> { +//! # let clock = state.clock().clone(); +//! // Define common options for resilience middleware. +//! let options = SeatbeltOptions::new(&clock); +//! +//! let stack = ( +//! Timeout::layer("my_timeout", &options) +//! // Return an error for Result outputs on timeout +//! .timeout_error(|args| io::Error::new(io::ErrorKind::TimedOut, "request timed out")) +//! // Default timeout +//! .timeout(Duration::from_secs(30)) +//! // Callback for when a timeout occurs +//! .on_timeout(|_output, args| { +//! println!("timeout occurred after {}ms", args.timeout().as_millis()); +//! }) +//! // Provide per-input timeout overrides (fallback to default on None) +//! .timeout_override(|input, args| { +//! match input.as_str() { +//! "quick" => Some(Duration::from_secs(5)), // override +//! "slow" => Some(Duration::from_secs(60)), +//! _ => None, // use default (args.default_timeout())! +//! } +//! }) +//! // Optionally disable timeouts for some inputs +//! .enable_if(|input| !input.starts_with("bypass_")), +//! Execute::new(execute_unreliable_operation), +//! ); +//! +//! // Build and execute the service +//! let service = stack.build(); +//! let result = service.execute("quick".to_string()).await?; +//! # let _result = result; +//! # Ok(()) +//! # } +//! # async fn execute_unreliable_operation(input: String) -> String { input } +//! ``` +//! +//! ## Incomplete Configuration +//! +//! This example demonstrates what happens when the `TimeoutLayer` is not fully configured. +//! The code below will not compile because the timeout layer is missing required configuration. +//! +//! ```compile_fail +//! # use std::time::Duration; +//! # use layered::{Execute, Stack}; +//! # use tick::Clock; +//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::timeout::Timeout; +//! # fn example(service_options: SeatbeltOptions) { +//! let stack = ( +//! Timeout::layer("my_timeout", &service_options), // Missing required configuration! +//! Execute::new(|input| async move { input }) +//! ); +//! +//! // This will fail to compile +//! let service = stack.build(); +//! # } +//! ``` +mod args; +mod callbacks; +mod layer; +mod service; +mod telemetry; + +pub use args::{OnTimeoutArgs, TimeoutOutputArgs, TimeoutOverrideArgs}; +pub(crate) use callbacks::{OnTimeout, TimeoutOutput, TimeoutOverride}; +pub use layer::TimeoutLayer; +pub use service::Timeout; diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs new file mode 100644 index 00000000..20e24fef --- /dev/null +++ b/crates/seatbelt/src/timeout/service.rs @@ -0,0 +1,279 @@ +// Copyright (c) Microsoft Corporation. + +use std::borrow::Cow; +use std::time::Duration; + +use futures::future::Either; +use layered::Service; +use opentelemetry::StringValue; +use tick::{Clock, FutureExt}; + +use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; +use crate::timeout::telemetry::TIMEOUT_EVENT_NAME; +use crate::timeout::{ + OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, + TimeoutOverrideArgs, +}; +use crate::{EnableIf, NotSet, SeatbeltOptions}; + +/// Applies a timeout to service execution for canceling long-running operations. +/// +/// `Timeout` wraps an inner [`Service`] and enforces a maximum duration for +/// each call. If the operation doesn't finish in time, an output that represents +/// the timeout is returned. This middleware is designed to be used across +/// services, applications, and libraries to prevent operations from hanging indefinitely. +/// +/// Timeouts are configured by calling [`Timeout::layer`](crate::timeout::Timeout::layer) +/// and using the builder methods on the returned [`TimeoutLayer`] instance. +/// +/// For comprehensive examples and usage patterns, see the [timeout module] documentation. +/// +/// [timeout module]: crate::timeout +#[derive(Debug)] +#[expect(clippy::struct_field_names, reason = "fields are named for clarity")] +pub struct Timeout { + pub(super) inner: S, + pub(super) clock: Clock, + pub(super) timeout: Duration, + pub(super) enable_if: EnableIf, + pub(super) on_timeout: Option>, + pub(super) timeout_override: Option>, + pub(super) timeout_output: TimeoutOutput, + pub(super) name: StringValue, + pub(super) pipeline_name: StringValue, + pub(super) event_reporter: opentelemetry::metrics::Counter, +} + +impl Timeout { + /// Creates a [`TimeoutLayer`] used to configure the timeout resilience middleware. + /// + /// The instance returned by this call is a builder and cannot be used to build a + /// service until the required properties are set: `timeout_output` and `timeout`. + /// The `name` identifies the timeout strategy in telemetry, while `options` + /// provides configuration shared across multiple resilience middleware. + /// + /// # Example + /// + /// ```rust + /// # use std::time::Duration; + /// # use layered::{Execute, Stack}; + /// # use tick::Clock; + /// # use seatbelt::SeatbeltOptions; + /// use seatbelt::timeout::Timeout; + /// + /// # fn example(options: SeatbeltOptions) { + /// let timeout_layer = Timeout::layer("my_timeout", &options) + /// .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) + /// .timeout(Duration::from_secs(30)); + /// # } + /// ``` + /// + /// For comprehensive examples, see the [timeout module] documentation. + /// + /// [timeout module]: crate::timeout + pub fn layer( + name: impl Into>, + options: &SeatbeltOptions, + ) -> TimeoutLayer { + TimeoutLayer::new(name.into().into(), options) + } +} + +impl Service for Timeout +where + In: Send, + S: Service, +{ + type Out = Out; + + #[cfg_attr(test, mutants::skip)] // causes test timeouts + fn execute(&self, input: In) -> impl Future + Send { + if !self.enable_if.call(&input) { + return Either::Left(self.inner.execute(input)); + } + + let timeout = self + .timeout_override + .as_ref() + .and_then(|provider| { + provider.call( + &input, + TimeoutOverrideArgs { + default_timeout: self.timeout, + }, + ) + }) + .unwrap_or(self.timeout); + + Either::Right(async move { + match self + .inner + .execute(input) + .timeout(&self.clock, timeout) + .await + { + Ok(output) => output, + Err(_error) => { + self.event_reporter.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, TIMEOUT_EVENT_NAME), + ], + ); + + let output = self.timeout_output.call(TimeoutOutputArgs { timeout }); + + tracing::event!( + name: "seatbelt.timeout", + tracing::Level::WARN, + pipeline.name = self.pipeline_name.as_str(), + strategy.name = self.name.as_str(), + timeout.ms = timeout.as_millis(), + ); + + if let Some(on_timeout) = &self.on_timeout { + on_timeout.call(&output, OnTimeoutArgs { timeout }); + } + + output + } + } + }) + } +} + +#[cfg(not(miri))] // Oxidizer runtime does not support Miri. +#[cfg(test)] +mod tests { + use layered::{Execute, Stack}; + use oxidizer_rt::Builtins; + use tick::ClockControl; + + use super::*; + + #[oxidizer_rt::test] + async fn no_timeout(state: Builtins) { + let options = SeatbeltOptions::new(state.clock().clone()); + + let stack = ( + Timeout::layer("test_timeout", &options) + .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) + .timeout(Duration::from_secs(5)), + Execute::new(|input: String| async move { input }), + ); + + let service = stack.build(); + + let output = service.execute("test input".to_string()).await; + + assert_eq!(output, "test input".to_string()); + } + + #[oxidizer_rt::test] + async fn timeout(_state: Builtins) { + let clock = ClockControl::default() + .auto_advance(Duration::from_millis(200)) + .auto_advance_limit(Duration::from_millis(500)) + .to_clock(); + let options = SeatbeltOptions::new(clock.clone()); + let called = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); + let called_clone = std::sync::Arc::clone(&called); + + let stack = ( + Timeout::layer("test_timeout", &options) + .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) + .timeout(Duration::from_millis(200)) + .on_timeout(move |out, args| { + assert_eq!("timed out after 200ms", out.as_str()); + assert_eq!(200, args.timeout().as_millis()); + called.store(true, std::sync::atomic::Ordering::SeqCst); + }), + Execute::new(move |input| { + let clock = clock.clone(); + async move { + clock.delay(Duration::from_secs(1)).await; + input + } + }), + ); + + let service = stack.build(); + + let output = service.execute("test input".to_string()).await; + + assert_eq!(output, "timed out after 200ms"); + assert!(called_clone.load(std::sync::atomic::Ordering::SeqCst)); + } + + #[oxidizer_rt::test] + async fn timeout_override_ensure_respected(_state: Builtins) { + let clock = ClockControl::default() + .auto_advance(Duration::from_millis(200)) + .auto_advance_limit(Duration::from_millis(5000)) + .to_clock(); + + let stack = ( + Timeout::layer("test_timeout", &SeatbeltOptions::new(clock.clone())) + .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) + .timeout(Duration::from_millis(200)) + .timeout_override(|input, _args| { + if input == "ignore" { + return None; + } + + Some(Duration::from_millis(150)) + }), + Execute::new(move |input| { + let clock = clock.clone(); + async move { + clock.delay(Duration::from_secs(10)).await; + input + } + }), + ); + + let service = stack.build(); + + assert_eq!( + service.execute("test input".to_string()).await, + "timed out after 150ms" + ); + assert_eq!( + service.execute("ignore".to_string()).await, + "timed out after 200ms" + ); + } + + #[oxidizer_rt::test] + async fn no_timeout_if_disabled(state: Builtins) { + let control = ClockControl::default(); + let clock = control.to_clock(); + let stack = ( + Timeout::layer("test_timeout", &SeatbeltOptions::new(&clock)) + .timeout_output(|_args| "timed out".to_string()) + .timeout(Duration::from_millis(200)) + .disable(), + Execute::new(move |input| { + let clock = clock.clone(); + async move { + clock.delay(Duration::from_secs(1)).await; + input + } + }), + ); + + let service = stack.build(); + let handle = state + .scheduler() + .spawn(|_state| async move { service.execute("test input".to_string()).await }); + + // this should trigger timeout + control.advance_millis(250); + state.runtime_ops().unwrap().yield_now().await; + control.advance_millis(1000); + + assert_eq!(handle.await, "test input"); + } +} diff --git a/crates/seatbelt/src/timeout/telemetry.rs b/crates/seatbelt/src/timeout/telemetry.rs new file mode 100644 index 00000000..314df271 --- /dev/null +++ b/crates/seatbelt/src/timeout/telemetry.rs @@ -0,0 +1,4 @@ +// Copyright (c) Microsoft Corporation. + +/// The name of the timeout event for telemetry reporting. +pub(super) const TIMEOUT_EVENT_NAME: &str = "timeout"; From 83e9b86f7423cd07908003ad196fd2d3f724976d Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 15:03:52 +0100 Subject: [PATCH 02/60] fixes --- Cargo.lock | 12 ++++ Cargo.toml | 1 + crates/seatbelt/Cargo.toml | 4 +- crates/seatbelt/examples/circuit_breaker.rs | 31 ++++++++-- .../seatbelt/examples/resilience_pipeline.rs | 41 +++++++------- crates/seatbelt/examples/retry.rs | 9 +-- crates/seatbelt/examples/retry_advanced.rs | 34 ++++++----- crates/seatbelt/examples/retry_outage.rs | 29 ++++++++-- crates/seatbelt/examples/timeout.rs | 43 +++++++++++--- crates/seatbelt/examples/timeout_advanced.rs | 56 ++++++++++--------- 10 files changed, 178 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b088a43e..660c0382 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -974,6 +974,17 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "opentelemetry-stdout" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811" +dependencies = [ + "chrono", + "opentelemetry", + "opentelemetry_sdk", +] + [[package]] name = "opentelemetry_sdk" version = "0.31.0" @@ -1365,6 +1376,7 @@ dependencies = [ "mutants", "ohno", "opentelemetry", + "opentelemetry-stdout", "opentelemetry_sdk", "recoverable", "static_assertions", diff --git a/Cargo.toml b/Cargo.toml index 8637621a..a064561b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -99,6 +99,7 @@ xxhash-rust = { version = "0.8.15", default-features = false } fastrand = { version = "2.3.0", default-features = false, features = ["std"] } opentelemetry = { version = "0.31.0", default-features = false } opentelemetry_sdk = { version = "0.31.0", default-features = false } +opentelemetry-stdout = { version = "0.31.0", default-features = false } http = { version = "1.2.0", default-features = false, features = ["std"] } [workspace.lints.rust] diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 59c29482..b3f40a36 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -50,12 +50,14 @@ alloc_tracker.workspace = true futures = { workspace = true, features = ["executor"] } opentelemetry = { workspace = true, default-features = false, features = ["metrics"] } opentelemetry_sdk = { workspace = true, default-features = false, features = ["metrics", "testing", "experimental_metrics_custom_reader"] } +opentelemetry-stdout = { workspace = true, default-features = false, features = ["metrics", "logs"] } anyhow.workspace = true tracing.workspace = true -tracing-subscriber.workspace = true +tracing-subscriber = { workspace = true, features = ["fmt", "std"] } fastrand.workspace = true http.workspace = true tokio = { workspace = true, features = ["rt", "macros"] } +ohno = { workspace, features = ["app-err"] } [[example]] name = "timeout" diff --git a/crates/seatbelt/examples/circuit_breaker.rs b/crates/seatbelt/examples/circuit_breaker.rs index 7ad854fc..f975ff3f 100644 --- a/crates/seatbelt/examples/circuit_breaker.rs +++ b/crates/seatbelt/examples/circuit_breaker.rs @@ -11,13 +11,20 @@ use std::time::Duration; use layered::{Execute, Service, Stack}; -use oxidizer_rt::Builtins; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_stdout::MetricExporter; use seatbelt::circuit_breaker::CircuitBreaker; use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use tick::Clock; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; -#[oxidizer_rt::main] -async fn main(builtins: Builtins) -> anyhow::Result<()> { - let options = SeatbeltOptions::new(&builtins); +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let meter_provider = configure_telemetry(); + + let clock = Clock::new_tokio(); + let options = SeatbeltOptions::new(&clock).meter_provider(&meter_provider); // Define stack with circuit breaker layer let stack = ( @@ -54,7 +61,7 @@ async fn main(builtins: Builtins) -> anyhow::Result<()> { // failure rate exceeds the threshold. You can play with this value an increase it to 300 // to see how the circuit breaker eventually closes when the service recovers. for attempt in 0..30 { - builtins.clock().delay(Duration::from_millis(50)).await; + clock.delay(Duration::from_millis(50)).await; match service.execute(attempt).await { Ok(output) => println!("{attempt}: {output}"), @@ -62,6 +69,9 @@ async fn main(builtins: Builtins) -> anyhow::Result<()> { } } + // Flush metrics to stdout before exiting + meter_provider.force_flush()?; + Ok(()) } @@ -79,3 +89,14 @@ async fn execute_operation(input: u32) -> Result { Ok(format!("output-{input}")) } } + +fn configure_telemetry() -> SdkMeterProvider { + // Set up tracing subscriber for logs to console + tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer()) + .init(); + + SdkMeterProvider::builder() + .with_periodic_exporter(MetricExporter::default()) + .build() +} diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index b0f649e7..e69f8c79 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -7,22 +7,24 @@ use std::time::Duration; use layered::{Execute, Service, Stack}; -use oxidizer_rt::Builtins; -use oxidizer_telemetry::Telemetry; -use oxidizer_telemetry::destination::{stdout_logs, stdout_metrics}; -use oxidizer_telemetry::logs::InterceptTracingToLogExporter; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; use seatbelt::timeout::Timeout; use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use tick::Clock; +use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; -#[oxidizer_rt::main] -async fn main(builtins: Builtins) { - let telemetry = configure_telemetry(); +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let meter_provider = configure_telemetry(); + + let clock = Clock::new_tokio(); // Shared options for resilience middleware - let options = SeatbeltOptions::new(&builtins) - .meter_provider(telemetry.meter_provider()) + let options = SeatbeltOptions::new(&clock) + .meter_provider(&meter_provider) .pipeline_name("my_pipeline"); // Define stack with retry and timeout middlewares @@ -48,6 +50,11 @@ async fn main(builtins: Builtins) { let output = service.execute("value".to_string()).await; println!("execution finished, output: {output}"); + + // Flush metrics to stdout before exiting + meter_provider.force_flush()?; + + Ok(()) } async fn execute_operation(input: String) -> String { @@ -58,17 +65,13 @@ async fn execute_operation(input: String) -> String { } } -fn configure_telemetry() -> Telemetry { - let telemetry = oxidizer_telemetry::builder() - .destination(stdout_metrics()) - .unwrap() - .destination(stdout_logs()) - .unwrap() - .build(); - +fn configure_telemetry() -> SdkMeterProvider { + // Set up tracing subscriber for logs to console tracing_subscriber::registry() - .with_tracing_interception(telemetry.logger_provider()) + .with(tracing_subscriber::fmt::layer()) .init(); - telemetry + SdkMeterProvider::builder() + .with_periodic_exporter(MetricExporter::default()) + .build() } diff --git a/crates/seatbelt/examples/retry.rs b/crates/seatbelt/examples/retry.rs index c7cc97b1..2ec87b9d 100644 --- a/crates/seatbelt/examples/retry.rs +++ b/crates/seatbelt/examples/retry.rs @@ -5,13 +5,14 @@ use std::io::Error; use layered::{Execute, Service, Stack}; -use oxidizer_rt::Builtins; use seatbelt::retry::Retry; use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use tick::Clock; -#[oxidizer_rt::main] -async fn main(state: Builtins) -> anyhow::Result<()> { - let options = SeatbeltOptions::new(&state); +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let clock = Clock::new_tokio(); + let options = SeatbeltOptions::new(&clock); // Define stack with retry layer let stack = ( diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index 725eda02..90ec9351 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -10,15 +10,22 @@ use std::time::Duration; use http::{Request, Response}; use layered::{Execute, Service, Stack}; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; use seatbelt::{Attempt, RecoveryInfo, SeatbeltOptions}; +use tick::Clock; +use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; -#[oxidizer_rt::main] -async fn main(state: Builtins) -> anyhow::Result<()> { - let options = SeatbeltOptions::new(&state) +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let meter_provider = configure_telemetry(); + + let clock = Clock::new_tokio(); + let options = SeatbeltOptions::new(&clock) .pipeline_name("retry_advanced") - .meter_provider(configure_telemetry().meter_provider()); + .meter_provider(&meter_provider); // Define stack with retry layer let stack = ( @@ -70,6 +77,9 @@ async fn main(state: Builtins) -> anyhow::Result<()> { Err(e) => println!("execution failed, error: {e}"), } + // Flush metrics to stdout before exiting + meter_provider.force_flush()?; + Ok(()) } @@ -93,17 +103,13 @@ async fn send_request(input: Request) -> Result, Error> } } -fn configure_telemetry() -> Telemetry { - let telemetry = oxidizer_telemetry::builder() - .destination(stdout_metrics()) - .unwrap() - .destination(stdout_logs()) - .unwrap() - .build(); - +fn configure_telemetry() -> SdkMeterProvider { + // Set up tracing subscriber for logs to console tracing_subscriber::registry() - .with_tracing_interception(telemetry.logger_provider()) + .with(tracing_subscriber::fmt::layer()) .init(); - telemetry + SdkMeterProvider::builder() + .with_periodic_exporter(MetricExporter::default()) + .build() } diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index 1d8f2653..c31ef660 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -12,16 +12,23 @@ use std::time::Duration; use http::{Request, Response}; use layered::{Execute, Service, Stack}; -use oxidizer_rt::Builtins; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; use seatbelt::{Recovery, RecoveryInfo, SeatbeltOptions}; +use tick::Clock; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; const ENDPOINT_WITH_OUTAGE: &str = "https://example.com"; const ENDPOINT_ALIVE: &str = "https://fallback.example.com"; -#[oxidizer_rt::main] -async fn main(state: Builtins) -> anyhow::Result<()> { - let options = SeatbeltOptions::new(&state); +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let meter_provider = configure_telemetry(); + + let clock = Clock::new_tokio(); + let options = SeatbeltOptions::new(&clock).meter_provider(&meter_provider); // Configure retry layer for outage handling with input restoration let stack = ( @@ -61,6 +68,9 @@ async fn main(state: Builtins) -> anyhow::Result<()> { println!("Final response: {}", response.body()); + // Flush metrics to stdout before exiting + meter_provider.force_flush()?; + Ok(()) } @@ -124,3 +134,14 @@ impl Recovery for HttpError { self.recovery.clone() } } + +fn configure_telemetry() -> SdkMeterProvider { + // Set up tracing subscriber for logs to console + tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer()) + .init(); + + SdkMeterProvider::builder() + .with_periodic_exporter(MetricExporter::default()) + .build() +} diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index 346baa65..37b59a6f 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -9,17 +9,25 @@ use std::time::Duration; use anyhow::anyhow; use layered::{Execute, Service, Stack}; -use oxidizer_rt::Builtins; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_stdout::MetricExporter; use seatbelt::SeatbeltOptions; use seatbelt::timeout::Timeout; +use tick::Clock; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; const TIMEOUT_DURATION: Duration = Duration::from_millis(100); const PROCESSING_DELAY: Duration = Duration::from_millis(500); -#[oxidizer_rt::main] -async fn main(state: Builtins) -> anyhow::Result<()> { +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let meter_provider = configure_telemetry(); + + let clock = Clock::new_tokio(); + // Create common options - let options = SeatbeltOptions::new(&state); + let options = SeatbeltOptions::new(&clock).meter_provider(&meter_provider); // Define stack with timeout layer let stack = ( @@ -33,11 +41,14 @@ async fn main(state: Builtins) -> anyhow::Result<()> { args.timeout().as_millis() ) }), - Execute::new(move |_input| { - let clock = state.clock().clone(); - async move { - clock.delay(PROCESSING_DELAY).await; // Simulate some processing delay so the timeout can trigger - Ok(()) + Execute::new({ + let clock = clock.clone(); + move |_input| { + let clock = clock.clone(); + async move { + clock.delay(PROCESSING_DELAY).await; // Simulate some processing delay so the timeout can trigger + Ok(()) + } } }), ); @@ -51,5 +62,19 @@ async fn main(state: Builtins) -> anyhow::Result<()> { println!("{i} attempt, error: {timeout_error}"); } + // Flush metrics to stdout before exiting + meter_provider.force_flush()?; + Ok(()) } + +fn configure_telemetry() -> SdkMeterProvider { + // Set up tracing subscriber for logs to console + tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer()) + .init(); + + SdkMeterProvider::builder() + .with_periodic_exporter(MetricExporter::default()) + .build() +} diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index 8121e395..810fb1da 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -9,26 +9,28 @@ use std::time::Duration; use anyhow::anyhow; use layered::{Execute, Service, Stack}; -use oxidizer_rt::Builtins; -use oxidizer_telemetry::Telemetry; -use oxidizer_telemetry::destination::{stdout_logs, stdout_metrics}; -use oxidizer_telemetry::logs::InterceptTracingToLogExporter; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_stdout::MetricExporter; use seatbelt::SeatbeltOptions; use seatbelt::timeout::Timeout; -use tracing_subscriber::prelude::*; +use tick::Clock; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; const TIMEOUT_DURATION: Duration = Duration::from_millis(20); const PROCESSING_DELAY: Duration = Duration::from_secs(1); -#[oxidizer_rt::main] -async fn main(state: Builtins) -> anyhow::Result<()> { +#[tokio::main] +async fn main() -> anyhow::Result<()> { // Configure telemetry to see the timeout metrics and logs - let telemetry = configure_telemetry(); + let meter_provider = configure_telemetry(); + + let clock = Clock::new_tokio(); // Create service options - let options: SeatbeltOptions> = SeatbeltOptions::new(&state) + let options: SeatbeltOptions> = SeatbeltOptions::new(&clock) .pipeline_name("my_pipeline") - .meter_provider(telemetry.meter_provider()); + .meter_provider(&meter_provider); // Define stack with timeout layer let stack = ( @@ -54,12 +56,15 @@ async fn main(state: Builtins) -> anyhow::Result<()> { "2" => Some(Duration::from_millis(300)), _ => None, }), - Execute::new(move |_input| { - let clock = state.clock().clone(); - async move { - // Simulate some processing delay so the timeout can trigger - clock.delay(PROCESSING_DELAY).await; - Ok(()) + Execute::new({ + let clock = clock.clone(); + move |_input| { + let clock = clock.clone(); + async move { + // Simulate some processing delay so the timeout can trigger + clock.delay(PROCESSING_DELAY).await; + Ok(()) + } } }), ); @@ -75,20 +80,19 @@ async fn main(state: Builtins) -> anyhow::Result<()> { } } + // Flush metrics to stdout before exiting + meter_provider.force_flush()?; + Ok(()) } -fn configure_telemetry() -> Telemetry { - let telemetry = oxidizer_telemetry::builder() - .destination(stdout_metrics()) - .unwrap() - .destination(stdout_logs()) - .unwrap() - .build(); - +fn configure_telemetry() -> SdkMeterProvider { + // Set up tracing subscriber for logs to console tracing_subscriber::registry() - .with_tracing_interception(telemetry.logger_provider()) + .with(tracing_subscriber::fmt::layer()) .init(); - telemetry + SdkMeterProvider::builder() + .with_periodic_exporter(MetricExporter::default()) + .build() } From 07b7f4ebc5e323e9c4b8e9ef1751f7fd3fe91414 Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 15:06:28 +0100 Subject: [PATCH 03/60] fixes --- crates/seatbelt/Cargo.toml | 3 +-- crates/seatbelt/examples/circuit_breaker.rs | 3 ++- crates/seatbelt/examples/resilience_pipeline.rs | 3 ++- crates/seatbelt/examples/retry.rs | 3 ++- crates/seatbelt/examples/retry_advanced.rs | 3 ++- crates/seatbelt/examples/retry_outage.rs | 3 ++- crates/seatbelt/examples/timeout.rs | 6 +++--- crates/seatbelt/examples/timeout_advanced.rs | 8 ++++---- 8 files changed, 18 insertions(+), 14 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index b3f40a36..a2849184 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -42,7 +42,7 @@ futures = { workspace = true } [dev-dependencies] tick = { workspace = true, features = ["test-util", "tokio"] } layered = { workspace = true } -ohno = { workspace = true } +ohno = { workspace = true, features = ["app-err"] } static_assertions.workspace = true mutants.workspace = true criterion.workspace = true @@ -57,7 +57,6 @@ tracing-subscriber = { workspace = true, features = ["fmt", "std"] } fastrand.workspace = true http.workspace = true tokio = { workspace = true, features = ["rt", "macros"] } -ohno = { workspace, features = ["app-err"] } [[example]] name = "timeout" diff --git a/crates/seatbelt/examples/circuit_breaker.rs b/crates/seatbelt/examples/circuit_breaker.rs index f975ff3f..13252cfe 100644 --- a/crates/seatbelt/examples/circuit_breaker.rs +++ b/crates/seatbelt/examples/circuit_breaker.rs @@ -11,6 +11,7 @@ use std::time::Duration; use layered::{Execute, Service, Stack}; +use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::circuit_breaker::CircuitBreaker; @@ -20,7 +21,7 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index e69f8c79..b9352caf 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -7,6 +7,7 @@ use std::time::Duration; use layered::{Execute, Service, Stack}; +use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; @@ -17,7 +18,7 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); diff --git a/crates/seatbelt/examples/retry.rs b/crates/seatbelt/examples/retry.rs index 2ec87b9d..0a2aa317 100644 --- a/crates/seatbelt/examples/retry.rs +++ b/crates/seatbelt/examples/retry.rs @@ -5,12 +5,13 @@ use std::io::Error; use layered::{Execute, Service, Stack}; +use ohno::AppError; use seatbelt::retry::Retry; use seatbelt::{RecoveryInfo, SeatbeltOptions}; use tick::Clock; #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); let options = SeatbeltOptions::new(&clock); diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index 90ec9351..6c74ffdb 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -10,6 +10,7 @@ use std::time::Duration; use http::{Request, Response}; use layered::{Execute, Service, Stack}; +use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; @@ -19,7 +20,7 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index c31ef660..06480ed4 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -12,6 +12,7 @@ use std::time::Duration; use http::{Request, Response}; use layered::{Execute, Service, Stack}; +use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; @@ -24,7 +25,7 @@ const ENDPOINT_WITH_OUTAGE: &str = "https://example.com"; const ENDPOINT_ALIVE: &str = "https://fallback.example.com"; #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index 37b59a6f..bb10018a 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -7,8 +7,8 @@ use std::time::Duration; -use anyhow::anyhow; use layered::{Execute, Service, Stack}; +use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::SeatbeltOptions; @@ -21,7 +21,7 @@ const TIMEOUT_DURATION: Duration = Duration::from_millis(100); const PROCESSING_DELAY: Duration = Duration::from_millis(500); #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); @@ -36,7 +36,7 @@ async fn main() -> anyhow::Result<()> { .timeout(TIMEOUT_DURATION) // Required: create error output for timeouts .timeout_error(|args| { - anyhow!( + app_err!( "timeout occurred, timeout: {}ms", args.timeout().as_millis() ) diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index 810fb1da..f93a4e7c 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -7,8 +7,8 @@ use std::time::Duration; -use anyhow::anyhow; use layered::{Execute, Service, Stack}; +use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::SeatbeltOptions; @@ -21,14 +21,14 @@ const TIMEOUT_DURATION: Duration = Duration::from_millis(20); const PROCESSING_DELAY: Duration = Duration::from_secs(1); #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<(), AppError> { // Configure telemetry to see the timeout metrics and logs let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); // Create service options - let options: SeatbeltOptions> = SeatbeltOptions::new(&clock) + let options: SeatbeltOptions> = SeatbeltOptions::new(&clock) .pipeline_name("my_pipeline") .meter_provider(&meter_provider); @@ -39,7 +39,7 @@ async fn main() -> anyhow::Result<()> { .timeout(TIMEOUT_DURATION) // Required: create error output for timeouts .timeout_error(|args| { - anyhow!( + app_err!( "timeout occurred, timeout: {}ms", args.timeout().as_millis() ) From 299aa75e5954499fa736c8f7f7cf7aba385202eb Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 15:07:28 +0100 Subject: [PATCH 04/60] sort --- crates/seatbelt/Cargo.toml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index a2849184..4cbc3fc4 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -31,32 +31,32 @@ retry = ["options", "dep:fastrand"] circuit-breaker = ["options", "dep:fastrand"] [dependencies] -recoverable = { workspace = true } -tick = { workspace = true, optional = true } +fastrand = { workspace = true, optional = true } +futures = { workspace = true } layered = { workspace = true, optional = true } opentelemetry = { workspace = true, optional = true } +recoverable = { workspace = true } +tick = { workspace = true, optional = true } tracing = { workspace = true, optional = true } -fastrand = { workspace = true, optional = true } -futures = { workspace = true } [dev-dependencies] -tick = { workspace = true, features = ["test-util", "tokio"] } -layered = { workspace = true } -ohno = { workspace = true, features = ["app-err"] } -static_assertions.workspace = true -mutants.workspace = true -criterion.workspace = true alloc_tracker.workspace = true +anyhow.workspace = true +criterion.workspace = true +fastrand.workspace = true futures = { workspace = true, features = ["executor"] } +http.workspace = true +layered = { workspace = true } +mutants.workspace = true +ohno = { workspace = true, features = ["app-err"] } opentelemetry = { workspace = true, default-features = false, features = ["metrics"] } -opentelemetry_sdk = { workspace = true, default-features = false, features = ["metrics", "testing", "experimental_metrics_custom_reader"] } opentelemetry-stdout = { workspace = true, default-features = false, features = ["metrics", "logs"] } -anyhow.workspace = true +opentelemetry_sdk = { workspace = true, default-features = false, features = ["metrics", "testing", "experimental_metrics_custom_reader"] } +static_assertions.workspace = true +tick = { workspace = true, features = ["test-util", "tokio"] } +tokio = { workspace = true, features = ["rt", "macros"] } tracing.workspace = true tracing-subscriber = { workspace = true, features = ["fmt", "std"] } -fastrand.workspace = true -http.workspace = true -tokio = { workspace = true, features = ["rt", "macros"] } [[example]] name = "timeout" From a5bc1cf4619dea7b2f103bc60a79e30243b95b77 Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 15:17:18 +0100 Subject: [PATCH 05/60] fixes --- crates/seatbelt/benches/retry.rs | 1 - .../seatbelt/src/circuit_breaker/service.rs | 23 ++++---- crates/seatbelt/src/lib.rs | 2 +- crates/seatbelt/src/retry/service.rs | 53 ++++++++++--------- crates/seatbelt/src/testing.rs | 4 +- crates/seatbelt/src/timeout/service.rs | 49 ++++++++--------- 6 files changed, 66 insertions(+), 66 deletions(-) diff --git a/crates/seatbelt/benches/retry.rs b/crates/seatbelt/benches/retry.rs index 018ce611..0cfb4964 100644 --- a/crates/seatbelt/benches/retry.rs +++ b/crates/seatbelt/benches/retry.rs @@ -6,7 +6,6 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use oxidizer_benchmarking::BenchmarkGroupExt; use seatbelt::retry::Retry; use seatbelt::{RecoveryInfo, SeatbeltOptions}; use tick::Clock; diff --git a/crates/seatbelt/src/circuit_breaker/service.rs b/crates/seatbelt/src/circuit_breaker/service.rs index 8eebf816..f564e797 100644 --- a/crates/seatbelt/src/circuit_breaker/service.rs +++ b/crates/seatbelt/src/circuit_breaker/service.rs @@ -177,7 +177,6 @@ mod tests { use std::time::{Duration, Instant}; use layered::Execute; - use oxidizer_rt::Builtins; use tick::ClockControl; use super::*; @@ -201,9 +200,10 @@ mod tests { assert!(breaker.enable_if.call(&"str".to_string())); } - #[oxidizer_rt::test] - async fn circuit_breaker_disabled_no_inner_calls(state: oxidizer_rt::Builtins) { - let service = create_ready_circuit_breaker_layer(state.clock()) + #[tokio::test] + async fn circuit_breaker_disabled_no_inner_calls() { + let clock = Clock::new_frozen(); + let service = create_ready_circuit_breaker_layer(&clock) .disable() .layer(Execute::new(move |v: String| async move { v })); @@ -212,8 +212,8 @@ mod tests { assert_eq!(result, "test"); } - #[oxidizer_rt::test] - async fn passthrough_behavior(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn passthrough_behavior() { let clock = Clock::new_frozen(); let service = create_ready_circuit_breaker_layer(&clock) .layer(Execute::new(move |v: String| async move { v })); @@ -389,8 +389,8 @@ mod tests { assert!(closed_called_clone.load(Ordering::SeqCst)); } - #[oxidizer_rt::test] - async fn execute_end_to_end_with_callbacks(_state: Builtins) { + #[tokio::test] + async fn execute_end_to_end_with_callbacks() { let probing_called = Arc::new(AtomicBool::new(false)); let opened_called = Arc::new(AtomicBool::new(false)); let closed_called = Arc::new(AtomicBool::new(false)); @@ -452,9 +452,10 @@ mod tests { assert_eq!(result, "success_output"); } - #[oxidizer_rt::test] - async fn different_partitions_ensure_isolated(builtins: Builtins) { - let service = create_ready_circuit_breaker_layer(builtins.clock()) + #[tokio::test] + async fn different_partitions_ensure_isolated() { + let clock = Clock::new_frozen(); + let service = create_ready_circuit_breaker_layer(&clock) .partition_key(|input| PartitionKey::from(input.clone())) .min_throughput(3) .recovery_with(|_, _| RecoveryInfo::retry()) diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 4542bf43..679aa6ad 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -143,4 +143,4 @@ pub mod service { mod rnd; #[cfg(test)] -pub mod testing; +pub(crate) mod testing; diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 3fdba052..35c04614 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -295,12 +295,13 @@ mod tests { assert!(retry.enable_if.call(&"str".to_string())); } - #[oxidizer_rt::test] - async fn retry_disabled_no_inner_calls(state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn retry_disabled_no_inner_calls() { + let clock = Clock::new_frozen(); let counter = Arc::new(AtomicU32::new(0)); let counter_clone = std::sync::Arc::clone(&counter); - let service = create_ready_retry_layer(state.clock(), RecoveryInfo::retry()) + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) .clone_input_with(move |input, _args| { counter_clone.fetch_add(1, Ordering::SeqCst); Some(input.clone()) @@ -314,12 +315,13 @@ mod tests { assert_eq!(counter.load(Ordering::SeqCst), 0); } - #[oxidizer_rt::test] - async fn uncloneable_recovery_called(state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn uncloneable_recovery_called() { + let clock = Clock::new_frozen(); let counter = Arc::new(AtomicU32::new(0)); let counter_clone = std::sync::Arc::clone(&counter); - let service = create_ready_retry_layer(state.clock(), RecoveryInfo::retry()) + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) .clone_input_with(move |_input, _args| None) .recovery_with(move |_input, _args| { counter_clone.fetch_add(1, Ordering::SeqCst); @@ -333,12 +335,13 @@ mod tests { assert_eq!(counter.load(Ordering::SeqCst), 1); } - #[oxidizer_rt::test] - async fn no_recovery_ensure_no_additional_retries(state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn no_recovery_ensure_no_additional_retries() { + let clock = Clock::new_frozen(); let counter = Arc::new(AtomicU32::new(0)); let counter_clone = std::sync::Arc::clone(&counter); - let service = create_ready_retry_layer(state.clock(), RecoveryInfo::retry()) + let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) .clone_input_with(move |input, _args| { counter_clone.fetch_add(1, Ordering::SeqCst); Some(input.clone()) @@ -352,8 +355,8 @@ mod tests { assert_eq!(counter.load(Ordering::SeqCst), 1); } - #[oxidizer_rt::test] - async fn retry_recovery_ensure_retries_exhausted(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn retry_recovery_ensure_retries_exhausted() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let counter = Arc::new(AtomicU32::new(0)); let counter_clone = std::sync::Arc::clone(&counter); @@ -373,8 +376,8 @@ mod tests { assert_eq!(counter.load(Ordering::SeqCst), 5); } - #[oxidizer_rt::test] - async fn retry_recovery_ensure_correct_delays(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn retry_recovery_ensure_correct_delays() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let delays = Arc::new(Mutex::new(vec![])); let delays_clone = Arc::clone(&delays); @@ -403,8 +406,8 @@ mod tests { ); } - #[oxidizer_rt::test] - async fn retry_recovery_ensure_correct_attempts(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn retry_recovery_ensure_correct_attempts() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let attempts = Arc::new(Mutex::new(vec![])); let attempts_clone = Arc::clone(&attempts); @@ -451,8 +454,8 @@ mod tests { ); } - #[oxidizer_rt::test] - async fn restore_input_integration_test(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn restore_input_integration_test() { use std::sync::Arc; use std::sync::atomic::{AtomicU32, Ordering}; @@ -502,8 +505,8 @@ mod tests { assert_eq!(restore_count.load(Ordering::SeqCst), 1); // Restore called once } - #[oxidizer_rt::test] - async fn outage_handling_disabled_no_retries(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn outage_handling_disabled_no_retries() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let call_count = Arc::new(AtomicU32::new(0)); let call_count_clone = Arc::clone(&call_count); @@ -524,8 +527,8 @@ mod tests { assert_eq!(call_count.load(Ordering::SeqCst), 1); // Only original call, no retries } - #[oxidizer_rt::test] - async fn outage_handling_enabled_with_retries(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn outage_handling_enabled_with_retries() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let call_count = Arc::new(AtomicU32::new(0)); let call_count_clone = std::sync::Arc::clone(&call_count); @@ -557,8 +560,8 @@ mod tests { assert_eq!(call_count.load(Ordering::SeqCst), 2); // Original + 1 retry } - #[oxidizer_rt::test] - async fn outage_handling_with_recovery_hint(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn outage_handling_with_recovery_hint() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let delays = Arc::new(Mutex::new(vec![])); let delays_clone = Arc::clone(&delays); @@ -589,8 +592,8 @@ mod tests { ); } - #[oxidizer_rt::test] - async fn retries_exhausted_ensure_telemetry_reported(_state: oxidizer_rt::Builtins) { + #[tokio::test] + async fn retries_exhausted_ensure_telemetry_reported() { let tester = MetricTester::new(); let options = SeatbeltOptions::::new( ClockControl::default().auto_advance_timers(true).to_clock(), diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs index 45437988..7c85aa75 100644 --- a/crates/seatbelt/src/testing.rs +++ b/crates/seatbelt/src/testing.rs @@ -7,7 +7,7 @@ use opentelemetry_sdk::metrics::{InMemoryMetricExporter, SdkMeterProvider}; use crate::{Recovery, RecoveryInfo}; #[derive(Debug)] -pub struct MetricTester { +pub(crate) struct MetricTester { exporter: InMemoryMetricExporter, provider: SdkMeterProvider, } @@ -139,7 +139,7 @@ fn collect_attributes_for_metric(metric: &Metric) -> impl Iterator RecoveryInfo { diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 20e24fef..0929b8af 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -144,18 +144,18 @@ where } } -#[cfg(not(miri))] // Oxidizer runtime does not support Miri. +#[cfg(not(miri))] // tokio runtime does not support Miri. #[cfg(test)] mod tests { use layered::{Execute, Stack}; - use oxidizer_rt::Builtins; use tick::ClockControl; use super::*; - #[oxidizer_rt::test] - async fn no_timeout(state: Builtins) { - let options = SeatbeltOptions::new(state.clock().clone()); + #[tokio::test] + async fn no_timeout() { + let clock = Clock::new_frozen(); + let options = SeatbeltOptions::new(clock); let stack = ( Timeout::layer("test_timeout", &options) @@ -171,8 +171,8 @@ mod tests { assert_eq!(output, "test input".to_string()); } - #[oxidizer_rt::test] - async fn timeout(_state: Builtins) { + #[tokio::test] + async fn timeout() { let clock = ClockControl::default() .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) @@ -207,8 +207,8 @@ mod tests { assert!(called_clone.load(std::sync::atomic::Ordering::SeqCst)); } - #[oxidizer_rt::test] - async fn timeout_override_ensure_respected(_state: Builtins) { + #[tokio::test] + async fn timeout_override_ensure_respected() { let clock = ClockControl::default() .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(5000)) @@ -246,34 +246,31 @@ mod tests { ); } - #[oxidizer_rt::test] - async fn no_timeout_if_disabled(state: Builtins) { - let control = ClockControl::default(); - let clock = control.to_clock(); + #[tokio::test] + async fn no_timeout_if_disabled() { + let clock = ClockControl::default() + .auto_advance_timers(true) + .to_clock(); let stack = ( Timeout::layer("test_timeout", &SeatbeltOptions::new(&clock)) .timeout_output(|_args| "timed out".to_string()) .timeout(Duration::from_millis(200)) .disable(), - Execute::new(move |input| { + Execute::new({ let clock = clock.clone(); - async move { - clock.delay(Duration::from_secs(1)).await; - input + move |input| { + let clock = clock.clone(); + async move { + clock.delay(Duration::from_secs(1)).await; + input + } } }), ); let service = stack.build(); - let handle = state - .scheduler() - .spawn(|_state| async move { service.execute("test input".to_string()).await }); - - // this should trigger timeout - control.advance_millis(250); - state.runtime_ops().unwrap().yield_now().await; - control.advance_millis(1000); + let output = service.execute("test input".to_string()).await; - assert_eq!(handle.await, "test input"); + assert_eq!(output, "test input"); } } From ed0a1c1923d04f7073baf33812540ed2e3db5fa1 Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 15:22:24 +0100 Subject: [PATCH 06/60] fixies --- crates/seatbelt/benches/circuit_breaker.rs | 28 ++++++++------- crates/seatbelt/benches/observability.rs | 42 ++++++++++++---------- crates/seatbelt/benches/retry.rs | 39 +++++++++++--------- crates/seatbelt/benches/timeout.rs | 28 ++++++++------- 4 files changed, 79 insertions(+), 58 deletions(-) diff --git a/crates/seatbelt/benches/circuit_breaker.rs b/crates/seatbelt/benches/circuit_breaker.rs index 05fd75f8..a2433d56 100644 --- a/crates/seatbelt/benches/circuit_breaker.rs +++ b/crates/seatbelt/benches/circuit_breaker.rs @@ -4,7 +4,6 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use oxidizer_benchmarking::BenchmarkGroupExt; use seatbelt::circuit_breaker::CircuitBreaker; use seatbelt::{RecoveryInfo, SeatbeltOptions}; use tick::Clock; @@ -12,17 +11,19 @@ use tick::Clock; #[global_allocator] static ALLOCATOR: Allocator = Allocator::system(); -pub fn entry(c: &mut Criterion) { +fn entry(c: &mut Criterion) { let mut group = c.benchmark_group("circuit_breaker"); let session = Session::new(); // No circuit breaker let service = Execute::new(|_input: Input| async move { Output }); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "no-circuit-breaker", - &session, - ); + let operation = session.operation("no-circuit-breaker"); + group.bench_function("no-circuit-breaker", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); // With circuit breaker (closed state) let options = SeatbeltOptions::new(Clock::new_frozen()); @@ -36,12 +37,15 @@ pub fn entry(c: &mut Criterion) { ) .build(); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "with-circuit-breaker", - &session, - ); + let operation = session.operation("with-circuit-breaker"); + group.bench_function("with-circuit-breaker", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); + group.finish(); session.print_to_stdout(); } diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index 7d94d860..6059f572 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -12,7 +12,6 @@ use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::metrics::data::ResourceMetrics; use opentelemetry_sdk::metrics::exporter::PushMetricExporter; use opentelemetry_sdk::metrics::{SdkMeterProvider, Temporality}; -use oxidizer_benchmarking::BenchmarkGroupExt; use seatbelt::SeatbeltOptions; use seatbelt::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; use tick::Clock; @@ -20,17 +19,19 @@ use tick::Clock; #[global_allocator] static ALLOCATOR: Allocator = Allocator::system(); -pub fn entry(c: &mut Criterion) { +fn entry(c: &mut Criterion) { let mut group = c.benchmark_group("observability"); let session = Session::new(); // No observability let service = NoObservability(Execute::new(|v: Input| async move { Output::from(v) })); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "no-observability", - &session, - ); + let operation = session.operation("no-observability"); + group.bench_function("no-observability", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); // With observability let options = SeatbeltOptions::new(Clock::new_frozen()); @@ -38,11 +39,13 @@ pub fn entry(c: &mut Criterion) { &options, Execute::new(|v: Input| async move { Output::from(v) }), ); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "observability", - &session, - ); + let operation = session.operation("observability"); + group.bench_function("observability", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); // With observability + listener let meter_provider = SdkMeterProvider::builder() @@ -53,12 +56,15 @@ pub fn entry(c: &mut Criterion) { &options, Execute::new(|v: Input| async move { Output::from(v) }), ); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "observability-and-listener", - &session, - ); - + let operation = session.operation("observability-and-listener"); + group.bench_function("observability-and-listener", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); + + group.finish(); session.print_to_stdout(); } diff --git a/crates/seatbelt/benches/retry.rs b/crates/seatbelt/benches/retry.rs index 0cfb4964..ee128b85 100644 --- a/crates/seatbelt/benches/retry.rs +++ b/crates/seatbelt/benches/retry.rs @@ -13,17 +13,19 @@ use tick::Clock; #[global_allocator] static ALLOCATOR: Allocator = Allocator::system(); -pub fn entry(c: &mut Criterion) { +fn entry(c: &mut Criterion) { let mut group = c.benchmark_group("retry"); let session = Session::new(); // No retries let service = Execute::new(|v: Input| async move { Output::from(v) }); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "no-retry", - &session, - ); + let operation = session.operation("no-retry"); + group.bench_function("no-retry", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); // With retry let options = SeatbeltOptions::new(Clock::new_frozen()); @@ -36,11 +38,13 @@ pub fn entry(c: &mut Criterion) { ) .build(); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "with-retry", - &session, - ); + let operation = session.operation("with-retry"); + group.bench_function("with-retry", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); // With retry and recovery let options = SeatbeltOptions::new(Clock::new_frozen()); @@ -55,12 +59,15 @@ pub fn entry(c: &mut Criterion) { ) .build(); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "with-retry-and-recovery", - &session, - ); + let operation = session.operation("with-retry-and-recovery"); + group.bench_function("with-retry-and-recovery", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); + group.finish(); session.print_to_stdout(); } diff --git a/crates/seatbelt/benches/timeout.rs b/crates/seatbelt/benches/timeout.rs index aba68fb0..beb498c8 100644 --- a/crates/seatbelt/benches/timeout.rs +++ b/crates/seatbelt/benches/timeout.rs @@ -6,7 +6,6 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use oxidizer_benchmarking::BenchmarkGroupExt; use seatbelt::SeatbeltOptions; use seatbelt::timeout::Timeout; use tick::Clock; @@ -14,17 +13,19 @@ use tick::Clock; #[global_allocator] static ALLOCATOR: Allocator = Allocator::system(); -pub fn entry(c: &mut Criterion) { +fn entry(c: &mut Criterion) { let mut group = c.benchmark_group("timeout"); let session = Session::new(); // No timeout let service = Execute::new(|v: Input| async move { Output::from(v) }); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "no-timeout", - &session, - ); + let operation = session.operation("no-timeout"); + group.bench_function("no-timeout", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); // With timeout let options = SeatbeltOptions::new(Clock::new_frozen()); @@ -37,12 +38,15 @@ pub fn entry(c: &mut Criterion) { ) .build(); - group.bench_with_memory( - || _ = block_on(service.execute(Input)), - "with-timeout", - &session, - ); + let operation = session.operation("with-timeout"); + group.bench_function("with-timeout", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); + group.finish(); session.print_to_stdout(); } From 3a32ce1c6a241015eebebf515449ce6b565c094b Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 15:24:05 +0100 Subject: [PATCH 07/60] fixes --- crates/seatbelt/README.md | 128 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 5 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 44109329..f59567c4 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -1,5 +1,123 @@ - +
+ Seatbelt Logo + +# Seatbelt + +[![crate.io](https://img.shields.io/crates/v/seatbelt.svg)](https://crates.io/crates/seatbelt) +[![docs.rs](https://docs.rs/seatbelt/badge.svg)](https://docs.rs/seatbelt) +[![MSRV](https://img.shields.io/crates/msrv/seatbelt)](https://crates.io/crates/seatbelt) +[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml) +[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE) +This crate was developed as part of the Oxidizer project + +
+ +Resilience and fault handling for applications and libraries. + +This crate helps applications handle transient faults gracefully through composable +resilience patterns. It provides resilience middleware for building robust distributed systems +that can automatically handle timeouts, retries, and other failure scenarios. + +## Runtime Agnostic Design + +The seatbelt crate is designed to be **runtime agnostic** and works seamlessly across any +async runtime. This flexibility allows you to use the same resilience patterns across +different projects and migrate between runtimes without changing your resilience patterns. + +## Core Types + +* [`RecoveryInfo`][__link0]: Classifies errors as recoverable (transient) or non-recoverable (permanent). +* [`Recovery`][__link1]: A trait for types that can determine their recoverability. + +## Quick Start + +Add resilience to your services with just a few lines of code. **Retry** handles transient failures +and **Timeout** prevents operations from hanging indefinitely: + +```rust +use seatbelt::retry::Retry; +use seatbelt::timeout::Timeout; +use seatbelt::{RecoveryInfo, SeatbeltOptions}; + +let options = SeatbeltOptions::new(&clock); +let service = ( + // Retry middleware: Automatically retries failed operations + Retry::layer("retry", &options) + .clone_input() + .recovery_with(|output: &String, _| match output.as_str() { + "temporary_error" => RecoveryInfo::retry(), + "operation timed out" => RecoveryInfo::retry(), + _ => RecoveryInfo::never(), + }), + // Timeout middleware: Cancels operations that take too long + Timeout::layer("timeout", &options) + .timeout_output(|_| "operation timed out".to_string()) + .timeout(Duration::from_secs(30)), + // Your core business logic + Execute::new(my_string_operation), +) + .build(); + +let result = service.execute("input data".to_string()).await; +``` + + > + > **Note**: Resilience middleware requires [`Clock`][__link2] from the [`tick`][__link3] crate for timing + > operations like delays, timeouts, and backoff calculations. The clock is passed through + > [`SeatbeltOptions`][__link4] when creating middleware layers. + +See [Built-in Middlewares](#built-in-middleware) for more details. + +## Recovery Metadata + +Error types can implement [`Recovery`][__link5] to provide additional metadata about their retry characteristics. +This enables callers to use a unified, streamlined approach when determining whether to retry an +operation, regardless of the underlying error type or source. + +## Built-in Middleware + +This crate provides built-in resilience middleware that you can use out of the box. See the documentation +for each module for details on how to use them. + +* [`timeout`][__link6]: Cancels long-running operations. +* [`retry`][__link7]: Automatically retries failed operations with configurable backoff strategies. +* [`circuit_breaker`][__link8]: Prevents cascading failures by stopping requests to unhealthy services. + +### Features + +This crate supports several optional features that can be enabled to extend functionality: + +* `options`: Enables common APIs for building resilience middleware, including [`SeatbeltOptions`][__link9]. + Requires [`tick`][__link10] for timing operations. +* `service`: Re-exports common types for building middleware from [`layered`][__link11] crate. +* `telemetry`: Enables telemetry and observability features using OpenTelemetry for monitoring + resilience operations. +* `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. +* `timeout`: Enables the [`timeout`][__link12] middleware for canceling long-running operations. +* `retry`: Enables the [`retry`][__link13] middleware for automatically retrying failed operations with + configurable backoff strategies, jitter, and recovery classification. +* `circuit-breaker`: Enables the [`circuit_breaker`][__link14] middleware for preventing cascading failures. + + +
+ +This crate was developed as part of The Oxidizer Project. Browse this crate's source code. + + + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG_h-2C_E9HQsG5HdDZkQ24HrGw2SJ0OwsTS7G4yA4phNCJzdYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI + [__link0]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo + [__link1]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery + [__link10]: https://crates.io/crates/tick/0.1.2 + [__link11]: https://crates.io/crates/layered/0.1.0 + [__link12]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html + [__link13]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html + [__link14]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit_breaker/index.html + [__link2]: https://docs.rs/tick/0.1.2/tick/?search=Clock + [__link3]: https://crates.io/crates/tick/0.1.2 + [__link4]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::SeatbeltOptions + [__link5]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery + [__link6]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html + [__link7]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html + [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit_breaker/index.html + [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::SeatbeltOptions From d3b2ca74bb388441945c3935dd32ac7775a1b9ae Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 16:27:03 +0100 Subject: [PATCH 08/60] fixes --- crates/seatbelt/src/lib.rs | 6 ++--- crates/seatbelt/src/retry/mod.rs | 37 ++++++++++++------------------ crates/seatbelt/src/timeout/mod.rs | 31 ++++++++++--------------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 679aa6ad..be12ef04 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -31,15 +31,13 @@ //! # #[cfg(all(feature = "retry", feature = "timeout"))] //! # { //! # use std::time::Duration; -//! # use oxidizer_rt::Builtins; +//! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! use seatbelt::retry::Retry; //! use seatbelt::timeout::Timeout; //! use seatbelt::{RecoveryInfo, SeatbeltOptions}; //! -//! # #[oxidizer_rt::main] -//! # async fn main(state: Builtins) { -//! # let clock = state.clock().clone(); +//! # async fn main(clock: Clock) { //! let options = SeatbeltOptions::new(&clock); //! let service = ( //! // Retry middleware: Automatically retries failed operations diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index daeaca2c..cd2d23cf 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -12,18 +12,16 @@ //! # Quick Start //! //! ```rust -//! # use oxidizer_rt::Builtins; +//! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::Retry; //! # use seatbelt::{Backoff, RecoveryInfo, SeatbeltOptions}; -//! # #[oxidizer_rt::test] -//! # async fn example(state: Builtins) -> Result<(), Box> { -//! # let clock = state.clock().clone(); +//! # async fn example(clock: Clock) -> Result<(), Box> { //! let options = SeatbeltOptions::new(&clock).pipeline_name("my_service"); //! //! let stack = ( //! Retry::layer("retry", &options) -//! .clone_input_with(|args| Some(args.input().clone())) +//! .clone_input() //! .recovery_with(|result, _| match result { //! Ok(_) => RecoveryInfo::never(), //! Err(_) => RecoveryInfo::retry(), @@ -105,13 +103,11 @@ //! //! ```rust //! # use std::time::Duration; -//! # use oxidizer_rt::Builtins; +//! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::Retry; //! # use seatbelt::{Backoff, RecoveryInfo, SeatbeltOptions}; -//! # #[oxidizer_rt::test] -//! # async fn example(state: Builtins) -> Result<(), Box> { -//! # let clock = state.clock().clone(); +//! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. //! let options = SeatbeltOptions::new(&clock).pipeline_name("example"); @@ -119,7 +115,7 @@ //! let stack = ( //! Retry::layer("my_retry", &options) //! // Required: how to clone inputs for retries -//! .clone_input_with(|args| Some(args.input().clone())) +//! .clone_input() //! // Required: determine if we should retry based on output //! .recovery_with(|output, _args| match output { //! // These are demonstrative, real code will have more meaningful recovery detection @@ -138,7 +134,7 @@ //! # let _result = result; //! # Ok(()) //! # } -//! # async fn execute_unreliable_operation(input: String) -> Result> { Ok(input) } +//! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } //! ``` //! //! ## Advanced Usage @@ -148,20 +144,18 @@ //! //! ```rust //! # use std::time::Duration; -//! # use oxidizer_rt::Builtins; +//! # use tick::Clock; //! # use layered::{Execute, Stack}; //! # use seatbelt::retry::Retry; //! # use seatbelt::{RecoveryInfo, SeatbeltOptions, Backoff}; -//! # #[oxidizer_rt::test] -//! # async fn example(state: Builtins) -> Result<(), Box> { -//! # let clock = state.clock().clone(); +//! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. //! let options = SeatbeltOptions::new(&clock); //! //! let stack = ( //! Retry::layer("advanced_retry", &options) -//! .clone_input_with(|args| Some(args.input().clone())) -//! .recovery_with(|output, _args| match output { +//! .clone_input() +//! .recovery_with(|output: &Result, _args| match output { //! Err(msg) if msg.contains("rate_limit") => { //! RecoveryInfo::retry().delay(Duration::from_secs(60)) //! } @@ -173,12 +167,12 @@ //! .max_retry_attempts(5) //! .base_delay(Duration::from_millis(200)) //! .backoff(Backoff::Exponential) -//! .jitter(true) +//! .use_jitter(true) //! // You can extract the delay from the output, or return None to use the //! // one provided by the retry middleware //! .delay_generator(|_output, args| None) //! // Callback called just before the next retry -//! .on_retry(|output, args| { +//! .on_retry(|output: Result, args| { //! println!( //! "retrying, attempt: {}, delay: {}ms", //! args.attempt(), @@ -191,10 +185,9 @@ //! // Build and execute the service //! let service = stack.build(); //! let result = service.execute("test_timeout".to_string()).await; -//! # let _result = result; -//! # Ok(()) +//! # Ok(result) //! # } -//! # async fn execute_unreliable_operation(input: String) -> Result> { Ok(input) } +//! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } //! ``` //! //! ## Incomplete Configuration diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index e1a6743f..81e136cd 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -14,13 +14,11 @@ //! ```rust //! # use std::time::Duration; //! # use std::io; -//! # use oxidizer_rt::Builtins; +//! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::timeout::Timeout; //! # use seatbelt::SeatbeltOptions; -//! # #[oxidizer_rt::test] -//! # async fn example(state: Builtins) -> Result<(), Box> { -//! # let clock = state.clock().clone(); +//! # async fn example(clock: Clock) -> Result<(), io::Error> { //! let options = SeatbeltOptions::new(&clock).pipeline_name("my_service"); //! //! let stack = ( @@ -34,7 +32,7 @@ //! let result = service.execute("input".to_string()).await; //! # Ok(()) //! # } -//! # async fn my_operation(input: String) -> Result { Ok(input) } +//! # async fn my_operation(input: String) -> Result { Ok(input) } //! ``` //! //! # Configuration @@ -93,14 +91,11 @@ //! //! ```rust //! # use std::time::Duration; -//! # use oxidizer_rt::Builtins; -//! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; +//! # use layered::{Execute, Service, Stack}; //! # use seatbelt::SeatbeltOptions; //! # use seatbelt::timeout::Timeout; -//! # #[oxidizer_rt::test] -//! # async fn example(state: Builtins) -> Result<(), Box> { -//! # let clock = state.clock().clone(); +//! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. //! let options = SeatbeltOptions::new(&clock); @@ -122,6 +117,7 @@ //! // Execute the service //! let result = service.execute("quick".to_string()).await; //! # let _result = result; +//! # Ok(()) //! # } //! //! # async fn execute_unreliable_operation(input: String) -> String { input } @@ -135,14 +131,11 @@ //! ```rust //! # use std::time::Duration; //! # use std::io; -//! # use oxidizer_rt::Builtins; -//! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; +//! # use layered::{Execute, Service, Stack}; //! # use seatbelt::SeatbeltOptions; //! # use seatbelt::timeout::Timeout; -//! # #[oxidizer_rt::test] -//! # async fn example(state: Builtins) -> Result<(), Box> { -//! # let clock = state.clock().clone(); +//! # async fn example(clock: Clock) -> Result<(), io::Error> { //! // Define common options for resilience middleware. //! let options = SeatbeltOptions::new(&clock); //! @@ -153,11 +146,11 @@ //! // Default timeout //! .timeout(Duration::from_secs(30)) //! // Callback for when a timeout occurs -//! .on_timeout(|_output, args| { +//! .on_timeout(|_output: &Result, args| { //! println!("timeout occurred after {}ms", args.timeout().as_millis()); //! }) //! // Provide per-input timeout overrides (fallback to default on None) -//! .timeout_override(|input, args| { +//! .timeout_override(|input: &String, args| { //! match input.as_str() { //! "quick" => Some(Duration::from_secs(5)), // override //! "slow" => Some(Duration::from_secs(60)), @@ -165,7 +158,7 @@ //! } //! }) //! // Optionally disable timeouts for some inputs -//! .enable_if(|input| !input.starts_with("bypass_")), +//! .enable_if(|input: &String| !input.starts_with("bypass_")), //! Execute::new(execute_unreliable_operation), //! ); //! @@ -175,7 +168,7 @@ //! # let _result = result; //! # Ok(()) //! # } -//! # async fn execute_unreliable_operation(input: String) -> String { input } +//! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } //! ``` //! //! ## Incomplete Configuration From 3bf81b6c88d2a33ee0635e596486912a46193010 Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 16:33:34 +0100 Subject: [PATCH 09/60] fixes --- crates/seatbelt/src/retry/mod.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index cd2d23cf..efbcf0f2 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -107,7 +107,7 @@ //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::Retry; //! # use seatbelt::{Backoff, RecoveryInfo, SeatbeltOptions}; -//! # async fn example(clock: Clock) -> Result<(), Box> { +//! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. //! let options = SeatbeltOptions::new(&clock).pipeline_name("example"); @@ -117,7 +117,7 @@ //! // Required: how to clone inputs for retries //! .clone_input() //! // Required: determine if we should retry based on output -//! .recovery_with(|output, _args| match output { +//! .recovery_with(|output: &Result, _args| match output { //! // These are demonstrative, real code will have more meaningful recovery detection //! Ok(_) => RecoveryInfo::never(), //! Err(msg) if msg.contains("transient") => RecoveryInfo::retry(), @@ -145,7 +145,8 @@ //! ```rust //! # use std::time::Duration; //! # use tick::Clock; -//! # use layered::{Execute, Stack}; +//! # use std::io; +//! # use layered::{Execute, Stack, Service}; //! # use seatbelt::retry::Retry; //! # use seatbelt::{RecoveryInfo, SeatbeltOptions, Backoff}; //! # async fn example(clock: Clock) -> Result<(), String> { @@ -156,10 +157,7 @@ //! Retry::layer("advanced_retry", &options) //! .clone_input() //! .recovery_with(|output: &Result, _args| match output { -//! Err(msg) if msg.contains("rate_limit") => { -//! RecoveryInfo::retry().delay(Duration::from_secs(60)) -//! } -//! Err(msg) if msg.contains("timeout") => RecoveryInfo::retry(), +//! Err(err) if err.kind() == io::ErrorKind::TimedOut => RecoveryInfo::retry().delay(Duration::from_secs(60)), //! Err(_) => RecoveryInfo::never(), //! Ok(_) => RecoveryInfo::never(), //! }) @@ -168,11 +166,8 @@ //! .base_delay(Duration::from_millis(200)) //! .backoff(Backoff::Exponential) //! .use_jitter(true) -//! // You can extract the delay from the output, or return None to use the -//! // one provided by the retry middleware -//! .delay_generator(|_output, args| None) //! // Callback called just before the next retry -//! .on_retry(|output: Result, args| { +//! .on_retry(|output, args| { //! println!( //! "retrying, attempt: {}, delay: {}ms", //! args.attempt(), @@ -185,9 +180,10 @@ //! // Build and execute the service //! let service = stack.build(); //! let result = service.execute("test_timeout".to_string()).await; -//! # Ok(result) +//! # let _result = result; +//! # Ok(()) //! # } -//! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } +//! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } //! ``` //! //! ## Incomplete Configuration From 32fea51b33a20f2d3ca454307058def2381cdb26 Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 16:34:52 +0100 Subject: [PATCH 10/60] format --- crates/seatbelt/benches/observability.rs | 14 +--- crates/seatbelt/examples/circuit_breaker.rs | 8 +-- .../seatbelt/examples/resilience_pipeline.rs | 10 +-- crates/seatbelt/examples/retry_advanced.rs | 25 ++----- crates/seatbelt/examples/retry_outage.rs | 4 +- crates/seatbelt/examples/timeout.rs | 11 +-- crates/seatbelt/examples/timeout_advanced.rs | 16 +---- .../seatbelt/src/circuit_breaker/callbacks.rs | 4 +- .../seatbelt/src/circuit_breaker/constants.rs | 3 +- .../src/circuit_breaker/engine/engine_core.rs | 30 ++------ .../src/circuit_breaker/engine/engine_fake.rs | 9 +-- .../engine/engine_telemetry.rs | 5 +- .../src/circuit_breaker/engine/engines.rs | 6 +- .../engine/probing/health_probe.rs | 23 ++----- .../src/circuit_breaker/engine/probing/mod.rs | 10 +-- .../circuit_breaker/engine/probing/options.rs | 57 ++++----------- .../circuit_breaker/engine/probing/probes.rs | 14 +--- .../engine/probing/single_probe.rs | 10 +-- .../src/circuit_breaker/execution_result.rs | 15 +--- .../src/circuit_breaker/half_open_mode.rs | 15 +--- crates/seatbelt/src/circuit_breaker/health.rs | 13 +--- crates/seatbelt/src/circuit_breaker/layer.rs | 69 +++++-------------- .../seatbelt/src/circuit_breaker/service.rs | 52 ++++---------- .../seatbelt/src/options/seatbelt_options.rs | 7 +- crates/seatbelt/src/retry/backoff.rs | 54 ++++----------- crates/seatbelt/src/retry/layer.rs | 40 +++-------- crates/seatbelt/src/retry/service.rs | 54 ++++----------- crates/seatbelt/src/testing.rs | 64 ++++------------- crates/seatbelt/src/timeout/layer.rs | 54 +++++---------- crates/seatbelt/src/timeout/service.rs | 31 ++------- 30 files changed, 166 insertions(+), 561 deletions(-) diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index 6059f572..a3ade010 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -35,10 +35,7 @@ fn entry(c: &mut Criterion) { // With observability let options = SeatbeltOptions::new(Clock::new_frozen()); - let service = Observability::new( - &options, - Execute::new(|v: Input| async move { Output::from(v) }), - ); + let service = Observability::new(&options, Execute::new(|v: Input| async move { Output::from(v) })); let operation = session.operation("observability"); group.bench_function("observability", |b| { b.iter(|| { @@ -48,14 +45,9 @@ fn entry(c: &mut Criterion) { }); // With observability + listener - let meter_provider = SdkMeterProvider::builder() - .with_periodic_exporter(EmptyExporter) - .build(); + let meter_provider = SdkMeterProvider::builder().with_periodic_exporter(EmptyExporter).build(); let options = SeatbeltOptions::new(Clock::new_frozen()).meter_provider(&meter_provider); - let service = Observability::new( - &options, - Execute::new(|v: Input| async move { Output::from(v) }), - ); + let service = Observability::new(&options, Execute::new(|v: Input| async move { Output::from(v) })); let operation = session.operation("observability-and-listener"); group.bench_function("observability-and-listener", |b| { b.iter(|| { diff --git a/crates/seatbelt/examples/circuit_breaker.rs b/crates/seatbelt/examples/circuit_breaker.rs index 13252cfe..f33c482b 100644 --- a/crates/seatbelt/examples/circuit_breaker.rs +++ b/crates/seatbelt/examples/circuit_breaker.rs @@ -36,9 +36,7 @@ async fn main() -> Result<(), AppError> { Err(_) => RecoveryInfo::retry(), }) // Required: provide output when circuit is open - .rejected_input_error(|input, _args| { - format!("rejecting execution of '{input}' because circuit is open") - }) + .rejected_input_error(|input, _args| format!("rejecting execution of '{input}' because circuit is open")) // Decrease the following values to see the circuit breaker trip faster // and speed-up the example .sampling_duration(Duration::from_secs(2)) @@ -93,9 +91,7 @@ async fn execute_operation(input: u32) -> Result { fn configure_telemetry() -> SdkMeterProvider { // Set up tracing subscriber for logs to console - tracing_subscriber::registry() - .with(tracing_subscriber::fmt::layer()) - .init(); + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer()).init(); SdkMeterProvider::builder() .with_periodic_exporter(MetricExporter::default()) diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index b9352caf..bdcde806 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -59,18 +59,12 @@ async fn main() -> Result<(), AppError> { } async fn execute_operation(input: String) -> String { - if fastrand::i16(0..10) > 4 { - "error".to_string() - } else { - input - } + if fastrand::i16(0..10) > 4 { "error".to_string() } else { input } } fn configure_telemetry() -> SdkMeterProvider { // Set up tracing subscriber for logs to console - tracing_subscriber::registry() - .with(tracing_subscriber::fmt::layer()) - .init(); + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer()).init(); SdkMeterProvider::builder() .with_periodic_exporter(MetricExporter::default()) diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index 6c74ffdb..66ddb86e 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -58,22 +58,13 @@ async fn main() -> Result<(), AppError> { // Create the service from the stack let service = stack.build(); - let request = Request::builder() - .uri("https://example.com") - .body("value".to_string())?; + let request = Request::builder().uri("https://example.com").body("value".to_string())?; match service.execute(request).await { Ok(output) => { // Extract attempt info that was forwarded through the pipeline - let attempts = output - .extensions() - .get::() - .map_or(0, |a| a.index()); - println!( - "execution succeeded, result: {}, attempts: {}", - output.body(), - attempts - ); + let attempts = output.extensions().get::().map_or(0, |a| a.index()); + println!("execution succeeded, result: {}, attempts: {}", output.body(), attempts); } Err(e) => println!("execution failed, error: {e}"), } @@ -90,11 +81,7 @@ async fn send_request(input: Request) -> Result, Error> Err(Error::other("transient execution error")) } else { // Extract attempt info that was injected during custom cloning - let attempt = input - .extensions() - .get::() - .copied() - .unwrap_or_default(); + let attempt = input.extensions().get::().copied().unwrap_or_default(); // Forward attempt info to output via response extensions Response::builder() @@ -106,9 +93,7 @@ async fn send_request(input: Request) -> Result, Error> fn configure_telemetry() -> SdkMeterProvider { // Set up tracing subscriber for logs to console - tracing_subscriber::registry() - .with(tracing_subscriber::fmt::layer()) - .init(); + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer()).init(); SdkMeterProvider::builder() .with_periodic_exporter(MetricExporter::default()) diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index 06480ed4..c8048767 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -138,9 +138,7 @@ impl Recovery for HttpError { fn configure_telemetry() -> SdkMeterProvider { // Set up tracing subscriber for logs to console - tracing_subscriber::registry() - .with(tracing_subscriber::fmt::layer()) - .init(); + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer()).init(); SdkMeterProvider::builder() .with_periodic_exporter(MetricExporter::default()) diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index bb10018a..4c43384e 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -35,12 +35,7 @@ async fn main() -> Result<(), AppError> { // Required: specify the timeout duration .timeout(TIMEOUT_DURATION) // Required: create error output for timeouts - .timeout_error(|args| { - app_err!( - "timeout occurred, timeout: {}ms", - args.timeout().as_millis() - ) - }), + .timeout_error(|args| app_err!("timeout occurred, timeout: {}ms", args.timeout().as_millis())), Execute::new({ let clock = clock.clone(); move |_input| { @@ -70,9 +65,7 @@ async fn main() -> Result<(), AppError> { fn configure_telemetry() -> SdkMeterProvider { // Set up tracing subscriber for logs to console - tracing_subscriber::registry() - .with(tracing_subscriber::fmt::layer()) - .init(); + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer()).init(); SdkMeterProvider::builder() .with_periodic_exporter(MetricExporter::default()) diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index f93a4e7c..954101aa 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -38,18 +38,10 @@ async fn main() -> Result<(), AppError> { // Required: specify the timeout duration .timeout(TIMEOUT_DURATION) // Required: create error output for timeouts - .timeout_error(|args| { - app_err!( - "timeout occurred, timeout: {}ms", - args.timeout().as_millis() - ) - }) + .timeout_error(|args| app_err!("timeout occurred, timeout: {}ms", args.timeout().as_millis())) // Optional: callback to be invoked when a timeout occurs .on_timeout(|_out, args| { - println!( - "timeout occurred, timeout: {}ms", - args.timeout().as_millis() - ); + println!("timeout occurred, timeout: {}ms", args.timeout().as_millis()); }) // Optional: override the default timeout duration by inspecting the input .timeout_override(|input, _args| match input.as_str() { @@ -88,9 +80,7 @@ async fn main() -> Result<(), AppError> { fn configure_telemetry() -> SdkMeterProvider { // Set up tracing subscriber for logs to console - tracing_subscriber::registry() - .with(tracing_subscriber::fmt::layer()) - .init(); + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer()).init(); SdkMeterProvider::builder() .with_periodic_exporter(MetricExporter::default()) diff --git a/crates/seatbelt/src/circuit_breaker/callbacks.rs b/crates/seatbelt/src/circuit_breaker/callbacks.rs index b391324a..b7157f8f 100644 --- a/crates/seatbelt/src/circuit_breaker/callbacks.rs +++ b/crates/seatbelt/src/circuit_breaker/callbacks.rs @@ -1,8 +1,6 @@ // Copyright (c) Microsoft Corporation. -use super::{ - OnClosedArgs, OnOpenedArgs, OnProbingArgs, PartitionKey, RecoveryArgs, RejectedInputArgs, -}; +use super::{OnClosedArgs, OnOpenedArgs, OnProbingArgs, PartitionKey, RecoveryArgs, RejectedInputArgs}; use crate::RecoveryInfo; crate::define_fn_wrapper!(PartionKeyProvider(Fn(&In) -> PartitionKey)); diff --git a/crates/seatbelt/src/circuit_breaker/constants.rs b/crates/seatbelt/src/circuit_breaker/constants.rs index 4977829a..a81e40aa 100644 --- a/crates/seatbelt/src/circuit_breaker/constants.rs +++ b/crates/seatbelt/src/circuit_breaker/constants.rs @@ -32,4 +32,5 @@ pub(crate) const DEFAULT_FAILURE_THRESHOLD: f32 = 0.1; /// pub(crate) const DEFAULT_BREAK_DURATION: Duration = Duration::from_secs(5); -pub(crate) const ERR_POISONED_LOCK: &str = "poisoned lock - cannot continue execution because security and privacy guarantees can no longer be upheld"; +pub(crate) const ERR_POISONED_LOCK: &str = + "poisoned lock - cannot continue execution because security and privacy guarantees can no longer be upheld"; diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs index f7f2c23f..adb1816a 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs @@ -8,9 +8,7 @@ use tick::Clock; use super::{EngineOptions, EnterCircuitResult, ExitCircuitResult}; use crate::circuit_breaker::constants::ERR_POISONED_LOCK; use crate::circuit_breaker::engine::probing::{AllowProbeResult, Probes, ProbingResult}; -use crate::circuit_breaker::{ - CircuitEngine, ExecutionMode, ExecutionResult, HealthMetrics, HealthStatus, -}; +use crate::circuit_breaker::{CircuitEngine, ExecutionMode, ExecutionResult, HealthMetrics, HealthStatus}; /// Engine that manages the state of the circuit breaker. #[derive(Debug)] @@ -37,20 +35,14 @@ impl CircuitEngine for EngineCore { let now = self.clock.instant(); // NOTE: Remember to execute all expensive operations (like time checks) outside the lock. - self.state - .lock() - .expect(ERR_POISONED_LOCK) - .enter(now, &self.options) + self.state.lock().expect(ERR_POISONED_LOCK).enter(now, &self.options) } fn exit(&self, result: ExecutionResult, _mode: ExecutionMode) -> ExitCircuitResult { let now = self.clock.instant(); // NOTE: Remember to execute all expensive operations (like time checks) outside the lock. - self.state - .lock() - .expect(ERR_POISONED_LOCK) - .exit(result, now, &self.options) + self.state.lock().expect(ERR_POISONED_LOCK).exit(result, now, &self.options) } } @@ -83,10 +75,7 @@ impl State { EnterCircuitResult::Rejected } } - Self::HalfOpen { - probes, - stats: info, - } => { + Self::HalfOpen { probes, stats: info } => { let allow = probes.allow_probe(now); info.record_allow_result(allow); EnterCircuitResult::from(allow) @@ -94,12 +83,7 @@ impl State { } } - fn exit( - &mut self, - result: ExecutionResult, - now: Instant, - settings: &EngineOptions, - ) -> ExitCircuitResult { + fn exit(&mut self, result: ExecutionResult, now: Instant, settings: &EngineOptions) -> ExitCircuitResult { match self { Self::Closed { health } => { // first, record the result and evaluate the health metrics @@ -439,9 +423,7 @@ mod tests { let result = engine.exit(ExecutionResult::Success, ExecutionMode::Normal); - assert!( - matches!(result, ExitCircuitResult::Closed(stats) if stats.probes_successes == 1 && stats.probes_total == 1) - ); + assert!(matches!(result, ExitCircuitResult::Closed(stats) if stats.probes_successes == 1 && stats.probes_total == 1)); } #[test] diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs index cf747a3b..503a9941 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs @@ -1,8 +1,6 @@ // Copyright (c) Microsoft Corporation. -use crate::circuit_breaker::{ - CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult, -}; +use crate::circuit_breaker::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; /// Fake engine to be used in tests. #[derive(Debug)] @@ -13,10 +11,7 @@ pub(crate) struct EngineFake { impl EngineFake { pub fn new(enter_result: EnterCircuitResult, exit_result: ExitCircuitResult) -> Self { - Self { - enter_result, - exit_result, - } + Self { enter_result, exit_result } } } diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs index 3653939f..bebf3080 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs @@ -5,10 +5,7 @@ use opentelemetry::metrics::Counter; use tick::Clock; use crate::circuit_breaker::telemetry::*; -use crate::circuit_breaker::{ - CircuitEngine, CircuitState, EnterCircuitResult, ExecutionMode, ExecutionResult, - ExitCircuitResult, -}; +use crate::circuit_breaker::{CircuitEngine, CircuitState, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; /// Wrapper around a circuit engine to add telemetry capabilities. diff --git a/crates/seatbelt/src/circuit_breaker/engine/engines.rs b/crates/seatbelt/src/circuit_breaker/engine/engines.rs index faa72bd9..140cdcfb 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engines.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engines.rs @@ -83,11 +83,7 @@ mod tests { let engines = Engines::new( EngineOptions { break_duration: Duration::from_secs(60), - health_metrics_builder: HealthMetricsBuilder::new( - Duration::from_millis(100), - 0.5, - 5, - ), + health_metrics_builder: HealthMetricsBuilder::new(Duration::from_millis(100), 0.5, 5), probes: ProbesOptions::quick(Duration::from_secs(1)), }, Clock::new_frozen(), diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs index e4499f80..a11e653b 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs @@ -2,9 +2,7 @@ use std::time::Instant; -use crate::circuit_breaker::engine::probing::{ - AllowProbeResult, HealthProbeOptions, ProbeOperation, ProbingResult, -}; +use crate::circuit_breaker::engine::probing::{AllowProbeResult, HealthProbeOptions, ProbeOperation, ProbingResult}; use crate::circuit_breaker::{ExecutionResult, HealthMetrics, HealthStatus}; use crate::rnd::Rnd; @@ -20,9 +18,7 @@ pub(crate) struct HealthProbe { impl ProbeOperation for HealthProbe { fn allow_probe(&mut self, now: Instant) -> AllowProbeResult { // Sampling starts with the first probe attempt. Make sure relevant timestamps are set. - let sample_until = *self - .sample_until - .get_or_insert_with(|| now + self.options.stage_duration()); + let sample_until = *self.sample_until.get_or_insert_with(|| now + self.options.stage_duration()); // Fallback probe is allowed only after the sampling duration has elapsed. let fallback_after = *self.fallback_after.get_or_insert(sample_until); @@ -119,10 +115,7 @@ mod tests { let mut probe = HealthProbe::new(options); probe.rnd = Rnd::new_fixed(0.1); - assert_eq!( - probe.allow_probe(Instant::now()), - AllowProbeResult::Rejected - ); + assert_eq!(probe.allow_probe(Instant::now()), AllowProbeResult::Rejected); } #[test] @@ -131,15 +124,9 @@ mod tests { let mut probe = HealthProbe::new(options); let now = Instant::now(); - assert_eq!( - probe.record(ExecutionResult::Success, now), - ProbingResult::Pending, - ); + assert_eq!(probe.record(ExecutionResult::Success, now), ProbingResult::Pending,); - assert_eq!( - probe.record(ExecutionResult::Success, now), - ProbingResult::Pending, - ); + assert_eq!(probe.record(ExecutionResult::Success, now), ProbingResult::Pending,); let status = probe.metrics.health_info(); assert_eq!(status.status(), HealthStatus::Healthy); diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs index dcf72dbf..a7e1b64f 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs @@ -127,14 +127,8 @@ mod tests { }); let now = Instant::now(); - assert_eq!( - probe.record(ExecutionResult::Success, now), - ProbingResult::Success - ); - assert_eq!( - probe.record(ExecutionResult::Failure, now), - ProbingResult::Failure - ); + assert_eq!(probe.record(ExecutionResult::Success, now), ProbingResult::Success); + assert_eq!(probe.record(ExecutionResult::Failure, now), ProbingResult::Failure); } #[test] diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs index 9ac1810b..f65424bf 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs @@ -34,31 +34,17 @@ impl ProbesOptions { } pub fn reliable(stage_duration: Duration, failure_threshold: f32) -> Self { - Self::gradual( - &[0.001, 0.01, 0.05, 0.1, 0.25, 0.5], - stage_duration, - failure_threshold, - ) - } - - pub fn gradual( - probing_ratio: &[f64], - stage_duration: Duration, - failure_threshold: f32, - ) -> Self { + Self::gradual(&[0.001, 0.01, 0.05, 0.1, 0.25, 0.5], stage_duration, failure_threshold) + } + + pub fn gradual(probing_ratio: &[f64], stage_duration: Duration, failure_threshold: f32) -> Self { // Start with a single probe - let initial = std::iter::once(ProbeOptions::SingleProbe { - cooldown: stage_duration, - }); + let initial = std::iter::once(ProbeOptions::SingleProbe { cooldown: stage_duration }); // Then continue with health-based probes - let health = probing_ratio.iter().map(|probing_ratio| { - ProbeOptions::HealthProbe(HealthProbeOptions::new( - stage_duration, - failure_threshold, - *probing_ratio, - )) - }); + let health = probing_ratio + .iter() + .map(|probing_ratio| ProbeOptions::HealthProbe(HealthProbeOptions::new(stage_duration, failure_threshold, *probing_ratio))); Self::new(initial.chain(health)) } @@ -82,18 +68,9 @@ pub struct HealthProbeOptions { impl HealthProbeOptions { pub fn new(stage_duration: Duration, failure_threshold: f32, probing_ratio: f64) -> Self { - assert!( - probing_ratio > 0.0 && probing_ratio <= 1.0, - "probing_ratio must be in (0.0, 1.0]" - ); - assert!( - (0.0..1.0).contains(&failure_threshold), - "failure_threshold must be in [0.0, 1.0)" - ); - assert!( - stage_duration > Duration::ZERO, - "stage_duration must be greater than zero" - ); + assert!(probing_ratio > 0.0 && probing_ratio <= 1.0, "probing_ratio must be in (0.0, 1.0]"); + assert!((0.0..1.0).contains(&failure_threshold), "failure_threshold must be in [0.0, 1.0)"); + assert!(stage_duration > Duration::ZERO, "stage_duration must be greater than zero"); Self { // The min throughput is set to 0, so if no requests come in during the probing stage, @@ -150,15 +127,9 @@ mod tests { let probes: Vec<_> = options.probes().collect(); assert_eq!(probes.len(), 3); - assert!( - matches!(&probes[0], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(10)) - ); - assert!( - matches!(&probes[1], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(20)) - ); - assert!( - matches!(&probes[2], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(30)) - ); + assert!(matches!(&probes[0], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(10))); + assert!(matches!(&probes[1], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(20))); + assert!(matches!(&probes[2], ProbeOptions::SingleProbe { cooldown } if *cooldown == Duration::from_secs(30))); } #[test] diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs index 85333cdf..acf7cf0d 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs @@ -16,9 +16,7 @@ pub(crate) struct Probes { impl Probes { pub fn new(options: &ProbesOptions) -> Self { let mut probes = options.probes(); - let probe = probes - .next() - .expect("probes are never empty because ProbesOptions enforces that"); + let probe = probes.next().expect("probes are never empty because ProbesOptions enforces that"); Self { probes, @@ -70,16 +68,10 @@ mod tests { assert_eq!(probes.allow_probe(now), AllowProbeResult::Accepted); assert_eq!(probes.allow_probe(now), AllowProbeResult::Rejected); - assert_eq!( - probes.record(ExecutionResult::Success, now), - ProbingResult::Pending - ); + assert_eq!(probes.record(ExecutionResult::Success, now), ProbingResult::Pending); assert_eq!(probes.allow_probe(now), AllowProbeResult::Accepted); - assert_eq!( - probes.record(ExecutionResult::Success, now), - ProbingResult::Success - ); + assert_eq!(probes.record(ExecutionResult::Success, now), ProbingResult::Success); assert!(probes.probes.next().is_none()); } diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs index f1ad3a2d..99c1dce7 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs @@ -101,15 +101,9 @@ mod tests { let now = Instant::now(); // Record a success - assert_eq!( - probe.record(ExecutionResult::Success, now), - ProbingResult::Success - ); + assert_eq!(probe.record(ExecutionResult::Success, now), ProbingResult::Success); // Record a failure - assert_eq!( - probe.record(ExecutionResult::Failure, now), - ProbingResult::Failure - ); + assert_eq!(probe.record(ExecutionResult::Failure, now), ProbingResult::Failure); } } diff --git a/crates/seatbelt/src/circuit_breaker/execution_result.rs b/crates/seatbelt/src/circuit_breaker/execution_result.rs index b7785d15..a04faa3e 100644 --- a/crates/seatbelt/src/circuit_breaker/execution_result.rs +++ b/crates/seatbelt/src/circuit_breaker/execution_result.rs @@ -36,22 +36,13 @@ mod tests { #[test] fn test_execution_result_from_recovery() { - assert_eq!( - ExecutionResult::from_recovery(&RecoveryInfo::retry()), - ExecutionResult::Failure - ); + assert_eq!(ExecutionResult::from_recovery(&RecoveryInfo::retry()), ExecutionResult::Failure); assert_eq!( ExecutionResult::from_recovery(&RecoveryInfo::unavailable()), ExecutionResult::Failure ); - assert_eq!( - ExecutionResult::from_recovery(&RecoveryInfo::never()), - ExecutionResult::Success - ); - assert_eq!( - ExecutionResult::from_recovery(&RecoveryInfo::unknown()), - ExecutionResult::Success - ); + assert_eq!(ExecutionResult::from_recovery(&RecoveryInfo::never()), ExecutionResult::Success); + assert_eq!(ExecutionResult::from_recovery(&RecoveryInfo::unknown()), ExecutionResult::Success); } #[test] diff --git a/crates/seatbelt/src/circuit_breaker/half_open_mode.rs b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs index 2af22e63..2452c4b0 100644 --- a/crates/seatbelt/src/circuit_breaker/half_open_mode.rs +++ b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs @@ -60,17 +60,10 @@ impl HalfOpenMode { } } - pub(super) fn to_options( - &self, - default_stage_duration: Duration, - failure_threshold: f32, - ) -> ProbesOptions { + pub(super) fn to_options(&self, default_stage_duration: Duration, failure_threshold: f32) -> ProbesOptions { match self.inner { Mode::Quick => ProbesOptions::quick(default_stage_duration), - Mode::Reliable(duration) => ProbesOptions::reliable( - duration.unwrap_or(default_stage_duration), - failure_threshold, - ), + Mode::Reliable(duration) => ProbesOptions::reliable(duration.unwrap_or(default_stage_duration), failure_threshold), } } } @@ -173,8 +166,6 @@ mod tests { fn reliable_mode_clamps_min_duration() { let mode = HalfOpenMode::reliable(Duration::from_millis(500)); - assert!( - matches!(mode.inner, Mode::Reliable(duration) if duration == Some(Duration::from_secs(1))) - ); + assert!(matches!(mode.inner, Mode::Reliable(duration) if duration == Some(Duration::from_secs(1)))); } } diff --git a/crates/seatbelt/src/circuit_breaker/health.rs b/crates/seatbelt/src/circuit_breaker/health.rs index ab60f35b..f324d22c 100644 --- a/crates/seatbelt/src/circuit_breaker/health.rs +++ b/crates/seatbelt/src/circuit_breaker/health.rs @@ -81,11 +81,7 @@ impl HealthMetricsBuilder { } pub fn build(&self) -> HealthMetrics { - HealthMetrics::new( - self.sampling_duration, - self.failure_threshold, - self.min_throughput, - ) + HealthMetrics::new(self.sampling_duration, self.failure_threshold, self.min_throughput) } } @@ -141,12 +137,7 @@ impl HealthMetrics { failures = failures.saturating_add(w.failures); } - HealthInfo::new( - successes, - failures, - self.failure_threshold, - self.min_throughput, - ) + HealthInfo::new(successes, failures, self.failure_threshold, self.min_throughput) } } diff --git a/crates/seatbelt/src/circuit_breaker/layer.rs b/crates/seatbelt/src/circuit_breaker/layer.rs index 9001e56a..89c5bb3b 100644 --- a/crates/seatbelt/src/circuit_breaker/layer.rs +++ b/crates/seatbelt/src/circuit_breaker/layer.rs @@ -5,14 +5,10 @@ use std::time::Duration; use opentelemetry::StringValue; -use super::constants::{ - DEFAULT_BREAK_DURATION, DEFAULT_FAILURE_THRESHOLD, DEFAULT_MIN_THROUGHPUT, - DEFAULT_SAMPLING_DURATION, -}; +use super::constants::{DEFAULT_BREAK_DURATION, DEFAULT_FAILURE_THRESHOLD, DEFAULT_MIN_THROUGHPUT, DEFAULT_SAMPLING_DURATION}; use super::{ - CircuitBreaker, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, - OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RejectedInput, - RejectedInputArgs, ShouldRecover, + CircuitBreaker, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, + PartionKeyProvider, PartitionKey, RejectedInput, RejectedInputArgs, ShouldRecover, }; use crate::circuit_breaker::engine::probing::ProbesOptions; use crate::service::Layer; @@ -70,9 +66,7 @@ impl CircuitBreakerLayer { } } -impl - CircuitBreakerLayer, RecoveryState, RejectedInputState> -{ +impl CircuitBreakerLayer, RecoveryState, RejectedInputState> { /// Sets the error to return when the circuit breaker is open for Result-returning services. /// /// When the circuit is open, requests are immediately rejected and this function @@ -97,9 +91,7 @@ impl } } -impl - CircuitBreakerLayer -{ +impl CircuitBreakerLayer { /// Sets the recovery classification function. /// /// This function determines whether a specific output represents a failure @@ -116,10 +108,7 @@ impl #[must_use] pub fn recovery_with( mut self, - recover_fn: impl Fn(&Out, crate::circuit_breaker::RecoveryArgs) -> RecoveryInfo - + Send - + Sync - + 'static, + recover_fn: impl Fn(&Out, crate::circuit_breaker::RecoveryArgs) -> RecoveryInfo + Send + Sync + 'static, ) -> CircuitBreakerLayer { self.recovery = Some(ShouldRecover::new(recover_fn)); self.into_state::() @@ -249,10 +238,7 @@ impl /// * `callback` - Function that takes a reference to the output and /// [`OnOpenedArgs`] containing circuit breaker context #[must_use] - pub fn on_opened( - mut self, - callback: impl Fn(&Out, OnOpenedArgs) + Send + Sync + 'static, - ) -> Self { + pub fn on_opened(mut self, callback: impl Fn(&Out, OnOpenedArgs) + Send + Sync + 'static) -> Self { self.on_opened = Some(OnOpened::new(callback)); self } @@ -269,10 +255,7 @@ impl /// * `callback` - Function that takes a reference to the output and /// [`OnClosedArgs`] containing circuit breaker context #[must_use] - pub fn on_closed( - mut self, - callback: impl Fn(&Out, OnClosedArgs) + Send + Sync + 'static, - ) -> Self { + pub fn on_closed(mut self, callback: impl Fn(&Out, OnClosedArgs) + Send + Sync + 'static) -> Self { self.on_closed = Some(OnClosed::new(callback)); self } @@ -289,10 +272,7 @@ impl /// * `callback` - Function that takes a mutable reference to the input and /// [`OnProbingArgs`] containing circuit breaker context #[must_use] - pub fn on_probing( - mut self, - callback: impl Fn(&mut In, OnProbingArgs) + Send + Sync + 'static, - ) -> Self { + pub fn on_probing(mut self, callback: impl Fn(&mut In, OnProbingArgs) + Send + Sync + 'static) -> Self { self.on_probing = Some(OnProbing::new(callback)); self } @@ -349,10 +329,7 @@ impl /// are created do not contain any sensitive data such as authentication tokens, personal /// identifiable information (PII), or other confidential data. #[must_use] - pub fn partition_key( - mut self, - key_provider: impl Fn(&In) -> PartitionKey + Send + Sync + 'static, - ) -> Self { + pub fn partition_key(mut self, key_provider: impl Fn(&In) -> PartitionKey + Send + Sync + 'static) -> Self { self.partition_key = Some(PartionKeyProvider::new(key_provider)); self } @@ -418,14 +395,8 @@ impl Layer for CircuitBreakerLayer { CircuitBreaker { inner, clock: self.options.get_clock().clone(), - recovery: self - .recovery - .clone() - .expect("recovery must be set in Ready state"), - rejected_input: self - .rejected_input - .clone() - .expect("rejected_input must be set in Ready state"), + recovery: self.recovery.clone().expect("recovery must be set in Ready state"), + rejected_input: self.rejected_input.clone().expect("rejected_input must be set in Ready state"), enable_if: self.enable_if.clone(), engines: self.engines(), on_opened: self.on_opened.clone(), @@ -436,9 +407,7 @@ impl Layer for CircuitBreakerLayer { } } -impl - CircuitBreakerLayer -{ +impl CircuitBreakerLayer { fn probes_options(&self) -> ProbesOptions { self.half_open_mode // we will use break duration as the sampling duration for probes @@ -449,11 +418,7 @@ impl Engines::new( super::engine::EngineOptions { break_duration: self.break_duration, - health_metrics_builder: HealthMetricsBuilder::new( - self.sampling_duration, - self.failure_threshold, - self.min_throughput, - ), + health_metrics_builder: HealthMetricsBuilder::new(self.sampling_duration, self.failure_threshold, self.min_throughput), probes: self.probes_options(), }, self.options.get_clock().clone(), @@ -500,8 +465,7 @@ mod tests { #[expect(clippy::float_cmp, reason = "Test")] fn new_creates_correct_initial_state() { let options = create_test_options(); - let layer: CircuitBreakerLayer<_, _, NotSet, NotSet> = - CircuitBreakerLayer::new(StringValue::from("test_breaker"), &options); + let layer: CircuitBreakerLayer<_, _, NotSet, NotSet> = CircuitBreakerLayer::new(StringValue::from("test_breaker"), &options); assert!(layer.recovery.is_none()); assert!(layer.rejected_input.is_none()); @@ -575,8 +539,7 @@ mod tests { let options = create_test_options(); let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); - let layer: CircuitBreakerLayer<_, _, NotSet, Set> = - layer.rejected_input(|_, _| "rejected".to_string()); + let layer: CircuitBreakerLayer<_, _, NotSet, Set> = layer.rejected_input(|_, _| "rejected".to_string()); let result = layer.rejected_input.as_ref().unwrap().call( "test".to_string(), diff --git a/crates/seatbelt/src/circuit_breaker/service.rs b/crates/seatbelt/src/circuit_breaker/service.rs index f564e797..b2c0adaf 100644 --- a/crates/seatbelt/src/circuit_breaker/service.rs +++ b/crates/seatbelt/src/circuit_breaker/service.rs @@ -6,9 +6,8 @@ use layered::Service; use tick::Clock; use super::{ - CircuitBreakerLayer, CircuitEngine, Engines, EnterCircuitResult, ExecutionMode, - ExecutionResult, ExitCircuitResult, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, - OnProbingArgs, PartionKeyProvider, PartitionKey, RecoveryArgs, RejectedInput, + CircuitBreakerLayer, CircuitEngine, Engines, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult, OnClosed, + OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RecoveryArgs, RejectedInput, RejectedInputArgs, ShouldRecover, }; use crate::{EnableIf, NotSet}; @@ -70,9 +69,7 @@ where let partition_key = self .partition_key .as_ref() - .map_or_else(PartitionKey::default, |partition_key| { - partition_key.call(&input) - }); + .map_or_else(PartitionKey::default, |partition_key| partition_key.call(&input)); // Retrieve the engine for this partition let engine = self.engines.get_engine(&partition_key); @@ -119,20 +116,11 @@ impl CircuitBreaker { } } // Circuit is open, return rejected input output - EnterCircuitResult::Rejected => ControlFlow::Break( - self.rejected_input - .call(input, RejectedInputArgs { partition_key }), - ), + EnterCircuitResult::Rejected => ControlFlow::Break(self.rejected_input.call(input, RejectedInputArgs { partition_key })), } } - fn after_execute( - &self, - engine: &impl CircuitEngine, - output: &Out, - mode: ExecutionMode, - partition_key: &PartitionKey, - ) { + fn after_execute(&self, engine: &impl CircuitEngine, output: &Out, mode: ExecutionMode, partition_key: &PartitionKey) { let recovery = self.recovery.call( output, RecoveryArgs { @@ -187,10 +175,8 @@ mod tests { #[test] fn layer_ensure_defaults() { - let options = SeatbeltOptions::::new(Clock::new_frozen()) - .pipeline_name("test_pipeline"); - let layer: CircuitBreakerLayer = - CircuitBreaker::layer("test_breaker", &options); + let options = SeatbeltOptions::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); + let layer: CircuitBreakerLayer = CircuitBreaker::layer("test_breaker", &options); let layer = layer .recovery_with(|_, _| RecoveryInfo::never()) .rejected_input(|_, _| "rejected".to_string()); @@ -215,8 +201,7 @@ mod tests { #[tokio::test] async fn passthrough_behavior() { let clock = Clock::new_frozen(); - let service = create_ready_circuit_breaker_layer(&clock) - .layer(Execute::new(move |v: String| async move { v })); + let service = create_ready_circuit_breaker_layer(&clock).layer(Execute::new(move |v: String| async move { v })); let result = service.execute("test".to_string()).await; @@ -300,12 +285,7 @@ mod tests { ); // This should not panic, indicating no callbacks were invoked - service.after_execute( - &engine, - &"success".to_string(), - ExecutionMode::Normal, - &PartitionKey::default(), - ); + service.after_execute(&engine, &"success".to_string(), ExecutionMode::Normal, &PartitionKey::default()); } #[test] @@ -323,12 +303,7 @@ mod tests { ); // This should not panic, indicating no callbacks were invoked - service.after_execute( - &engine, - &"success".to_string(), - ExecutionMode::Normal, - &PartitionKey::default(), - ); + service.after_execute(&engine, &"success".to_string(), ExecutionMode::Normal, &PartitionKey::default()); } #[test] @@ -476,11 +451,8 @@ mod tests { assert_eq!(result, "B"); } - fn create_ready_circuit_breaker_layer( - clock: &Clock, - ) -> CircuitBreakerLayer { - let options = - SeatbeltOptions::::new(clock.clone()).pipeline_name("test_pipeline"); + fn create_ready_circuit_breaker_layer(clock: &Clock) -> CircuitBreakerLayer { + let options = SeatbeltOptions::::new(clock.clone()).pipeline_name("test_pipeline"); CircuitBreaker::layer("test_breaker", &options) .recovery_with(|output, _| { if output.contains("error") { diff --git a/crates/seatbelt/src/options/seatbelt_options.rs b/crates/seatbelt/src/options/seatbelt_options.rs index 488a6b87..bce48044 100644 --- a/crates/seatbelt/src/options/seatbelt_options.rs +++ b/crates/seatbelt/src/options/seatbelt_options.rs @@ -235,8 +235,7 @@ mod tests { #[test] fn test_pipeline_name_with_custom_value_sets_name_and_is_owned() { let clock = tick::Clock::new_frozen(); - let options = - SeatbeltOptions::<(), ()>::new(clock).pipeline_name(String::from("custom_pipeline")); + let options = SeatbeltOptions::<(), ()>::new(clock).pipeline_name(String::from("custom_pipeline")); assert_eq!(options.get_pipeline_name().as_ref(), "custom_pipeline"); assert!(matches!(options.get_pipeline_name(), Cow::Owned(_))); } @@ -263,9 +262,7 @@ mod tests { fn test_meter_provider() -> (SdkMeterProvider, InMemoryMetricExporter) { let exporter = InMemoryMetricExporter::default(); - let provider = SdkMeterProvider::builder() - .with_periodic_exporter(exporter.clone()) - .build(); + let provider = SdkMeterProvider::builder().with_periodic_exporter(exporter.clone()).build(); (provider, exporter) } } diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index 3ae3148f..c430e369 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -66,12 +66,9 @@ impl Iterator for DelaysIter { } } (Backoff::Exponential, false) => duration_mul_pow2(self.props.base_delay, self.attempt), - (Backoff::Exponential, true) => decorrelated_jitter_backoff_v2( - self.attempt, - self.props.base_delay, - &mut self.prev, - &self.props.rnd, - ), + (Backoff::Exponential, true) => { + decorrelated_jitter_backoff_v2(self.attempt, self.props.base_delay, &mut self.prev, &self.props.rnd) + } }; self.attempt = next_attempt; @@ -131,12 +128,7 @@ fn apply_jitter(delay: Duration, rnd: &Rnd) -> Duration { /// - [Polly V7 implementation](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry/blob/7596d2dacf22d88bbd814bc49c28424fb6e921e9/src/Polly.Contrib.WaitAndRetry/Backoff.DecorrelatedJitterV2.cs#L22) /// - [Polly.Contrib.WaitAndRetry repo](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry) #[inline] -fn decorrelated_jitter_backoff_v2( - attempt: u32, - base_delay: Duration, - prev: &mut f64, - rnd: &Rnd, -) -> Duration { +fn decorrelated_jitter_backoff_v2(attempt: u32, base_delay: Duration, prev: &mut f64, rnd: &Rnd) -> Duration { // The original author/credit for this jitter formula is @george-polevoy . // Jitter formula used with permission as described at https://github.com/App-vNext/Polly/issues/530#issuecomment-526555979 // Minor adaptations (pFactor = 4.0 and rpScalingFactor = 1 / 1.4d) by @reisenberger, to scale the formula output for easier parameterization to users. @@ -515,14 +507,8 @@ mod tests { }); let delays: Vec<_> = backoff.delays().skip(attempt).take(2).collect(); - assert!( - delays[0] > Duration::ZERO, - "Attempt {attempt}: first delay should be positive" - ); - assert!( - delays[1] > Duration::ZERO, - "Attempt {attempt}: second delay should be positive" - ); + assert!(delays[0] > Duration::ZERO, "Attempt {attempt}: first delay should be positive"); + assert!(delays[1] > Duration::ZERO, "Attempt {attempt}: second delay should be positive"); } } @@ -541,22 +527,10 @@ mod tests { }); let delays: Vec<_> = backoff.delays().skip(attempt).take(2).collect(); - assert!( - delays[0] > Duration::ZERO, - "Attempt {attempt}: first delay should be positive" - ); - assert!( - delays[0] <= max_delay, - "Attempt {attempt}: first delay should not exceed max" - ); - assert!( - delays[1] > Duration::ZERO, - "Attempt {attempt}: second delay should be positive" - ); - assert!( - delays[1] <= max_delay, - "Attempt {attempt}: second delay should not exceed max" - ); + assert!(delays[0] > Duration::ZERO, "Attempt {attempt}: first delay should be positive"); + assert!(delays[0] <= max_delay, "Attempt {attempt}: first delay should not exceed max"); + assert!(delays[1] > Duration::ZERO, "Attempt {attempt}: second delay should be positive"); + assert!(delays[1] <= max_delay, "Attempt {attempt}: second delay should not exceed max"); } } @@ -637,9 +611,7 @@ mod tests { .into_iter(), ); - let delays_ms = [ - 8_626, 10_830, 18_396, 27_703, 37_213, 159_824, 405_539, 300_743, 1_839_611, 639_970, - ]; + let delays_ms = [8_626, 10_830, 18_396, 27_703, 37_213, 159_824, 405_539, 300_743, 1_839_611, 639_970]; let backoff = DelayBackoff(BackoffOptions { backoff_type: Backoff::Exponential, @@ -671,9 +643,7 @@ mod tests { .into_iter(), ); - let delays_ms = [ - 7800, 15600, 31200, 62400, 124_800, 249_600, 499_200, 998_400, 1_996_800, 3_993_600, - ]; + let delays_ms = [7800, 15600, 31200, 62400, 124_800, 249_600, 499_200, 998_400, 1_996_800, 3_993_600]; let backoff = DelayBackoff(BackoffOptions { backoff_type: Backoff::Exponential, diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 9df45da4..75f61d8c 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -7,10 +7,7 @@ use opentelemetry::StringValue; use crate::retry::backoff::BackoffOptions; use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; -use crate::retry::{ - CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, - Retry, ShouldRecover, -}; +use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; use crate::service::Layer; use crate::{Backoff, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, SeatbeltOptions, Set}; @@ -236,10 +233,7 @@ impl RetryLayer Self { + pub fn on_retry(mut self, retry_fn: impl Fn(&Out, OnRetryArgs) + Send + Sync + 'static) -> Self { self.on_retry = Some(OnRetry::new(retry_fn)); self } @@ -399,10 +393,7 @@ impl RetryLayer Option + Send + Sync + 'static, - ) -> Self { + pub fn restore_input(mut self, restore_fn: impl Fn(&mut Out, RestoreInputArgs) -> Option + Send + Sync + 'static) -> Self { self.restore_input = Some(RestoreInput::new(restore_fn)); self } @@ -417,14 +408,8 @@ impl Layer for RetryLayer { clock: self.options.get_clock().clone(), max_attempts: self.max_attempts, backoff: self.backoff.clone().into(), - clone_input: self - .clone_input - .clone() - .expect("clone_input must be set in Ready state"), - should_recover: self - .should_recover - .clone() - .expect("should_recover must be set in Ready state"), + clone_input: self.clone_input.clone().expect("clone_input must be set in Ready state"), + should_recover: self.should_recover.clone().expect("should_recover must be set in Ready state"), on_retry: self.on_retry.clone(), enable_if: self.enable_if.clone(), strategy_name: self.strategy_name.clone(), @@ -436,9 +421,7 @@ impl Layer for RetryLayer { } } -impl - RetryLayer, CloneInputState, RecoveryState> -{ +impl RetryLayer, CloneInputState, RecoveryState> { /// Sets a specialized input restoration callback that operates only on error cases. /// /// This is a convenience method for working with `Result` outputs, where you @@ -494,10 +477,7 @@ impl /// # } /// ``` #[must_use] - pub fn restore_input_from_error( - self, - restore_fn: impl Fn(&mut Error, RestoreInputArgs) -> Option + Send + Sync + 'static, - ) -> Self { + pub fn restore_input_from_error(self, restore_fn: impl Fn(&mut Error, RestoreInputArgs) -> Option + Send + Sync + 'static) -> Self { self.restore_input(move |input, args| match input { Ok(_) => None, Err(e) => restore_fn(e, args), @@ -539,8 +519,7 @@ mod tests { #[test] fn new_creates_correct_initial_state() { let options = create_test_options(); - let layer: RetryLayer<_, _, NotSet, NotSet> = - RetryLayer::new(StringValue::from("test_retry"), &options); + let layer: RetryLayer<_, _, NotSet, NotSet> = RetryLayer::new(StringValue::from("test_retry"), &options); assert_eq!(layer.max_attempts, MaxAttempts::Finite(4)); // 3 retries + 1 original = 4 total assert!(matches!(layer.backoff.backoff_type, Backoff::Exponential)); @@ -559,8 +538,7 @@ mod tests { let options = create_test_options(); let layer = RetryLayer::new(StringValue::from("test"), &options); - let layer: RetryLayer<_, _, Set, NotSet> = - layer.clone_input_with(|input, _args| Some(input.clone())); + let layer: RetryLayer<_, _, Set, NotSet> = layer.clone_input_with(|input, _args| Some(input.clone())); let result = layer.clone_input.unwrap().call( &mut "test".to_string(), diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 35c04614..1e20c593 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -11,8 +11,7 @@ use tick::Clock; use crate::options::MaxAttempts; use crate::retry::telemetry::{ATTEMPT_INDEX, ATTEMPT_NUMBER_IS_LAST, RETRY_EVENT}; use crate::retry::{ - CloneArgs, CloneInput, DelayBackoff, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, - RestoreInputArgs, ShouldRecover, + CloneArgs, CloneInput, DelayBackoff, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, ShouldRecover, }; use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; use crate::{Attempt, EnableIf, NotSet, RecoveryInfo, RecoveryKind}; @@ -80,10 +79,7 @@ where let mut previous_recovery = None; loop { - match self - .execute_attempt(input, attempt, &mut delays, previous_recovery) - .await - { + match self.execute_attempt(input, attempt, &mut delays, previous_recovery).await { ControlFlow::Continue((next_input, next_attempt, recovery)) => { input = next_input; attempt = next_attempt; @@ -161,14 +157,7 @@ where // At this point, we know that the output is recoverable and that we have more attempts left. // Determine the delay before the next attempt based on the recovery kind. let flow_control = match recovery_kind { - RecoverableKind::Retry => self.finalize_retryable_attempt( - original_input, - out, - attempt, - next_attempt, - retry_delay, - recovery, - ), + RecoverableKind::Retry => self.finalize_retryable_attempt(original_input, out, attempt, next_attempt, retry_delay, recovery), }; // Only ever delay if we have a next attempt @@ -275,13 +264,9 @@ mod tests { #[test] fn layer_ensure_defaults() { - let options = SeatbeltOptions::::new(Clock::new_frozen()) - .pipeline_name("test_pipeline"); - let layer: RetryLayer = - Retry::layer("test_retry", &options); - let layer = layer - .recovery_with(|_, _| RecoveryInfo::never()) - .clone_input(); + let options = SeatbeltOptions::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); + let layer: RetryLayer = Retry::layer("test_retry", &options); + let layer = layer.recovery_with(|_, _| RecoveryInfo::never()).clone_input(); let retry = layer.layer(Execute::new(|v: String| async move { v })); @@ -417,10 +402,7 @@ mod tests { let service = create_ready_retry_layer(&clock, RecoveryInfo::retry()) .clone_input_with(move |input, args| { - attempts_for_clone_clone - .lock() - .unwrap() - .push(args.attempt()); + attempts_for_clone_clone.lock().unwrap().push(args.attempt()); Some(input.clone()) }) .recovery_with(move |_input, _args| RecoveryInfo::retry()) @@ -549,9 +531,7 @@ mod tests { }) .handle_unavailable(true) // Enable outage handling .max_retry_attempts(2) - .layer(Execute::new(move |input: String| async move { - format!("processed_{input}") - })); + .layer(Execute::new(move |input: String| async move { format!("processed_{input}") })); let result = service.execute("test".to_string()).await; @@ -586,20 +566,15 @@ mod tests { let _result = service.execute("test".to_string()).await; // Should use the recovery hint as the delay - assert_eq!( - delays.lock().unwrap().to_vec(), - vec![Duration::from_secs(10)] - ); + assert_eq!(delays.lock().unwrap().to_vec(), vec![Duration::from_secs(10)]); } #[tokio::test] async fn retries_exhausted_ensure_telemetry_reported() { let tester = MetricTester::new(); - let options = SeatbeltOptions::::new( - ClockControl::default().auto_advance_timers(true).to_clock(), - ) - .pipeline_name("test_pipeline") - .meter_provider(tester.meter_provider()); + let options = SeatbeltOptions::::new(ClockControl::default().auto_advance_timers(true).to_clock()) + .pipeline_name("test_pipeline") + .meter_provider(tester.meter_provider()); let service = create_ready_retry_layer_core(RecoveryInfo::retry(), &options) .clone_input_with(move |input, _args| Some(input.clone())) @@ -623,10 +598,7 @@ mod tests { ); } - fn create_ready_retry_layer( - clock: &Clock, - recover: RecoveryInfo, - ) -> RetryLayer { + fn create_ready_retry_layer(clock: &Clock, recover: RecoveryInfo) -> RetryLayer { let options = SeatbeltOptions::new(clock.clone()).pipeline_name("test_pipeline"); create_ready_retry_layer_core(recover, &options) } diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs index 7c85aa75..37ceb748 100644 --- a/crates/seatbelt/src/testing.rs +++ b/crates/seatbelt/src/testing.rs @@ -25,9 +25,7 @@ impl MetricTester { Self { exporter: in_memory.clone(), - provider: SdkMeterProvider::builder() - .with_periodic_exporter(in_memory) - .build(), + provider: SdkMeterProvider::builder().with_periodic_exporter(in_memory).build(), } } @@ -81,58 +79,22 @@ fn collect_attributes(exporter: &InMemoryMetricExporter) -> Vec { fn collect_attributes_for_metric(metric: &Metric) -> impl Iterator { match metric.data() { AggregatedMetrics::F64(data) => match data { - MetricData::Gauge(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::Sum(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::Histogram(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::ExponentialHistogram(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), + MetricData::Gauge(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::Sum(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::Histogram(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::ExponentialHistogram(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), }, AggregatedMetrics::U64(data) => match data { - MetricData::Gauge(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::Sum(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::Histogram(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::ExponentialHistogram(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), + MetricData::Gauge(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::Sum(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::Histogram(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::ExponentialHistogram(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), }, AggregatedMetrics::I64(data) => match data { - MetricData::Gauge(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::Sum(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::Histogram(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), - MetricData::ExponentialHistogram(data) => data - .data_points() - .flat_map(|v| v.attributes().cloned()) - .collect::>(), + MetricData::Gauge(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::Sum(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::Histogram(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), + MetricData::ExponentialHistogram(data) => data.data_points().flat_map(|v| v.attributes().cloned()).collect::>(), }, } .into_iter() diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index fefcf333..914df69c 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -7,8 +7,7 @@ use opentelemetry::StringValue; use crate::service::Layer; use crate::timeout::{ - OnTimeout, OnTimeoutArgs, Timeout, TimeoutOutput as TimeoutOutputCallback, TimeoutOutputArgs, - TimeoutOverride, TimeoutOverrideArgs, + OnTimeout, OnTimeoutArgs, Timeout, TimeoutOutput as TimeoutOutputCallback, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs, }; use crate::{EnableIf, NotSet, SeatbeltOptions, Set}; @@ -120,10 +119,7 @@ impl TimeoutLayer Self { + pub fn on_timeout(mut self, on_timeout: impl Fn(&Out, OnTimeoutArgs) + Send + Sync + 'static) -> Self { self.on_timeout = Some(OnTimeout::new(on_timeout)); self } @@ -205,10 +201,7 @@ impl Layer for TimeoutLayer { timeout: self.timeout.expect("timeout must be set in Ready state"), enable_if: self.enable_if.clone(), on_timeout: self.on_timeout.clone(), - timeout_output: self - .timeout_output - .clone() - .expect("timeout_result must be set in Ready state"), + timeout_output: self.timeout_output.clone().expect("timeout_result must be set in Ready state"), name: self.strategy_name.clone(), pipeline_name: self.options.get_pipeline_name().clone().into(), event_reporter: self.options.create_resilience_event_counter(), @@ -246,8 +239,7 @@ mod tests { #[test] fn new_needs_timeout_output() { let options = create_test_options(); - let layer: TimeoutLayer<_, _, NotSet, NotSet> = - TimeoutLayer::new(StringValue::from("test_timeout"), &options); + let layer: TimeoutLayer<_, _, NotSet, NotSet> = TimeoutLayer::new(StringValue::from("test_timeout"), &options); assert!(layer.timeout.is_none()); assert!(layer.timeout_output.is_none()); @@ -262,8 +254,7 @@ mod tests { let options = create_test_options(); let layer = TimeoutLayer::new(StringValue::from("test"), &options); - let layer: TimeoutLayer<_, _, NotSet, Set> = - layer.timeout_output(|args| format!("timeout: {}", args.timeout().as_millis())); + let layer: TimeoutLayer<_, _, NotSet, Set> = layer.timeout_output(|args| format!("timeout: {}", args.timeout().as_millis())); let result = layer.timeout_output.unwrap().call(TimeoutOutputArgs { timeout: Duration::from_millis(3), }); @@ -276,8 +267,7 @@ mod tests { let options = create_test_options_result(); let layer = TimeoutLayer::new(StringValue::from("test"), &options); - let layer: TimeoutLayer<_, _, NotSet, Set> = - layer.timeout_error(|args| format!("timeout: {}", args.timeout().as_millis())); + let layer: TimeoutLayer<_, _, NotSet, Set> = layer.timeout_error(|args| format!("timeout: {}", args.timeout().as_millis())); let result = layer .timeout_output .unwrap() @@ -291,10 +281,9 @@ mod tests { #[test] fn timeout_ensure_set_correctly() { - let layer: TimeoutLayer<_, _, Set, Set> = - TimeoutLayer::new(StringValue::from("test"), &create_test_options()) - .timeout_output(|_args| "timeout: ".to_string()) - .timeout(Duration::from_millis(3)); + let layer: TimeoutLayer<_, _, Set, Set> = TimeoutLayer::new(StringValue::from("test"), &create_test_options()) + .timeout_output(|_args| "timeout: ".to_string()) + .timeout(Duration::from_millis(3)); assert_eq!(layer.timeout.unwrap(), Duration::from_millis(3)); } @@ -304,10 +293,9 @@ mod tests { let called = Arc::new(AtomicBool::new(false)); let called_clone = Arc::clone(&called); - let layer: TimeoutLayer<_, _, Set, Set> = - create_ready_layer().on_timeout(move |_output, _args| { - called_clone.store(true, Ordering::SeqCst); - }); + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer().on_timeout(move |_output, _args| { + called_clone.store(true, Ordering::SeqCst); + }); layer.on_timeout.unwrap().call( &"output".to_string(), @@ -321,8 +309,7 @@ mod tests { #[test] fn timeout_override_ok() { - let layer: TimeoutLayer<_, _, Set, Set> = - create_ready_layer().timeout_override(|_input, _args| Some(Duration::from_secs(3))); + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer().timeout_override(|_input, _args| Some(Duration::from_secs(3))); let result = layer.timeout_override.unwrap().call( &"a".to_string(), @@ -336,8 +323,7 @@ mod tests { #[test] fn enable_if_ok() { - let layer: TimeoutLayer<_, _, Set, Set> = - create_ready_layer().enable_if(|input| matches!(input.as_ref(), "enable")); + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer().enable_if(|input| matches!(input.as_ref(), "enable")); assert!(layer.enable_if.call(&"enable".to_string())); assert!(!layer.enable_if.call(&"disable".to_string())); @@ -359,16 +345,14 @@ mod tests { #[test] fn timeout_when_ready_ok() { - let layer: TimeoutLayer<_, _, Set, Set> = - create_ready_layer().timeout(Duration::from_secs(123)); + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer().timeout(Duration::from_secs(123)); assert_eq!(layer.timeout.unwrap(), Duration::from_secs(123)); } #[test] fn timeout_output_when_ready_ok() { - let layer: TimeoutLayer<_, _, Set, Set> = - create_ready_layer().timeout_output(|_args| "some new value".to_string()); + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer().timeout_output(|_args| "some new value".to_string()); assert!(layer.timeout_output.is_some()); let result = layer.timeout_output.unwrap().call(TimeoutOutputArgs { timeout: Duration::from_secs(123), @@ -379,8 +363,7 @@ mod tests { #[test] fn timeout_error_when_ready_ok() { - let layer: TimeoutLayer<_, _, Set, Set> = - create_ready_layer_with_result().timeout_error(|_args| "some error value".to_string()); + let layer: TimeoutLayer<_, _, Set, Set> = create_ready_layer_with_result().timeout_error(|_args| "some error value".to_string()); assert!(layer.timeout_output.is_some()); let result = layer .timeout_output @@ -395,8 +378,7 @@ mod tests { #[test] fn layer_ok() { - let _layered = - create_ready_layer().layer(Execute::new(|input: String| async move { input })); + let _layered = create_ready_layer().layer(Execute::new(|input: String| async move { input })); } #[test] diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 0929b8af..454cb75e 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -10,10 +10,7 @@ use tick::{Clock, FutureExt}; use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; use crate::timeout::telemetry::TIMEOUT_EVENT_NAME; -use crate::timeout::{ - OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, - TimeoutOverrideArgs, -}; +use crate::timeout::{OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs}; use crate::{EnableIf, NotSet, SeatbeltOptions}; /// Applies a timeout to service execution for canceling long-running operations. @@ -71,10 +68,7 @@ impl Timeout { /// For comprehensive examples, see the [timeout module] documentation. /// /// [timeout module]: crate::timeout - pub fn layer( - name: impl Into>, - options: &SeatbeltOptions, - ) -> TimeoutLayer { + pub fn layer(name: impl Into>, options: &SeatbeltOptions) -> TimeoutLayer { TimeoutLayer::new(name.into().into(), options) } } @@ -106,12 +100,7 @@ where .unwrap_or(self.timeout); Either::Right(async move { - match self - .inner - .execute(input) - .timeout(&self.clock, timeout) - .await - { + match self.inner.execute(input).timeout(&self.clock, timeout).await { Ok(output) => output, Err(_error) => { self.event_reporter.add( @@ -236,21 +225,13 @@ mod tests { let service = stack.build(); - assert_eq!( - service.execute("test input".to_string()).await, - "timed out after 150ms" - ); - assert_eq!( - service.execute("ignore".to_string()).await, - "timed out after 200ms" - ); + assert_eq!(service.execute("test input".to_string()).await, "timed out after 150ms"); + assert_eq!(service.execute("ignore".to_string()).await, "timed out after 200ms"); } #[tokio::test] async fn no_timeout_if_disabled() { - let clock = ClockControl::default() - .auto_advance_timers(true) - .to_clock(); + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let stack = ( Timeout::layer("test_timeout", &SeatbeltOptions::new(&clock)) .timeout_output(|_args| "timed out".to_string()) From 5b6b54bcec07bb2958dc3c87b2c8036da5cab22b Mon Sep 17 00:00:00 2001 From: martintomka Date: Fri, 16 Jan 2026 16:53:11 +0100 Subject: [PATCH 11/60] fixes --- crates/seatbelt/Cargo.toml | 1 + crates/seatbelt/src/circuit_breaker/engine/engine_core.rs | 1 + .../seatbelt/src/circuit_breaker/engine/engine_telemetry.rs | 1 + crates/seatbelt/src/circuit_breaker/engine/engines.rs | 1 + crates/seatbelt/src/circuit_breaker/engine/mod.rs | 2 ++ .../src/circuit_breaker/engine/probing/health_probe.rs | 1 + crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs | 1 + .../seatbelt/src/circuit_breaker/engine/probing/options.rs | 1 + .../seatbelt/src/circuit_breaker/engine/probing/probes.rs | 1 + .../src/circuit_breaker/engine/probing/single_probe.rs | 1 + crates/seatbelt/src/circuit_breaker/execution_result.rs | 1 + crates/seatbelt/src/circuit_breaker/half_open_mode.rs | 1 + crates/seatbelt/src/circuit_breaker/health.rs | 1 + crates/seatbelt/src/circuit_breaker/layer.rs | 1 + crates/seatbelt/src/circuit_breaker/partition_key.rs | 1 + crates/seatbelt/src/circuit_breaker/service.rs | 1 + crates/seatbelt/src/lib.rs | 6 ++++++ crates/seatbelt/src/options/attempt.rs | 1 + crates/seatbelt/src/options/define_fn_wrapper.rs | 1 + crates/seatbelt/src/options/seatbelt_options.rs | 1 + crates/seatbelt/src/retry/args.rs | 1 + crates/seatbelt/src/retry/backoff.rs | 1 + crates/seatbelt/src/retry/layer.rs | 1 + crates/seatbelt/src/retry/service.rs | 1 + crates/seatbelt/src/telemetry/metrics.rs | 1 + crates/seatbelt/src/telemetry/mod.rs | 1 + crates/seatbelt/src/timeout/args.rs | 1 + crates/seatbelt/src/timeout/layer.rs | 1 + crates/seatbelt/src/timeout/service.rs | 1 + 29 files changed, 35 insertions(+) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 4cbc3fc4..eadd14eb 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -20,6 +20,7 @@ repository.workspace = true all-features = true [features] +default = [] service = ["dep:layered"] telemetry = [] metrics = ["dep:opentelemetry", "opentelemetry/metrics", "telemetry"] diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs index adb1816a..4e6eaa0b 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs @@ -192,6 +192,7 @@ impl Stats { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use std::ops::Deref; diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs index bebf3080..8ed29140 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs @@ -159,6 +159,7 @@ impl CircuitEngine for EngineTelemetry { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] #[cfg(not(miri))] mod tests { diff --git a/crates/seatbelt/src/circuit_breaker/engine/engines.rs b/crates/seatbelt/src/circuit_breaker/engine/engines.rs index 140cdcfb..7019711e 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engines.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engines.rs @@ -69,6 +69,7 @@ impl Engines { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use std::time::Duration; diff --git a/crates/seatbelt/src/circuit_breaker/engine/mod.rs b/crates/seatbelt/src/circuit_breaker/engine/mod.rs index d6c6b3ec..80df2172 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/mod.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/mod.rs @@ -83,6 +83,7 @@ pub(crate) trait CircuitEngine: Debug + Send + Sync + 'static { mod engine_core; pub(crate) use engine_core::*; +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod engine_fake; #[cfg(test)] @@ -94,6 +95,7 @@ pub(crate) use engine_telemetry::*; mod engines; pub(crate) use engines::*; +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs index a11e653b..3142c1d2 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs @@ -79,6 +79,7 @@ impl HealthProbe { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use std::time::Duration; diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs index a7e1b64f..7e67f4cf 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs @@ -96,6 +96,7 @@ impl ProbeOperation for Probe { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use std::time::Duration; diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs index f65424bf..f0059d62 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs @@ -90,6 +90,7 @@ impl HealthProbeOptions { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use static_assertions::assert_impl_all; diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs index acf7cf0d..569b626c 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs @@ -46,6 +46,7 @@ impl Probes { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs index 99c1dce7..63cfee8e 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs @@ -54,6 +54,7 @@ impl ProbeOperation for SingleProbe { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/circuit_breaker/execution_result.rs b/crates/seatbelt/src/circuit_breaker/execution_result.rs index a04faa3e..b653a518 100644 --- a/crates/seatbelt/src/circuit_breaker/execution_result.rs +++ b/crates/seatbelt/src/circuit_breaker/execution_result.rs @@ -30,6 +30,7 @@ impl ExecutionResult { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/circuit_breaker/half_open_mode.rs b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs index 2452c4b0..71710044 100644 --- a/crates/seatbelt/src/circuit_breaker/half_open_mode.rs +++ b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs @@ -74,6 +74,7 @@ enum Mode { Reliable(Option), } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/circuit_breaker/health.rs b/crates/seatbelt/src/circuit_breaker/health.rs index f324d22c..469a0e37 100644 --- a/crates/seatbelt/src/circuit_breaker/health.rs +++ b/crates/seatbelt/src/circuit_breaker/health.rs @@ -165,6 +165,7 @@ impl Window { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/circuit_breaker/layer.rs b/crates/seatbelt/src/circuit_breaker/layer.rs index 89c5bb3b..8eebdfb3 100644 --- a/crates/seatbelt/src/circuit_breaker/layer.rs +++ b/crates/seatbelt/src/circuit_breaker/layer.rs @@ -449,6 +449,7 @@ impl CircuitBreakerLayer), } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use std::fmt::Debug; diff --git a/crates/seatbelt/src/circuit_breaker/service.rs b/crates/seatbelt/src/circuit_breaker/service.rs index b2c0adaf..77c2f62e 100644 --- a/crates/seatbelt/src/circuit_breaker/service.rs +++ b/crates/seatbelt/src/circuit_breaker/service.rs @@ -157,6 +157,7 @@ impl CircuitBreaker { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] #[cfg(not(miri))] mod tests { diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index be12ef04..93cb21e0 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -1,5 +1,10 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +#![cfg_attr(coverage_nightly, feature(coverage_attribute))] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/logo.png")] +#![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/favicon.ico")] #![expect( rustdoc::broken_intra_doc_links, reason = "Too ugly to make 'live links' possible with the combination of features" @@ -140,5 +145,6 @@ pub mod service { #[cfg(any(feature = "retry", feature = "circuit-breaker", test))] mod rnd; +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] pub(crate) mod testing; diff --git a/crates/seatbelt/src/options/attempt.rs b/crates/seatbelt/src/options/attempt.rs index d90d6dc6..7e305efe 100644 --- a/crates/seatbelt/src/options/attempt.rs +++ b/crates/seatbelt/src/options/attempt.rs @@ -170,6 +170,7 @@ impl From for MaxAttempts { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/options/define_fn_wrapper.rs b/crates/seatbelt/src/options/define_fn_wrapper.rs index dbfd6451..f30752dd 100644 --- a/crates/seatbelt/src/options/define_fn_wrapper.rs +++ b/crates/seatbelt/src/options/define_fn_wrapper.rs @@ -128,6 +128,7 @@ macro_rules! define_fn_wrapper { pub(crate) use define_fn_wrapper; +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use std::fmt::Debug; diff --git a/crates/seatbelt/src/options/seatbelt_options.rs b/crates/seatbelt/src/options/seatbelt_options.rs index bce48044..02d36d32 100644 --- a/crates/seatbelt/src/options/seatbelt_options.rs +++ b/crates/seatbelt/src/options/seatbelt_options.rs @@ -217,6 +217,7 @@ impl Clone for SeatbeltOptions { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use opentelemetry_sdk::metrics::{InMemoryMetricExporter, SdkMeterProvider}; diff --git a/crates/seatbelt/src/retry/args.rs b/crates/seatbelt/src/retry/args.rs index 3e25e4c8..131c8034 100644 --- a/crates/seatbelt/src/retry/args.rs +++ b/crates/seatbelt/src/retry/args.rs @@ -105,6 +105,7 @@ impl RestoreInputArgs { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index c430e369..2517af73 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -185,6 +185,7 @@ impl Default for BackoffOptions { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use std::sync::Mutex; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 75f61d8c..1e9b4a17 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -503,6 +503,7 @@ impl RetryLayer Retry { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(not(miri))] // Oxidizer runtime does not support Miri. #[cfg(test)] mod tests { diff --git a/crates/seatbelt/src/telemetry/metrics.rs b/crates/seatbelt/src/telemetry/metrics.rs index 0f315aeb..55d84e4c 100644 --- a/crates/seatbelt/src/telemetry/metrics.rs +++ b/crates/seatbelt/src/telemetry/metrics.rs @@ -24,6 +24,7 @@ pub(crate) fn create_resilience_event_counter(meter: &Meter) -> Counter { .build() } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use opentelemetry_sdk::metrics::InMemoryMetricExporter; diff --git a/crates/seatbelt/src/telemetry/mod.rs b/crates/seatbelt/src/telemetry/mod.rs index 2f457c6d..4aa9719d 100644 --- a/crates/seatbelt/src/telemetry/mod.rs +++ b/crates/seatbelt/src/telemetry/mod.rs @@ -40,6 +40,7 @@ pub const STRATEGY_NAME: &str = "resilience.strategy.name"; /// Examples: `retry`, `timeout`, `circuit_opened`. pub const EVENT_NAME: &str = "resilience.event.name"; +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/timeout/args.rs b/crates/seatbelt/src/timeout/args.rs index c65c2c0d..ad70b0f3 100644 --- a/crates/seatbelt/src/timeout/args.rs +++ b/crates/seatbelt/src/timeout/args.rs @@ -60,6 +60,7 @@ impl TimeoutOutputArgs { } } +#[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { use super::*; diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 914df69c..293100f2 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -225,6 +225,7 @@ impl TimeoutLayer Date: Mon, 19 Jan 2026 15:06:04 +0100 Subject: [PATCH 12/60] rename CircuitBreaker to just Circuit --- crates/seatbelt/Cargo.toml | 10 +-- crates/seatbelt/README.md | 10 +-- .../{circuit_breaker.rs => circuit.rs} | 4 +- .../{circuit_breaker.rs => circuit.rs} | 4 +- .../src/{circuit_breaker => circuit}/args.rs | 2 +- .../{circuit_breaker => circuit}/callbacks.rs | 0 .../{circuit_breaker => circuit}/constants.rs | 0 .../engine/engine_core.rs | 10 +-- .../engine/engine_fake.rs | 2 +- .../engine/engine_telemetry.rs | 6 +- .../engine/engines.rs | 8 +- .../engine/mod.rs | 2 +- .../engine/probing/health_probe.rs | 4 +- .../engine/probing/mod.rs | 2 +- .../engine/probing/options.rs | 2 +- .../engine/probing/probes.rs | 2 +- .../engine/probing/single_probe.rs | 2 +- .../execution_result.rs | 0 .../half_open_mode.rs | 6 +- .../{circuit_breaker => circuit}/health.rs | 2 +- .../src/{circuit_breaker => circuit}/layer.rs | 84 +++++++++---------- .../src/{circuit_breaker => circuit}/mod.rs | 62 +++++++------- .../partition_key.rs | 4 +- .../{circuit_breaker => circuit}/service.rs | 34 ++++---- .../{circuit_breaker => circuit}/telemetry.rs | 0 crates/seatbelt/src/lib.rs | 10 +-- 26 files changed, 136 insertions(+), 136 deletions(-) rename crates/seatbelt/benches/{circuit_breaker.rs => circuit.rs} (94%) rename crates/seatbelt/examples/{circuit_breaker.rs => circuit.rs} (97%) rename crates/seatbelt/src/{circuit_breaker => circuit}/args.rs (98%) rename crates/seatbelt/src/{circuit_breaker => circuit}/callbacks.rs (100%) rename crates/seatbelt/src/{circuit_breaker => circuit}/constants.rs (100%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/engine_core.rs (98%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/engine_fake.rs (84%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/engine_telemetry.rs (97%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/engines.rs (91%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/mod.rs (97%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/probing/health_probe.rs (97%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/probing/mod.rs (98%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/probing/options.rs (99%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/probing/probes.rs (98%) rename crates/seatbelt/src/{circuit_breaker => circuit}/engine/probing/single_probe.rs (98%) rename crates/seatbelt/src/{circuit_breaker => circuit}/execution_result.rs (100%) rename crates/seatbelt/src/{circuit_breaker => circuit}/half_open_mode.rs (97%) rename crates/seatbelt/src/{circuit_breaker => circuit}/health.rs (99%) rename crates/seatbelt/src/{circuit_breaker => circuit}/layer.rs (86%) rename crates/seatbelt/src/{circuit_breaker => circuit}/mod.rs (85%) rename crates/seatbelt/src/{circuit_breaker => circuit}/partition_key.rs (98%) rename crates/seatbelt/src/{circuit_breaker => circuit}/service.rs (93%) rename crates/seatbelt/src/{circuit_breaker => circuit}/telemetry.rs (100%) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index eadd14eb..f9e9fc5d 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -29,7 +29,7 @@ logs = ["telemetry", "dep:tracing"] options = ["dep:tick", "telemetry", "service", "metrics", "logs"] timeout = ["options"] retry = ["options", "dep:fastrand"] -circuit-breaker = ["options", "dep:fastrand"] +circuit = ["options", "dep:fastrand"] [dependencies] fastrand = { workspace = true, optional = true } @@ -84,8 +84,8 @@ name = "resilience_pipeline" required-features = ["retry", "timeout"] [[example]] -name = "circuit_breaker" -required-features = ["circuit-breaker"] +name = "circuit" +required-features = ["circuit"] [[bench]] name = "observability" @@ -103,9 +103,9 @@ harness = false required-features = ["retry"] [[bench]] -name = "circuit_breaker" +name = "circuit" harness = false -required-features = ["circuit-breaker"] +required-features = ["circuit"] [lints] workspace = true diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index f59567c4..e6be42a8 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -82,7 +82,7 @@ for each module for details on how to use them. * [`timeout`][__link6]: Cancels long-running operations. * [`retry`][__link7]: Automatically retries failed operations with configurable backoff strategies. -* [`circuit_breaker`][__link8]: Prevents cascading failures by stopping requests to unhealthy services. +* [`circuit`][__link8]: Prevents cascading failures by stopping requests to unhealthy services. ### Features @@ -97,7 +97,7 @@ This crate supports several optional features that can be enabled to extend func * `timeout`: Enables the [`timeout`][__link12] middleware for canceling long-running operations. * `retry`: Enables the [`retry`][__link13] middleware for automatically retrying failed operations with configurable backoff strategies, jitter, and recovery classification. -* `circuit-breaker`: Enables the [`circuit_breaker`][__link14] middleware for preventing cascading failures. +* `circuit`: Enables the [`circuit`][__link14] middleware for preventing cascading failures.
@@ -105,19 +105,19 @@ This crate supports several optional features that can be enabled to extend func This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG_h-2C_E9HQsG5HdDZkQ24HrGw2SJ0OwsTS7G4yA4phNCJzdYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG2dhk5l0Ky--G-zcUMSmDnfBG6s2B3IYE_rxGzkEJPiyOJeqYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI [__link0]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link1]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link10]: https://crates.io/crates/tick/0.1.2 [__link11]: https://crates.io/crates/layered/0.1.0 [__link12]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html [__link13]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html - [__link14]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit_breaker/index.html + [__link14]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html [__link2]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link3]: https://crates.io/crates/tick/0.1.2 [__link4]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::SeatbeltOptions [__link5]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link6]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html [__link7]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html - [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit_breaker/index.html + [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::SeatbeltOptions diff --git a/crates/seatbelt/benches/circuit_breaker.rs b/crates/seatbelt/benches/circuit.rs similarity index 94% rename from crates/seatbelt/benches/circuit_breaker.rs rename to crates/seatbelt/benches/circuit.rs index a2433d56..042010e6 100644 --- a/crates/seatbelt/benches/circuit_breaker.rs +++ b/crates/seatbelt/benches/circuit.rs @@ -4,7 +4,7 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use seatbelt::circuit_breaker::CircuitBreaker; +use seatbelt::circuit::Circuit; use seatbelt::{RecoveryInfo, SeatbeltOptions}; use tick::Clock; @@ -29,7 +29,7 @@ fn entry(c: &mut Criterion) { let options = SeatbeltOptions::new(Clock::new_frozen()); let service = ( - CircuitBreaker::layer("bench", &options) + Circuit::layer("bench", &options) .recovery_with(|_, _| RecoveryInfo::never()) .rejected_input_error(|_input, _args| Output) .min_throughput(1000), // High threshold to keep circuit closed diff --git a/crates/seatbelt/examples/circuit_breaker.rs b/crates/seatbelt/examples/circuit.rs similarity index 97% rename from crates/seatbelt/examples/circuit_breaker.rs rename to crates/seatbelt/examples/circuit.rs index f33c482b..00c41cce 100644 --- a/crates/seatbelt/examples/circuit_breaker.rs +++ b/crates/seatbelt/examples/circuit.rs @@ -14,7 +14,7 @@ use layered::{Execute, Service, Stack}; use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::circuit_breaker::CircuitBreaker; +use seatbelt::circuit::Circuit; use seatbelt::{RecoveryInfo, SeatbeltOptions}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; @@ -29,7 +29,7 @@ async fn main() -> Result<(), AppError> { // Define stack with circuit breaker layer let stack = ( - CircuitBreaker::layer("my_circuit_breaker", &options) + Circuit::layer("my_circuit_breaker", &options) // Required: classify the recoverability of outputs .recovery_with(|output, _args| match output { Ok(_) => RecoveryInfo::never(), diff --git a/crates/seatbelt/src/circuit_breaker/args.rs b/crates/seatbelt/src/circuit/args.rs similarity index 98% rename from crates/seatbelt/src/circuit_breaker/args.rs rename to crates/seatbelt/src/circuit/args.rs index d5729227..1a8c6baa 100644 --- a/crates/seatbelt/src/circuit_breaker/args.rs +++ b/crates/seatbelt/src/circuit/args.rs @@ -4,7 +4,7 @@ use std::time::Duration; use tick::Clock; -use crate::circuit_breaker::PartitionKey; +use crate::circuit::PartitionKey; /// Arguments for the [`recovery_with`][super::CircuitBreakerLayer::recovery_with] callback function. /// diff --git a/crates/seatbelt/src/circuit_breaker/callbacks.rs b/crates/seatbelt/src/circuit/callbacks.rs similarity index 100% rename from crates/seatbelt/src/circuit_breaker/callbacks.rs rename to crates/seatbelt/src/circuit/callbacks.rs diff --git a/crates/seatbelt/src/circuit_breaker/constants.rs b/crates/seatbelt/src/circuit/constants.rs similarity index 100% rename from crates/seatbelt/src/circuit_breaker/constants.rs rename to crates/seatbelt/src/circuit/constants.rs diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs b/crates/seatbelt/src/circuit/engine/engine_core.rs similarity index 98% rename from crates/seatbelt/src/circuit_breaker/engine/engine_core.rs rename to crates/seatbelt/src/circuit/engine/engine_core.rs index 4e6eaa0b..e0032690 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs +++ b/crates/seatbelt/src/circuit/engine/engine_core.rs @@ -6,9 +6,9 @@ use std::time::{Duration, Instant}; use tick::Clock; use super::{EngineOptions, EnterCircuitResult, ExitCircuitResult}; -use crate::circuit_breaker::constants::ERR_POISONED_LOCK; -use crate::circuit_breaker::engine::probing::{AllowProbeResult, Probes, ProbingResult}; -use crate::circuit_breaker::{CircuitEngine, ExecutionMode, ExecutionResult, HealthMetrics, HealthStatus}; +use crate::circuit::constants::ERR_POISONED_LOCK; +use crate::circuit::engine::probing::{AllowProbeResult, Probes, ProbingResult}; +use crate::circuit::{CircuitEngine, ExecutionMode, ExecutionResult, HealthMetrics, HealthStatus}; /// Engine that manages the state of the circuit breaker. #[derive(Debug)] @@ -200,8 +200,8 @@ mod tests { use tick::ClockControl; use super::*; - use crate::circuit_breaker::HealthMetricsBuilder; - use crate::circuit_breaker::engine::probing::ProbesOptions; + use crate::circuit::HealthMetricsBuilder; + use crate::circuit::engine::probing::ProbesOptions; fn create_test_settings() -> EngineOptions { EngineOptions { diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs b/crates/seatbelt/src/circuit/engine/engine_fake.rs similarity index 84% rename from crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs rename to crates/seatbelt/src/circuit/engine/engine_fake.rs index 503a9941..696894d4 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs +++ b/crates/seatbelt/src/circuit/engine/engine_fake.rs @@ -1,6 +1,6 @@ // Copyright (c) Microsoft Corporation. -use crate::circuit_breaker::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; +use crate::circuit::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; /// Fake engine to be used in tests. #[derive(Debug)] diff --git a/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs similarity index 97% rename from crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs rename to crates/seatbelt/src/circuit/engine/engine_telemetry.rs index 8ed29140..9d85872f 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs @@ -4,8 +4,8 @@ use opentelemetry::StringValue; use opentelemetry::metrics::Counter; use tick::Clock; -use crate::circuit_breaker::telemetry::*; -use crate::circuit_breaker::{CircuitEngine, CircuitState, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; +use crate::circuit::telemetry::*; +use crate::circuit::{CircuitEngine, CircuitState, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; /// Wrapper around a circuit engine to add telemetry capabilities. @@ -168,7 +168,7 @@ mod tests { use opentelemetry::KeyValue; use super::*; - use crate::circuit_breaker::{EngineFake, HealthInfo, Stats}; + use crate::circuit::{EngineFake, HealthInfo, Stats}; use crate::telemetry::metrics::{create_meter, create_resilience_event_counter}; use crate::testing::MetricTester; diff --git a/crates/seatbelt/src/circuit_breaker/engine/engines.rs b/crates/seatbelt/src/circuit/engine/engines.rs similarity index 91% rename from crates/seatbelt/src/circuit_breaker/engine/engines.rs rename to crates/seatbelt/src/circuit/engine/engines.rs index 7019711e..4f3884b6 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/engines.rs +++ b/crates/seatbelt/src/circuit/engine/engines.rs @@ -7,8 +7,8 @@ use opentelemetry::StringValue; use opentelemetry::metrics::Counter; use tick::Clock; -use crate::circuit_breaker::constants::ERR_POISONED_LOCK; -use crate::circuit_breaker::{Engine, EngineCore, EngineOptions, EngineTelemetry, PartitionKey}; +use crate::circuit::constants::ERR_POISONED_LOCK; +use crate::circuit::{Engine, EngineCore, EngineOptions, EngineTelemetry, PartitionKey}; /// Manages circuit breaker engines for different partition keys. #[derive(Debug)] @@ -75,8 +75,8 @@ mod tests { use std::time::Duration; use super::*; - use crate::circuit_breaker::HealthMetricsBuilder; - use crate::circuit_breaker::engine::probing::ProbesOptions; + use crate::circuit::HealthMetricsBuilder; + use crate::circuit::engine::probing::ProbesOptions; use crate::telemetry::metrics::create_resilience_event_counter; #[test] diff --git a/crates/seatbelt/src/circuit_breaker/engine/mod.rs b/crates/seatbelt/src/circuit/engine/mod.rs similarity index 97% rename from crates/seatbelt/src/circuit_breaker/engine/mod.rs rename to crates/seatbelt/src/circuit/engine/mod.rs index 80df2172..d2679d7b 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/mod.rs +++ b/crates/seatbelt/src/circuit/engine/mod.rs @@ -3,7 +3,7 @@ use std::fmt::Debug; use std::time::Duration; -use crate::circuit_breaker::{ExecutionResult, HealthInfo, HealthMetricsBuilder}; +use crate::circuit::{ExecutionResult, HealthInfo, HealthMetricsBuilder}; pub(super) mod probing; diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs b/crates/seatbelt/src/circuit/engine/probing/health_probe.rs similarity index 97% rename from crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs rename to crates/seatbelt/src/circuit/engine/probing/health_probe.rs index 3142c1d2..20faf95c 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs +++ b/crates/seatbelt/src/circuit/engine/probing/health_probe.rs @@ -2,8 +2,8 @@ use std::time::Instant; -use crate::circuit_breaker::engine::probing::{AllowProbeResult, HealthProbeOptions, ProbeOperation, ProbingResult}; -use crate::circuit_breaker::{ExecutionResult, HealthMetrics, HealthStatus}; +use crate::circuit::engine::probing::{AllowProbeResult, HealthProbeOptions, ProbeOperation, ProbingResult}; +use crate::circuit::{ExecutionResult, HealthMetrics, HealthStatus}; use crate::rnd::Rnd; #[derive(Debug)] diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs b/crates/seatbelt/src/circuit/engine/probing/mod.rs similarity index 98% rename from crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs rename to crates/seatbelt/src/circuit/engine/probing/mod.rs index 7e67f4cf..4d64932c 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs +++ b/crates/seatbelt/src/circuit/engine/probing/mod.rs @@ -11,7 +11,7 @@ use std::fmt::Debug; use std::time::Instant; -use crate::circuit_breaker::{EnterCircuitResult, ExecutionMode, ExecutionResult}; +use crate::circuit::{EnterCircuitResult, ExecutionMode, ExecutionResult}; mod health_probe; mod options; diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs b/crates/seatbelt/src/circuit/engine/probing/options.rs similarity index 99% rename from crates/seatbelt/src/circuit_breaker/engine/probing/options.rs rename to crates/seatbelt/src/circuit/engine/probing/options.rs index f0059d62..c52d72c0 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs +++ b/crates/seatbelt/src/circuit/engine/probing/options.rs @@ -3,7 +3,7 @@ use std::time::Duration; use std::vec::IntoIter; -use crate::circuit_breaker::HealthMetricsBuilder; +use crate::circuit::HealthMetricsBuilder; /// The minimum throughput during probing stage is set to 1, so at least one request must come /// through in each probing stage to evaluate the health. diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs b/crates/seatbelt/src/circuit/engine/probing/probes.rs similarity index 98% rename from crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs rename to crates/seatbelt/src/circuit/engine/probing/probes.rs index 569b626c..46e9c8b6 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs +++ b/crates/seatbelt/src/circuit/engine/probing/probes.rs @@ -4,7 +4,7 @@ use std::time::Instant; use std::vec; use super::{AllowProbeResult, Probe, ProbeOperation, ProbeOptions, ProbesOptions, ProbingResult}; -use crate::circuit_breaker::ExecutionResult; +use crate::circuit::ExecutionResult; /// Manages a sequence of probes. #[derive(Debug)] diff --git a/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs b/crates/seatbelt/src/circuit/engine/probing/single_probe.rs similarity index 98% rename from crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs rename to crates/seatbelt/src/circuit/engine/probing/single_probe.rs index 63cfee8e..4d8d2cba 100644 --- a/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs +++ b/crates/seatbelt/src/circuit/engine/probing/single_probe.rs @@ -3,7 +3,7 @@ use std::time::{Duration, Instant}; use super::{AllowProbeResult, ProbeOperation, ProbingResult}; -use crate::circuit_breaker::ExecutionResult; +use crate::circuit::ExecutionResult; /// Allows a single probe to get in and based on the result either closes the circuit /// or goes back to open state. diff --git a/crates/seatbelt/src/circuit_breaker/execution_result.rs b/crates/seatbelt/src/circuit/execution_result.rs similarity index 100% rename from crates/seatbelt/src/circuit_breaker/execution_result.rs rename to crates/seatbelt/src/circuit/execution_result.rs diff --git a/crates/seatbelt/src/circuit_breaker/half_open_mode.rs b/crates/seatbelt/src/circuit/half_open_mode.rs similarity index 97% rename from crates/seatbelt/src/circuit_breaker/half_open_mode.rs rename to crates/seatbelt/src/circuit/half_open_mode.rs index 71710044..6566c05d 100644 --- a/crates/seatbelt/src/circuit_breaker/half_open_mode.rs +++ b/crates/seatbelt/src/circuit/half_open_mode.rs @@ -2,8 +2,8 @@ use std::time::Duration; -use crate::circuit_breaker::constants::MIN_SAMPLING_DURATION; -use crate::circuit_breaker::engine::probing::ProbesOptions; +use crate::circuit::constants::MIN_SAMPLING_DURATION; +use crate::circuit::engine::probing::ProbesOptions; /// Defines the behavior of the circuit breaker when transitioning from half-open to closed state. /// @@ -78,7 +78,7 @@ enum Mode { #[cfg(test)] mod tests { use super::*; - use crate::circuit_breaker::engine::probing::ProbeOptions; + use crate::circuit::engine::probing::ProbeOptions; #[test] fn quick_mode_creates_single_probe() { diff --git a/crates/seatbelt/src/circuit_breaker/health.rs b/crates/seatbelt/src/circuit/health.rs similarity index 99% rename from crates/seatbelt/src/circuit_breaker/health.rs rename to crates/seatbelt/src/circuit/health.rs index 469a0e37..b7fd617b 100644 --- a/crates/seatbelt/src/circuit_breaker/health.rs +++ b/crates/seatbelt/src/circuit/health.rs @@ -4,7 +4,7 @@ use std::collections::VecDeque; use std::time::{Duration, Instant}; use super::ExecutionResult; -use crate::circuit_breaker::constants::MIN_SAMPLING_DURATION; +use crate::circuit::constants::MIN_SAMPLING_DURATION; const WINDOW_COUNT: u32 = 10; diff --git a/crates/seatbelt/src/circuit_breaker/layer.rs b/crates/seatbelt/src/circuit/layer.rs similarity index 86% rename from crates/seatbelt/src/circuit_breaker/layer.rs rename to crates/seatbelt/src/circuit/layer.rs index 8eebdfb3..25b7ff3d 100644 --- a/crates/seatbelt/src/circuit_breaker/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -7,25 +7,25 @@ use opentelemetry::StringValue; use super::constants::{DEFAULT_BREAK_DURATION, DEFAULT_FAILURE_THRESHOLD, DEFAULT_MIN_THROUGHPUT, DEFAULT_SAMPLING_DURATION}; use super::{ - CircuitBreaker, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, + Circuit, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RejectedInput, RejectedInputArgs, ShouldRecover, }; -use crate::circuit_breaker::engine::probing::ProbesOptions; +use crate::circuit::engine::probing::ProbesOptions; use crate::service::Layer; use crate::{EnableIf, NotSet, Recovery, RecoveryInfo, SeatbeltOptions, Set}; /// Builder for configuring circuit breaker resilience middleware. /// -/// This type is created by calling [`CircuitBreaker::layer`] and uses the +/// This type is created by calling [`Circuit::layer`] and uses the /// typestate pattern to enforce that required properties are configured before the circuit breaker /// middleware can be built: /// -/// - [`recovery`][CircuitBreakerLayer::recovery]: Required to determine if an output represents a failure -/// - [`rejected_input`][CircuitBreakerLayer::rejected_input]: Required to specify the output when the circuit is open and inputs are rejected +/// - [`recovery`][CircuitLayer::recovery]: Required to determine if an output represents a failure +/// - [`rejected_input`][CircuitLayer::rejected_input]: Required to specify the output when the circuit is open and inputs are rejected /// -/// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. +/// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit] documentation. #[derive(Debug)] -pub struct CircuitBreakerLayer { +pub struct CircuitLayer { options: SeatbeltOptions, recovery: Option>, rejected_input: Option>, @@ -43,7 +43,7 @@ pub struct CircuitBreakerLayer Out>, } -impl CircuitBreakerLayer { +impl CircuitLayer { #[must_use] pub(crate) fn new(name: StringValue, options: &SeatbeltOptions) -> Self { Self { @@ -66,7 +66,7 @@ impl CircuitBreakerLayer { } } -impl CircuitBreakerLayer, RecoveryState, RejectedInputState> { +impl CircuitLayer, RecoveryState, RejectedInputState> { /// Sets the error to return when the circuit breaker is open for Result-returning services. /// /// When the circuit is open, requests are immediately rejected and this function @@ -84,32 +84,32 @@ impl CircuitBreakerLayer E + Send + Sync + 'static, - ) -> CircuitBreakerLayer, RecoveryState, Set> { + ) -> CircuitLayer, RecoveryState, Set> { self.into_state::() .rejected_input(move |input, args| Err(error_producer(input, args))) .into_state() } } -impl CircuitBreakerLayer { +impl CircuitLayer { /// Sets the recovery classification function. /// /// This function determines whether a specific output represents a failure /// by examining the output and returning a [`RecoveryInfo`] classification. /// - /// The function receives the output and [`RecoveryArgs`][crate::circuit_breaker::RecoveryArgs] + /// The function receives the output and [`RecoveryArgs`][crate::circuit::RecoveryArgs] /// with context about the circuit breaker state. /// /// # Arguments /// /// * `recover_fn` - Function that takes a reference to the output and - /// [`RecoveryArgs`][crate::circuit_breaker::RecoveryArgs] containing circuit breaker context, + /// [`RecoveryArgs`][crate::circuit::RecoveryArgs] containing circuit breaker context, /// and returns a [`RecoveryInfo`] decision #[must_use] pub fn recovery_with( mut self, - recover_fn: impl Fn(&Out, crate::circuit_breaker::RecoveryArgs) -> RecoveryInfo + Send + Sync + 'static, - ) -> CircuitBreakerLayer { + recover_fn: impl Fn(&Out, crate::circuit::RecoveryArgs) -> RecoveryInfo + Send + Sync + 'static, + ) -> CircuitLayer { self.recovery = Some(ShouldRecover::new(recover_fn)); self.into_state::() } @@ -121,14 +121,14 @@ impl CircuitBreakerLayer CircuitBreakerLayer + pub fn recovery(self) -> CircuitLayer where Out: Recovery, { @@ -150,7 +150,7 @@ impl CircuitBreakerLayer Out + Send + Sync + 'static, - ) -> CircuitBreakerLayer { + ) -> CircuitLayer { self.rejected_input = Some(RejectedInput::new(rejected_fn)); self.into_state::() } @@ -300,7 +300,7 @@ impl CircuitBreakerLayer CircuitBreakerLayer) { + /// # fn example(circuit_breaker_layer: CircuitLayer) { /// // Configure circuit breaker with a partition key based on a scheme, host and port. /// let layer = circuit_breaker_layer.partition_key(|request: &HttpRequest| { /// let partition = format!("{}://{}:{}", request.scheme, request.host, request.port); @@ -388,11 +388,11 @@ impl CircuitBreakerLayer Layer for CircuitBreakerLayer { - type Service = CircuitBreaker; +impl Layer for CircuitLayer { + type Service = Circuit; fn layer(&self, inner: S) -> Self::Service { - CircuitBreaker { + Circuit { inner, clock: self.options.get_clock().clone(), recovery: self.recovery.clone().expect("recovery must be set in Ready state"), @@ -407,7 +407,7 @@ impl Layer for CircuitBreakerLayer { } } -impl CircuitBreakerLayer { +impl CircuitLayer { fn probes_options(&self) -> ProbesOptions { self.half_open_mode // we will use break duration as the sampling duration for probes @@ -428,8 +428,8 @@ impl CircuitBreakerLayer(self) -> CircuitBreakerLayer { - CircuitBreakerLayer { + fn into_state(self) -> CircuitLayer { + CircuitLayer { options: self.options, recovery: self.recovery, rejected_input: self.rejected_input, @@ -458,15 +458,15 @@ mod tests { use tick::Clock; use super::*; - use crate::circuit_breaker::RecoveryArgs; - use crate::circuit_breaker::engine::probing::ProbeOptions; + use crate::circuit::RecoveryArgs; + use crate::circuit::engine::probing::ProbeOptions; use crate::testing::RecoverableType; #[test] #[expect(clippy::float_cmp, reason = "Test")] fn new_creates_correct_initial_state() { let options = create_test_options(); - let layer: CircuitBreakerLayer<_, _, NotSet, NotSet> = CircuitBreakerLayer::new(StringValue::from("test_breaker"), &options); + let layer: CircuitLayer<_, _, NotSet, NotSet> = CircuitLayer::new(StringValue::from("test_breaker"), &options); assert!(layer.recovery.is_none()); assert!(layer.rejected_input.is_none()); @@ -480,9 +480,9 @@ mod tests { #[test] fn recovery_sets_correctly() { let options = create_test_options(); - let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + let layer = CircuitLayer::new(StringValue::from("test"), &options); - let layer: CircuitBreakerLayer<_, _, Set, NotSet> = layer.recovery_with(|output, _args| { + let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery_with(|output, _args| { if output.contains("error") { RecoveryInfo::retry() } else { @@ -512,9 +512,9 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { let options = SeatbeltOptions::::new(Clock::new_frozen()); - let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + let layer = CircuitLayer::new(StringValue::from("test"), &options); - let layer: CircuitBreakerLayer<_, _, Set, NotSet> = layer.recovery(); + let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery(); let result = layer.recovery.as_ref().unwrap().call( &RecoverableType::from(RecoveryInfo::retry()), @@ -538,9 +538,9 @@ mod tests { #[test] fn rejected_input_sets_correctly() { let options = create_test_options(); - let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + let layer = CircuitLayer::new(StringValue::from("test"), &options); - let layer: CircuitBreakerLayer<_, _, NotSet, Set> = layer.rejected_input(|_, _| "rejected".to_string()); + let layer: CircuitLayer<_, _, NotSet, Set> = layer.rejected_input(|_, _| "rejected".to_string()); let result = layer.rejected_input.as_ref().unwrap().call( "test".to_string(), @@ -641,7 +641,7 @@ mod tests { #[expect(clippy::float_cmp, reason = "Test")] fn default_values_are_correct() { let options = create_test_options(); - let layer = CircuitBreakerLayer::new(StringValue::from("test"), &options); + let layer = CircuitLayer::new(StringValue::from("test"), &options); assert_eq!(layer.failure_threshold, DEFAULT_FAILURE_THRESHOLD); assert_eq!(layer.min_throughput, DEFAULT_MIN_THROUGHPUT); @@ -676,18 +676,18 @@ mod tests { #[test] fn static_assertions() { - static_assertions::assert_impl_all!(CircuitBreakerLayer: Layer); - static_assertions::assert_not_impl_all!(CircuitBreakerLayer: Layer); - static_assertions::assert_not_impl_all!(CircuitBreakerLayer: Layer); - static_assertions::assert_impl_all!(CircuitBreakerLayer: Debug); + static_assertions::assert_impl_all!(CircuitLayer: Layer); + static_assertions::assert_not_impl_all!(CircuitLayer: Layer); + static_assertions::assert_not_impl_all!(CircuitLayer: Layer); + static_assertions::assert_impl_all!(CircuitLayer: Debug); } fn create_test_options() -> SeatbeltOptions { SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") } - fn create_ready_layer() -> CircuitBreakerLayer { - CircuitBreakerLayer::new(StringValue::from("test"), &create_test_options()) + fn create_ready_layer() -> CircuitLayer { + CircuitLayer::new(StringValue::from("test"), &create_test_options()) .recovery_with(|output, _args| { if output.contains("error") { RecoveryInfo::retry() diff --git a/crates/seatbelt/src/circuit_breaker/mod.rs b/crates/seatbelt/src/circuit/mod.rs similarity index 85% rename from crates/seatbelt/src/circuit_breaker/mod.rs rename to crates/seatbelt/src/circuit/mod.rs index 897c0c8a..1f9ad5af 100644 --- a/crates/seatbelt/src/circuit_breaker/mod.rs +++ b/crates/seatbelt/src/circuit/mod.rs @@ -5,8 +5,8 @@ //! This module provides automatic circuit breaking capabilities with configurable failure //! thresholds, break durations, and comprehensive telemetry. The primary types are: //! -//! - [`CircuitBreaker`] is the middleware that wraps an inner service and monitors failure rates -//! - [`CircuitBreakerLayer`] is used to configure and construct the circuit breaker middleware +//! - [`Circuit`] is the middleware that wraps an inner service and monitors failure rates +//! - [`CircuitLayer`] is used to configure and construct the circuit breaker middleware //! //! A circuit breaker monitors the success and failure rates of operations and can temporarily //! block requests when the failure rate exceeds a configured threshold. This prevents cascading failures @@ -17,13 +17,13 @@ //! ```rust //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; -//! # use seatbelt::circuit_breaker::CircuitBreaker; +//! # use seatbelt::circuit::Circuit; //! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! let options = SeatbeltOptions::new(&clock); //! //! let stack = ( -//! CircuitBreaker::layer("circuit_breaker", &options) +//! Circuit::layer("circuit_breaker", &options) //! // Required: determine if output indicates a failure by using recovery metadata //! .recovery_with(|result: &Result, _| match result { //! Ok(_) => RecoveryInfo::never(), @@ -44,14 +44,14 @@ //! //! # Configuration //! -//! The [`CircuitBreakerLayer`] uses a type state pattern to enforce that all required properties +//! The [`CircuitLayer`] uses a type state pattern to enforce that all required properties //! are configured before the layer can be built. This compile-time safety ensures that you cannot //! accidentally create a circuit breaker layer without properly specifying recovery logic and //! rejected input handling. The properties that must be configured are: //! -//! - [`recovery`][CircuitBreakerLayer::recovery]: Detects the recovery classification for output. +//! - [`recovery`][CircuitLayer::recovery]: Detects the recovery classification for output. //! This is used to determine if an operation succeeded or failed. -//! - [`rejected_input`][CircuitBreakerLayer::rejected_input]: Provide the output to return when the +//! - [`rejected_input`][CircuitLayer::rejected_input]: Provide the output to return when the //! circuit is open and execution is being rejected. //! //! Each circuit breaker layer requires an identifier for telemetry purposes. This identifier @@ -59,7 +59,7 @@ //! //! # Thread Safety //! -//! The [`CircuitBreaker`] type is thread-safe and implements both `Send` and `Sync` as enforced +//! The [`Circuit`] type is thread-safe and implements both `Send` and `Sync` as enforced //! by the `Service` trait it implements. This allows circuit breaker middleware to be safely //! shared across multiple threads and used in concurrent environments. //! @@ -93,17 +93,17 @@ //! - Failures are tracked and evaluated against the failure threshold //! - When the failure threshold is exceeded, transitions to **Open** //! - You can observe transitions into the closed state by providing -//! the [`on_closed`][CircuitBreakerLayer::on_closed] callback. +//! the [`on_closed`][CircuitLayer::on_closed] callback. //! //! ## Open State //! //! When the circuit is open: //! -//! - Requests are immediately rejected with the output provided by [`rejected_input`][CircuitBreakerLayer::rejected_input] +//! - Requests are immediately rejected with the output provided by [`rejected_input`][CircuitLayer::rejected_input] //! - No calls are made to the underlying service //! - After the break duration elapsed, transitions to **Half-Open** //! - You can observe transitions into the open state by providing -//! the [`on_opened`][CircuitBreakerLayer::on_opened] callback. +//! the [`on_opened`][CircuitLayer::on_opened] callback. //! //! ## Half-Open State //! @@ -115,9 +115,9 @@ //! - If failures continue, the circuit stays in the Half-Open state until the underlying service recovers. //! Half-open state respects the break duration before allowing more probing requests. //! - You can observe when the circuit is probing in half-open state by providing -//! the [`on_probing`][CircuitBreakerLayer::on_probing] callback. +//! the [`on_probing`][CircuitLayer::on_probing] callback. //! - You can configure the probing behavior and the sensitivity of how quickly the circuit -//! closes again after successful probes by using [`half_open_mode`][CircuitBreakerLayer::half_open_mode] +//! closes again after successful probes by using [`half_open_mode`][CircuitLayer::half_open_mode] //! //! # Recovery Classification //! @@ -130,7 +130,7 @@ //! # Partitioning //! //! Circuit breakers can maintain separate circuit states for different logical groups of requests -//! by providing a [`partition_key`][CircuitBreakerLayer::partition_key] function. This allows +//! by providing a [`partition_key`][CircuitLayer::partition_key] function. This allows //! the creation of multiple independent circuits based on the input properties. //! //! For example, a typical scenario where partitioning is useful is HTTP request where the partition key @@ -147,13 +147,13 @@ //! //! | Parameter | Default Value | Description | Configured By | //! |-----------|---------------|-------------|---------------| -//! | Failure threshold | `0.1` (10%) | Circuit opens when failure rate exceeds this percentage | [`failure_threshold`][CircuitBreakerLayer::failure_threshold] | -//! | Minimum throughput | `100` requests | Minimum request volume required before circuit can open | [`min_throughput`][CircuitBreakerLayer::min_throughput] | -//! | Sampling duration | `30` seconds | Time window for calculating failure rates | [`sampling_duration`][CircuitBreakerLayer::sampling_duration] | -//! | Break duration | `5` seconds | Duration circuit remains open before testing recovery | [`break_duration`][CircuitBreakerLayer::break_duration] | -//! | Partitioning | Single global circuit | All requests share the same circuit breaker state | [`partition_key`][CircuitBreakerLayer::partition_key] | -//! | Half-open mode | `Reliable` | Gradual recovery with increasing probe percentages | [`half_open_mode`][CircuitBreakerLayer::half_open_mode] | -//! | Enable condition | Always enabled | Circuit breaker protection is applied to all requests | [`enable_if`][CircuitBreakerLayer::enable_if], [`enable_always`][CircuitBreakerLayer::enable_always], [`disable`][CircuitBreakerLayer::disable] | +//! | Failure threshold | `0.1` (10%) | Circuit opens when failure rate exceeds this percentage | [`failure_threshold`][CircuitLayer::failure_threshold] | +//! | Minimum throughput | `100` requests | Minimum request volume required before circuit can open | [`min_throughput`][CircuitLayer::min_throughput] | +//! | Sampling duration | `30` seconds | Time window for calculating failure rates | [`sampling_duration`][CircuitLayer::sampling_duration] | +//! | Break duration | `5` seconds | Duration circuit remains open before testing recovery | [`break_duration`][CircuitLayer::break_duration] | +//! | Partitioning | Single global circuit | All requests share the same circuit breaker state | [`partition_key`][CircuitLayer::partition_key] | +//! | Half-open mode | `Reliable` | Gradual recovery with increasing probe percentages | [`half_open_mode`][CircuitLayer::half_open_mode] | +//! | Enable condition | Always enabled | Circuit breaker protection is applied to all requests | [`enable_if`][CircuitLayer::enable_if], [`enable_always`][CircuitLayer::enable_always], [`disable`][CircuitLayer::disable] | //! //! These defaults provide a reasonable starting point for most use cases, offering a balance //! between resilience and responsiveness to service recovery. @@ -166,7 +166,7 @@ //! - **When**: Emitted when circuit state transitions occur and when requests are rejected //! - **Attributes**: //! - `resilience.pipeline.name`: Pipeline identifier from [`SeatbeltOptions::pipeline_name`] -//! - `resilience.strategy.name`: Circuit breaker identifier from [`CircuitBreaker::layer`] +//! - `resilience.strategy.name`: Circuit breaker identifier from [`Circuit::layer`] //! - `resilience.event.name`: One of: //! - `circuit_opened`: When the circuit transitions to open state due to failure threshold being exceeded //! - `circuit_closed`: When the circuit transitions to closed state after successful probing @@ -186,7 +186,7 @@ //! ```rust //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; -//! # use seatbelt::circuit_breaker::CircuitBreaker; +//! # use seatbelt::circuit::Circuit; //! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and @@ -194,7 +194,7 @@ //! let options = SeatbeltOptions::new(&clock).pipeline_name("example"); //! //! let stack = ( -//! CircuitBreaker::layer("my_breaker", &options) +//! Circuit::layer("my_breaker", &options) //! // Required: determine if output indicates failure //! .recovery_with(|result: &Result, _args| match result { //! // These are demonstrative, real code will have more meaningful recovery detection @@ -227,14 +227,14 @@ //! # use std::time::Duration; //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; -//! # use seatbelt::circuit_breaker::{CircuitBreaker, PartitionKey, HalfOpenMode}; +//! # use seatbelt::circuit::{Circuit, PartitionKey, HalfOpenMode}; //! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. //! let options = SeatbeltOptions::new(&clock).pipeline_name("advanced_example"); //! //! let stack = ( -//! CircuitBreaker::layer("advanced_breaker", &options) +//! Circuit::layer("advanced_breaker", &options) //! // Required: determine if output indicates failure //! .recovery_with(|result: &Result, _args| match result { //! Err(msg) if msg.contains("rate_limit") => RecoveryInfo::unavailable(), @@ -284,16 +284,16 @@ //! //! ## Incomplete Configuration //! -//! This example demonstrates what happens when the `CircuitBreakerLayer` is not fully configured. +//! This example demonstrates what happens when the `CircuitLayer` is not fully configured. //! The code below will not compile because the circuit breaker layer is missing required configuration. //! //! ```compile_fail -//! # use seatbelt::circuit_breaker::CircuitBreaker; +//! # use seatbelt::circuit::Circuit; //! # use seatbelt::SeatbeltOptions; //! # use layered::Execute; //! # fn example(service_options: SeatbeltOptions>) { //! let stack = ( -//! CircuitBreaker::layer("test", &service_options), // Missing required configuration! +//! Circuit::layer("test", &service_options), // Missing required configuration! //! Execute::new(|input| async move { Ok(input) }) //! ); //! @@ -312,9 +312,9 @@ mod service; pub use args::{OnClosedArgs, OnOpenedArgs, OnProbingArgs, RecoveryArgs, RejectedInputArgs}; pub(super) use callbacks::*; #[doc(inline)] -pub use layer::CircuitBreakerLayer; +pub use layer::CircuitLayer; #[doc(inline)] -pub use service::CircuitBreaker; +pub use service::Circuit; mod execution_result; pub(super) use execution_result::ExecutionResult; diff --git a/crates/seatbelt/src/circuit_breaker/partition_key.rs b/crates/seatbelt/src/circuit/partition_key.rs similarity index 98% rename from crates/seatbelt/src/circuit_breaker/partition_key.rs rename to crates/seatbelt/src/circuit/partition_key.rs index d868d252..b912e91f 100644 --- a/crates/seatbelt/src/circuit_breaker/partition_key.rs +++ b/crates/seatbelt/src/circuit/partition_key.rs @@ -15,7 +15,7 @@ use std::hash::Hasher; /// ## Creation from a number /// /// ```rust -/// use seatbelt::circuit_breaker::PartitionKey; +/// use seatbelt::circuit::PartitionKey; /// /// let key = PartitionKey::from(42_u64); /// assert_eq!(key.to_string(), "42"); @@ -24,7 +24,7 @@ use std::hash::Hasher; /// ## Creation from HTTP request authority and scheme /// /// ```rust -/// use seatbelt::circuit_breaker::PartitionKey; +/// use seatbelt::circuit::PartitionKey; /// /// // Simulate extracting authority and scheme from an HTTP request /// let scheme = "https"; diff --git a/crates/seatbelt/src/circuit_breaker/service.rs b/crates/seatbelt/src/circuit/service.rs similarity index 93% rename from crates/seatbelt/src/circuit_breaker/service.rs rename to crates/seatbelt/src/circuit/service.rs index 77c2f62e..b2096c13 100644 --- a/crates/seatbelt/src/circuit_breaker/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -6,7 +6,7 @@ use layered::Service; use tick::Clock; use super::{ - CircuitBreakerLayer, CircuitEngine, Engines, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult, OnClosed, + CircuitLayer, CircuitEngine, Engines, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RecoveryArgs, RejectedInput, RejectedInputArgs, ShouldRecover, }; @@ -14,19 +14,19 @@ use crate::{EnableIf, NotSet}; /// Applies circuit breaker logic to prevent cascading failures. /// -/// `CircuitBreaker` wraps an inner [`Service`] and monitors the success and failure rates +/// `Circuit` wraps an inner [`Service`] and monitors the success and failure rates /// of operations. When the failure rate exceeds a configured threshold, the circuit breaker opens /// and temporarily blocks requests to give the downstream service time to recover. /// /// This middleware is designed to be used across services, applications, and libraries /// to prevent cascading failures in distributed systems. /// -/// `CircuitBreaker` is configured by calling [`CircuitBreaker::layer`] and using the -/// builder methods on the returned [`CircuitBreakerLayer`] instance. +/// `Circuit` is configured by calling [`Circuit::layer`] and using the +/// builder methods on the returned [`CircuitLayer`] instance. /// -/// For comprehensive examples and usage patterns, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. +/// For comprehensive examples and usage patterns, see the [`circuit_breaker` module][crate::circuit] documentation. #[derive(Debug)] -pub struct CircuitBreaker { +pub struct Circuit { pub(super) inner: S, pub(super) clock: Clock, pub(super) recovery: ShouldRecover, @@ -39,20 +39,20 @@ pub struct CircuitBreaker { pub(super) on_probing: Option>, } -impl CircuitBreaker { +impl Circuit { /// Creates a new circuit breaker layer with the specified name and options. /// - /// Returns a [`CircuitBreakerLayer`] that must be configured with required parameters + /// Returns a [`CircuitLayer`] that must be configured with required parameters /// before it can be used to build a circuit breaker service. pub fn layer( name: impl Into>, options: &crate::SeatbeltOptions, - ) -> CircuitBreakerLayer { - CircuitBreakerLayer::new(name.into().into(), options) + ) -> CircuitLayer { + CircuitLayer::new(name.into().into(), options) } } -impl Service for CircuitBreaker +impl Service for Circuit where In: Send, S: Service, @@ -90,7 +90,7 @@ where } } -impl CircuitBreaker { +impl Circuit { #[inline] fn before_execute( &self, @@ -169,15 +169,15 @@ mod tests { use tick::ClockControl; use super::*; - use crate::circuit_breaker::constants::DEFAULT_BREAK_DURATION; - use crate::circuit_breaker::{EngineFake, HalfOpenMode, HealthInfo, Stats}; + use crate::circuit::constants::DEFAULT_BREAK_DURATION; + use crate::circuit::{EngineFake, HalfOpenMode, HealthInfo, Stats}; use crate::service::Layer; use crate::{RecoveryInfo, SeatbeltOptions, Set}; #[test] fn layer_ensure_defaults() { let options = SeatbeltOptions::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); - let layer: CircuitBreakerLayer = CircuitBreaker::layer("test_breaker", &options); + let layer: CircuitLayer = Circuit::layer("test_breaker", &options); let layer = layer .recovery_with(|_, _| RecoveryInfo::never()) .rejected_input(|_, _| "rejected".to_string()); @@ -452,9 +452,9 @@ mod tests { assert_eq!(result, "B"); } - fn create_ready_circuit_breaker_layer(clock: &Clock) -> CircuitBreakerLayer { + fn create_ready_circuit_breaker_layer(clock: &Clock) -> CircuitLayer { let options = SeatbeltOptions::::new(clock.clone()).pipeline_name("test_pipeline"); - CircuitBreaker::layer("test_breaker", &options) + Circuit::layer("test_breaker", &options) .recovery_with(|output, _| { if output.contains("error") { RecoveryInfo::retry() diff --git a/crates/seatbelt/src/circuit_breaker/telemetry.rs b/crates/seatbelt/src/circuit/telemetry.rs similarity index 100% rename from crates/seatbelt/src/circuit_breaker/telemetry.rs rename to crates/seatbelt/src/circuit/telemetry.rs diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 93cb21e0..7c20ff39 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -90,7 +90,7 @@ //! //! - [`timeout`]: Cancels long-running operations. //! - [`retry`]: Automatically retries failed operations with configurable backoff strategies. -//! - [`circuit_breaker`]: Prevents cascading failures by stopping requests to unhealthy services. +//! - [`circuit`]: Prevents cascading failures by stopping requests to unhealthy services. //! //! ## Features //! @@ -105,7 +105,7 @@ //! - `timeout`: Enables the [`timeout`] middleware for canceling long-running operations. //! - `retry`: Enables the [`retry`] middleware for automatically retrying failed operations with //! configurable backoff strategies, jitter, and recovery classification. -//! - `circuit-breaker`: Enables the [`circuit_breaker`] middleware for preventing cascading failures. +//! - `circuit`: Enables the [`circuit`] middleware for preventing cascading failures. #[doc(inline)] pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; @@ -132,8 +132,8 @@ pub mod timeout; #[cfg(any(feature = "retry", test))] pub mod retry; -#[cfg(any(feature = "circuit-breaker", test))] -pub mod circuit_breaker; +#[cfg(any(feature = "circuit", test))] +pub mod circuit; /// Re-exported types from the [`layered`] crate. #[cfg(any(feature = "service", test))] @@ -142,7 +142,7 @@ pub mod service { pub use layered::{Layer, Service, Stack}; } -#[cfg(any(feature = "retry", feature = "circuit-breaker", test))] +#[cfg(any(feature = "retry", feature = "circuit", test))] mod rnd; #[cfg_attr(coverage_nightly, coverage(off))] From 9267c87719243e5e9d31544c695f29654a7aaf6c Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 15:11:30 +0100 Subject: [PATCH 13/60] cleanup --- crates/seatbelt/src/circuit/service.rs | 6 +++--- crates/seatbelt/src/lib.rs | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index b2096c13..1d5c4be6 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -6,9 +6,9 @@ use layered::Service; use tick::Clock; use super::{ - CircuitLayer, CircuitEngine, Engines, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult, OnClosed, - OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RecoveryArgs, RejectedInput, - RejectedInputArgs, ShouldRecover, + CircuitEngine, CircuitLayer, Engines, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult, OnClosed, OnClosedArgs, + OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RecoveryArgs, RejectedInput, RejectedInputArgs, + ShouldRecover, }; use crate::{EnableIf, NotSet}; diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 7c20ff39..de98acb6 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -5,9 +5,12 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/logo.png")] #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/favicon.ico")] -#![expect( - rustdoc::broken_intra_doc_links, - reason = "Too ugly to make 'live links' possible with the combination of features" +#![cfg_attr( + not(all(feature = "retry", feature = "timeout", feature = "circuit")), + expect( + rustdoc::broken_intra_doc_links, + reason = "too ugly to make 'live links' possible with the combination of features" + ) )] //! Resilience and fault handling for applications and libraries. From 9a74869691388b2e70fbd811ed680211a8bc9e07 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 16:07:13 +0100 Subject: [PATCH 14/60] rename SeatbeltOptions to Context --- crates/seatbelt/README.md | 16 ++--- crates/seatbelt/benches/circuit.rs | 8 +-- crates/seatbelt/benches/observability.rs | 18 ++--- crates/seatbelt/benches/retry.rs | 11 +-- crates/seatbelt/benches/timeout.rs | 7 +- crates/seatbelt/examples/circuit.rs | 6 +- .../seatbelt/examples/resilience_pipeline.rs | 10 ++- crates/seatbelt/examples/retry.rs | 6 +- crates/seatbelt/examples/retry_advanced.rs | 8 +-- crates/seatbelt/examples/retry_outage.rs | 6 +- crates/seatbelt/examples/timeout.rs | 6 +- crates/seatbelt/examples/timeout_advanced.rs | 8 +-- crates/seatbelt/src/circuit/layer.rs | 54 +++++++------- crates/seatbelt/src/circuit/mod.rs | 24 +++---- crates/seatbelt/src/circuit/service.rs | 14 ++-- crates/seatbelt/src/lib.rs | 14 ++-- .../{seatbelt_options.rs => context.rs} | 52 +++++++------- crates/seatbelt/src/options/mod.rs | 4 +- crates/seatbelt/src/retry/layer.rs | 72 +++++++++---------- crates/seatbelt/src/retry/mod.rs | 24 +++---- crates/seatbelt/src/retry/service.rs | 25 +++---- crates/seatbelt/src/timeout/layer.rs | 42 +++++------ crates/seatbelt/src/timeout/mod.rs | 26 +++---- crates/seatbelt/src/timeout/service.rs | 24 +++---- 24 files changed, 239 insertions(+), 246 deletions(-) rename crates/seatbelt/src/options/{seatbelt_options.rs => context.rs} (83%) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index e6be42a8..0e28e3a7 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -38,12 +38,12 @@ and **Timeout** prevents operations from hanging indefinitely: ```rust use seatbelt::retry::Retry; use seatbelt::timeout::Timeout; -use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use seatbelt::{RecoveryInfo, Context}; -let options = SeatbeltOptions::new(&clock); +let context = Context::new(&clock); let service = ( // Retry middleware: Automatically retries failed operations - Retry::layer("retry", &options) + Retry::layer("retry", &context) .clone_input() .recovery_with(|output: &String, _| match output.as_str() { "temporary_error" => RecoveryInfo::retry(), @@ -51,7 +51,7 @@ let service = ( _ => RecoveryInfo::never(), }), // Timeout middleware: Cancels operations that take too long - Timeout::layer("timeout", &options) + Timeout::layer("timeout", &context) .timeout_output(|_| "operation timed out".to_string()) .timeout(Duration::from_secs(30)), // Your core business logic @@ -65,7 +65,7 @@ let result = service.execute("input data".to_string()).await; > > **Note**: Resilience middleware requires [`Clock`][__link2] from the [`tick`][__link3] crate for timing > operations like delays, timeouts, and backoff calculations. The clock is passed through - > [`SeatbeltOptions`][__link4] when creating middleware layers. + > [`Context`][__link4] when creating middleware layers. See [Built-in Middlewares](#built-in-middleware) for more details. @@ -88,7 +88,7 @@ for each module for details on how to use them. This crate supports several optional features that can be enabled to extend functionality: -* `options`: Enables common APIs for building resilience middleware, including [`SeatbeltOptions`][__link9]. +* `options`: Enables common APIs for building resilience middleware, including [`Context`][__link9]. Requires [`tick`][__link10] for timing operations. * `service`: Re-exports common types for building middleware from [`layered`][__link11] crate. * `telemetry`: Enables telemetry and observability features using OpenTelemetry for monitoring @@ -115,9 +115,9 @@ This crate was developed as part of The Oxidizer Project. Br [__link14]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html [__link2]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link3]: https://crates.io/crates/tick/0.1.2 - [__link4]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::SeatbeltOptions + [__link4]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context [__link5]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link6]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html [__link7]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html - [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::SeatbeltOptions + [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context diff --git a/crates/seatbelt/benches/circuit.rs b/crates/seatbelt/benches/circuit.rs index 042010e6..2556e23f 100644 --- a/crates/seatbelt/benches/circuit.rs +++ b/crates/seatbelt/benches/circuit.rs @@ -1,11 +1,11 @@ // Copyright (c) Microsoft Corporation. - +#![expect(missing_docs, reason = "benchmark code")] use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; use seatbelt::circuit::Circuit; -use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use seatbelt::{Context, RecoveryInfo}; use tick::Clock; #[global_allocator] @@ -26,10 +26,10 @@ fn entry(c: &mut Criterion) { }); // With circuit breaker (closed state) - let options = SeatbeltOptions::new(Clock::new_frozen()); + let context = Context::new(Clock::new_frozen()); let service = ( - Circuit::layer("bench", &options) + Circuit::layer("bench", &context) .recovery_with(|_, _| RecoveryInfo::never()) .rejected_input_error(|_input, _args| Output) .min_throughput(1000), // High threshold to keep circuit closed diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index a3ade010..eef05a98 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -1,5 +1,5 @@ // Copyright (c) Microsoft Corporation. - +#![expect(missing_docs, reason = "benchmark code")] use std::time::Duration; use alloc_tracker::{Allocator, Session}; @@ -12,7 +12,7 @@ use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::metrics::data::ResourceMetrics; use opentelemetry_sdk::metrics::exporter::PushMetricExporter; use opentelemetry_sdk::metrics::{SdkMeterProvider, Temporality}; -use seatbelt::SeatbeltOptions; +use seatbelt::Context; use seatbelt::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; use tick::Clock; @@ -34,8 +34,8 @@ fn entry(c: &mut Criterion) { }); // With observability - let options = SeatbeltOptions::new(Clock::new_frozen()); - let service = Observability::new(&options, Execute::new(|v: Input| async move { Output::from(v) })); + let context = Context::new(Clock::new_frozen()); + let service = Observability::new(&context, Execute::new(|v: Input| async move { Output::from(v) })); let operation = session.operation("observability"); group.bench_function("observability", |b| { b.iter(|| { @@ -46,8 +46,8 @@ fn entry(c: &mut Criterion) { // With observability + listener let meter_provider = SdkMeterProvider::builder().with_periodic_exporter(EmptyExporter).build(); - let options = SeatbeltOptions::new(Clock::new_frozen()).meter_provider(&meter_provider); - let service = Observability::new(&options, Execute::new(|v: Input| async move { Output::from(v) })); + let context = Context::new(Clock::new_frozen()).meter_provider(&meter_provider); + let service = Observability::new(&context, Execute::new(|v: Input| async move { Output::from(v) })); let operation = session.operation("observability-and-listener"); group.bench_function("observability-and-listener", |b| { b.iter(|| { @@ -83,11 +83,11 @@ struct Observability { } impl Observability { - pub fn new(options: &SeatbeltOptions, service: S) -> Self { + pub fn new(context: &Context, service: S) -> Self { Self { service, - event_reporter: options.create_resilience_event_counter(), - pipeline_name: options.get_pipeline_name().clone().into(), + event_reporter: context.create_resilience_event_counter(), + pipeline_name: context.get_pipeline_name().clone().into(), } } } diff --git a/crates/seatbelt/benches/retry.rs b/crates/seatbelt/benches/retry.rs index ee128b85..736a6d3f 100644 --- a/crates/seatbelt/benches/retry.rs +++ b/crates/seatbelt/benches/retry.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +#![expect(missing_docs, reason = "benchmark code")] use std::time::Duration; @@ -7,7 +8,7 @@ use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; use seatbelt::retry::Retry; -use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use seatbelt::{Context, RecoveryInfo}; use tick::Clock; #[global_allocator] @@ -28,10 +29,10 @@ fn entry(c: &mut Criterion) { }); // With retry - let options = SeatbeltOptions::new(Clock::new_frozen()); + let context = Context::new(Clock::new_frozen()); let service = ( - Retry::layer("bench", &options) + Retry::layer("bench", &context) .clone_input() .recovery_with(|_, _| RecoveryInfo::never()), Execute::new(|v: Input| async move { Output::from(v) }), @@ -47,10 +48,10 @@ fn entry(c: &mut Criterion) { }); // With retry and recovery - let options = SeatbeltOptions::new(Clock::new_frozen()); + let context = Context::new(Clock::new_frozen()); let service = ( - Retry::layer("bench", &options) + Retry::layer("bench", &context) .clone_input() .max_retry_attempts(1) .base_delay(Duration::ZERO) diff --git a/crates/seatbelt/benches/timeout.rs b/crates/seatbelt/benches/timeout.rs index beb498c8..fc370bc8 100644 --- a/crates/seatbelt/benches/timeout.rs +++ b/crates/seatbelt/benches/timeout.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +#![expect(missing_docs, reason = "benchmark code")] use std::time::Duration; @@ -6,7 +7,7 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use seatbelt::SeatbeltOptions; +use seatbelt::Context; use seatbelt::timeout::Timeout; use tick::Clock; @@ -28,10 +29,10 @@ fn entry(c: &mut Criterion) { }); // With timeout - let options = SeatbeltOptions::new(Clock::new_frozen()); + let context = Context::new(Clock::new_frozen()); let service = ( - Timeout::layer("bench", &options) + Timeout::layer("bench", &context) .timeout_output(|_args| Output) .timeout(Duration::from_secs(10)), Execute::new(|v: Input| async move { Output::from(v) }), diff --git a/crates/seatbelt/examples/circuit.rs b/crates/seatbelt/examples/circuit.rs index 00c41cce..d26e06d0 100644 --- a/crates/seatbelt/examples/circuit.rs +++ b/crates/seatbelt/examples/circuit.rs @@ -15,7 +15,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::circuit::Circuit; -use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use seatbelt::{Context, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -25,11 +25,11 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let options = SeatbeltOptions::new(&clock).meter_provider(&meter_provider); + let context = Context::new(&clock).meter_provider(&meter_provider); // Define stack with circuit breaker layer let stack = ( - Circuit::layer("my_circuit_breaker", &options) + Circuit::layer("my_circuit_breaker", &context) // Required: classify the recoverability of outputs .recovery_with(|output, _args| match output { Ok(_) => RecoveryInfo::never(), diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index bdcde806..0b2bfcf6 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -12,7 +12,7 @@ use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; use seatbelt::timeout::Timeout; -use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use seatbelt::{Context, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -24,13 +24,11 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Shared options for resilience middleware - let options = SeatbeltOptions::new(&clock) - .meter_provider(&meter_provider) - .pipeline_name("my_pipeline"); + let context = Context::new(&clock).meter_provider(&meter_provider).pipeline_name("my_pipeline"); // Define stack with retry and timeout middlewares let stack = ( - Retry::layer("my_retry", &options) + Retry::layer("my_retry", &context) // automatically clones the input for retries .clone_input() // classify the output @@ -38,7 +36,7 @@ async fn main() -> Result<(), AppError> { "error" | "timeout" => RecoveryInfo::retry(), _ => RecoveryInfo::never(), }), - Timeout::layer("my_timeout", &options) + Timeout::layer("my_timeout", &context) .timeout(Duration::from_secs(1)) .timeout_output(|_args| "timeout".to_string()), Execute::new(execute_operation), diff --git a/crates/seatbelt/examples/retry.rs b/crates/seatbelt/examples/retry.rs index 0a2aa317..653f3a48 100644 --- a/crates/seatbelt/examples/retry.rs +++ b/crates/seatbelt/examples/retry.rs @@ -7,17 +7,17 @@ use std::io::Error; use layered::{Execute, Service, Stack}; use ohno::AppError; use seatbelt::retry::Retry; -use seatbelt::{RecoveryInfo, SeatbeltOptions}; +use seatbelt::{Context, RecoveryInfo}; use tick::Clock; #[tokio::main] async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); - let options = SeatbeltOptions::new(&clock); + let context = Context::new(&clock); // Define stack with retry layer let stack = ( - Retry::layer("my_retry", &options) + Retry::layer("my_retry", &context) .clone_input() // Automatically clone input for retries .recovery_with(|output, _args| match output { Ok(_) => RecoveryInfo::never(), diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index 66ddb86e..f3fbb8e4 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -14,7 +14,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; -use seatbelt::{Attempt, RecoveryInfo, SeatbeltOptions}; +use seatbelt::{Attempt, Context, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -24,13 +24,11 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let options = SeatbeltOptions::new(&clock) - .pipeline_name("retry_advanced") - .meter_provider(&meter_provider); + let context = Context::new(&clock).pipeline_name("retry_advanced").meter_provider(&meter_provider); // Define stack with retry layer let stack = ( - Retry::layer("my_retry", &options) + Retry::layer("my_retry", &context) // Custom input cloning - inject attempt info into request extensions .clone_input_with(|input: &mut Request, args| { let mut cloned = input.clone(); diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index c8048767..efac0fb0 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -16,7 +16,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; -use seatbelt::{Recovery, RecoveryInfo, SeatbeltOptions}; +use seatbelt::{Context, Recovery, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -29,11 +29,11 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let options = SeatbeltOptions::new(&clock).meter_provider(&meter_provider); + let context = Context::new(&clock).meter_provider(&meter_provider); // Configure retry layer for outage handling with input restoration let stack = ( - Retry::layer("outage_retry", &options) + Retry::layer("outage_retry", &context) // Disable input cloning - we'll restore from error instead .clone_input_with(|_, _| None) // Configure recovery based on an error type diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index 4c43384e..9f5d9724 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -11,7 +11,7 @@ use layered::{Execute, Service, Stack}; use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::SeatbeltOptions; +use seatbelt::Context; use seatbelt::timeout::Timeout; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; @@ -27,11 +27,11 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create common options - let options = SeatbeltOptions::new(&clock).meter_provider(&meter_provider); + let context = Context::new(&clock).meter_provider(&meter_provider); // Define stack with timeout layer let stack = ( - Timeout::layer("my_timeout", &options) + Timeout::layer("my_timeout", &context) // Required: specify the timeout duration .timeout(TIMEOUT_DURATION) // Required: create error output for timeouts diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index 954101aa..23df2158 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -11,7 +11,7 @@ use layered::{Execute, Service, Stack}; use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::SeatbeltOptions; +use seatbelt::Context; use seatbelt::timeout::Timeout; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; @@ -28,13 +28,11 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create service options - let options: SeatbeltOptions> = SeatbeltOptions::new(&clock) - .pipeline_name("my_pipeline") - .meter_provider(&meter_provider); + let context: Context> = Context::new(&clock).pipeline_name("my_pipeline").meter_provider(&meter_provider); // Define stack with timeout layer let stack = ( - Timeout::layer("my_timeout", &options) + Timeout::layer("my_timeout", &context) // Required: specify the timeout duration .timeout(TIMEOUT_DURATION) // Required: create error output for timeouts diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index 25b7ff3d..581b4b79 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -12,7 +12,7 @@ use super::{ }; use crate::circuit::engine::probing::ProbesOptions; use crate::service::Layer; -use crate::{EnableIf, NotSet, Recovery, RecoveryInfo, SeatbeltOptions, Set}; +use crate::{Context, EnableIf, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring circuit breaker resilience middleware. /// @@ -26,7 +26,7 @@ use crate::{EnableIf, NotSet, Recovery, RecoveryInfo, SeatbeltOptions, Set}; /// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit] documentation. #[derive(Debug)] pub struct CircuitLayer { - options: SeatbeltOptions, + context: Context, recovery: Option>, rejected_input: Option>, on_opened: Option>, @@ -45,9 +45,9 @@ pub struct CircuitLayer impl CircuitLayer { #[must_use] - pub(crate) fn new(name: StringValue, options: &SeatbeltOptions) -> Self { + pub(crate) fn new(name: StringValue, context: &Context) -> Self { Self { - options: options.clone(), + context: context.clone(), recovery: None, rejected_input: None, on_opened: None, @@ -394,7 +394,7 @@ impl Layer for CircuitLayer { fn layer(&self, inner: S) -> Self::Service { Circuit { inner, - clock: self.options.get_clock().clone(), + clock: self.context.get_clock().clone(), recovery: self.recovery.clone().expect("recovery must be set in Ready state"), rejected_input: self.rejected_input.clone().expect("rejected_input must be set in Ready state"), enable_if: self.enable_if.clone(), @@ -421,16 +421,16 @@ impl CircuitLayer(self) -> CircuitLayer { CircuitLayer { - options: self.options, + context: self.context, recovery: self.recovery, rejected_input: self.rejected_input, on_opened: self.on_opened, @@ -465,8 +465,8 @@ mod tests { #[test] #[expect(clippy::float_cmp, reason = "Test")] fn new_creates_correct_initial_state() { - let options = create_test_options(); - let layer: CircuitLayer<_, _, NotSet, NotSet> = CircuitLayer::new(StringValue::from("test_breaker"), &options); + let context = create_test_context(); + let layer: CircuitLayer<_, _, NotSet, NotSet> = CircuitLayer::new(StringValue::from("test_breaker"), &context); assert!(layer.recovery.is_none()); assert!(layer.rejected_input.is_none()); @@ -479,8 +479,8 @@ mod tests { #[test] fn recovery_sets_correctly() { - let options = create_test_options(); - let layer = CircuitLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = CircuitLayer::new(StringValue::from("test"), &context); let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery_with(|output, _args| { if output.contains("error") { @@ -511,8 +511,8 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { - let options = SeatbeltOptions::::new(Clock::new_frozen()); - let layer = CircuitLayer::new(StringValue::from("test"), &options); + let context = Context::::new(Clock::new_frozen()); + let layer = CircuitLayer::new(StringValue::from("test"), &context); let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery(); @@ -537,8 +537,8 @@ mod tests { #[test] fn rejected_input_sets_correctly() { - let options = create_test_options(); - let layer = CircuitLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = CircuitLayer::new(StringValue::from("test"), &context); let layer: CircuitLayer<_, _, NotSet, Set> = layer.rejected_input(|_, _| "rejected".to_string()); @@ -640,8 +640,8 @@ mod tests { #[test] #[expect(clippy::float_cmp, reason = "Test")] fn default_values_are_correct() { - let options = create_test_options(); - let layer = CircuitLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = CircuitLayer::new(StringValue::from("test"), &context); assert_eq!(layer.failure_threshold, DEFAULT_FAILURE_THRESHOLD); assert_eq!(layer.min_throughput, DEFAULT_MIN_THROUGHPUT); @@ -663,12 +663,12 @@ mod tests { .probes_options(); // access the last probe which should be the health probe - let options = probes.probes().last().unwrap(); + let probe = probes.probes().last().unwrap(); - match options { - ProbeOptions::HealthProbe(options) => { - assert_eq!(options.stage_duration(), Duration::from_secs(234)); - assert_eq!(options.failure_threshold(), 0.52); + match probe { + ProbeOptions::HealthProbe(health_probe) => { + assert_eq!(health_probe.stage_duration(), Duration::from_secs(234)); + assert_eq!(health_probe.failure_threshold(), 0.52); } ProbeOptions::SingleProbe { .. } => panic!("Expected HealthProbe"), } @@ -682,12 +682,12 @@ mod tests { static_assertions::assert_impl_all!(CircuitLayer: Debug); } - fn create_test_options() -> SeatbeltOptions { - SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context() -> Context { + Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") } fn create_ready_layer() -> CircuitLayer { - CircuitLayer::new(StringValue::from("test"), &create_test_options()) + CircuitLayer::new(StringValue::from("test"), &create_test_context()) .recovery_with(|output, _args| { if output.contains("error") { RecoveryInfo::retry() diff --git a/crates/seatbelt/src/circuit/mod.rs b/crates/seatbelt/src/circuit/mod.rs index 1f9ad5af..05a8aa17 100644 --- a/crates/seatbelt/src/circuit/mod.rs +++ b/crates/seatbelt/src/circuit/mod.rs @@ -18,12 +18,12 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit::Circuit; -//! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! # use seatbelt::{RecoveryInfo, Context}; //! # async fn example(clock: Clock) -> Result<(), Box> { -//! let options = SeatbeltOptions::new(&clock); +//! let context = Context::new(&clock); //! //! let stack = ( -//! Circuit::layer("circuit_breaker", &options) +//! Circuit::layer("circuit_breaker", &context) //! // Required: determine if output indicates a failure by using recovery metadata //! .recovery_with(|result: &Result, _| match result { //! Ok(_) => RecoveryInfo::never(), @@ -165,7 +165,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when circuit state transitions occur and when requests are rejected //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`SeatbeltOptions::pipeline_name`] +//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`] //! - `resilience.strategy.name`: Circuit breaker identifier from [`Circuit::layer`] //! - `resilience.event.name`: One of: //! - `circuit_opened`: When the circuit transitions to open state due to failure threshold being exceeded @@ -187,14 +187,14 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit::Circuit; -//! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! # use seatbelt::{RecoveryInfo, Context}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let options = SeatbeltOptions::new(&clock).pipeline_name("example"); +//! let context = Context::new(&clock).pipeline_name("example"); //! //! let stack = ( -//! Circuit::layer("my_breaker", &options) +//! Circuit::layer("my_breaker", &context) //! // Required: determine if output indicates failure //! .recovery_with(|result: &Result, _args| match result { //! // These are demonstrative, real code will have more meaningful recovery detection @@ -228,13 +228,13 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit::{Circuit, PartitionKey, HalfOpenMode}; -//! # use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! # use seatbelt::{RecoveryInfo, Context}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. -//! let options = SeatbeltOptions::new(&clock).pipeline_name("advanced_example"); +//! let context = Context::new(&clock).pipeline_name("advanced_example"); //! //! let stack = ( -//! Circuit::layer("advanced_breaker", &options) +//! Circuit::layer("advanced_breaker", &context) //! // Required: determine if output indicates failure //! .recovery_with(|result: &Result, _args| match result { //! Err(msg) if msg.contains("rate_limit") => RecoveryInfo::unavailable(), @@ -289,9 +289,9 @@ //! //! ```compile_fail //! # use seatbelt::circuit::Circuit; -//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::Context; //! # use layered::Execute; -//! # fn example(service_options: SeatbeltOptions>) { +//! # fn example(context: Context>) { //! let stack = ( //! Circuit::layer("test", &service_options), // Missing required configuration! //! Execute::new(|input| async move { Ok(input) }) diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index 1d5c4be6..cfeb4aed 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -46,9 +46,9 @@ impl Circuit { /// before it can be used to build a circuit breaker service. pub fn layer( name: impl Into>, - options: &crate::SeatbeltOptions, + context: &crate::Context, ) -> CircuitLayer { - CircuitLayer::new(name.into().into(), options) + CircuitLayer::new(name.into().into(), context) } } @@ -172,12 +172,12 @@ mod tests { use crate::circuit::constants::DEFAULT_BREAK_DURATION; use crate::circuit::{EngineFake, HalfOpenMode, HealthInfo, Stats}; use crate::service::Layer; - use crate::{RecoveryInfo, SeatbeltOptions, Set}; + use crate::{Context, RecoveryInfo, Set}; #[test] fn layer_ensure_defaults() { - let options = SeatbeltOptions::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); - let layer: CircuitLayer = Circuit::layer("test_breaker", &options); + let context = Context::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); + let layer: CircuitLayer = Circuit::layer("test_breaker", &context); let layer = layer .recovery_with(|_, _| RecoveryInfo::never()) .rejected_input(|_, _| "rejected".to_string()); @@ -453,8 +453,8 @@ mod tests { } fn create_ready_circuit_breaker_layer(clock: &Clock) -> CircuitLayer { - let options = SeatbeltOptions::::new(clock.clone()).pipeline_name("test_pipeline"); - Circuit::layer("test_breaker", &options) + let context = Context::::new(clock.clone()).pipeline_name("test_pipeline"); + Circuit::layer("test_breaker", &context) .recovery_with(|output, _| { if output.contains("error") { RecoveryInfo::retry() diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index de98acb6..ab5875fa 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -43,13 +43,13 @@ //! # use layered::{Execute, Service, Stack}; //! use seatbelt::retry::Retry; //! use seatbelt::timeout::Timeout; -//! use seatbelt::{RecoveryInfo, SeatbeltOptions}; +//! use seatbelt::{RecoveryInfo, Context}; //! //! # async fn main(clock: Clock) { -//! let options = SeatbeltOptions::new(&clock); +//! let context = Context::new(&clock); //! let service = ( //! // Retry middleware: Automatically retries failed operations -//! Retry::layer("retry", &options) +//! Retry::layer("retry", &context) //! .clone_input() //! .recovery_with(|output: &String, _| match output.as_str() { //! "temporary_error" => RecoveryInfo::retry(), @@ -57,7 +57,7 @@ //! _ => RecoveryInfo::never(), //! }), //! // Timeout middleware: Cancels operations that take too long -//! Timeout::layer("timeout", &options) +//! Timeout::layer("timeout", &context) //! .timeout_output(|_| "operation timed out".to_string()) //! .timeout(Duration::from_secs(30)), //! // Your core business logic @@ -76,7 +76,7 @@ //! //! > **Note**: Resilience middleware requires [`Clock`][tick::Clock] from the [`tick`] crate for timing //! > operations like delays, timeouts, and backoff calculations. The clock is passed through -//! > [`SeatbeltOptions`] when creating middleware layers. +//! > [`Context`] when creating middleware layers. //! //! See [Built-in Middlewares](#built-in-middleware) for more details. //! @@ -99,7 +99,7 @@ //! //! This crate supports several optional features that can be enabled to extend functionality: //! -//! - `options`: Enables common APIs for building resilience middleware, including [`SeatbeltOptions`]. +//! - `options`: Enables common APIs for building resilience middleware, including [`Context`]. //! Requires [`tick`] for timing operations. //! - `service`: Re-exports common types for building middleware from [`layered`] crate. //! - `telemetry`: Enables telemetry and observability features using OpenTelemetry for monitoring @@ -124,7 +124,7 @@ pub(crate) use options::define_fn_wrapper; #[cfg(any(feature = "retry", test))] pub(crate) use crate::options::MaxAttempts; #[cfg(any(feature = "options", test))] -pub use crate::options::{Attempt, Backoff, NotSet, SeatbeltOptions, Set}; +pub use crate::options::{Attempt, Backoff, Context, NotSet, Set}; #[cfg(any(feature = "telemetry", test))] pub mod telemetry; diff --git a/crates/seatbelt/src/options/seatbelt_options.rs b/crates/seatbelt/src/options/context.rs similarity index 83% rename from crates/seatbelt/src/options/seatbelt_options.rs rename to crates/seatbelt/src/options/context.rs index 02d36d32..15fee623 100644 --- a/crates/seatbelt/src/options/seatbelt_options.rs +++ b/crates/seatbelt/src/options/context.rs @@ -11,13 +11,13 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// Shared options for resilience middleware pipelines. /// -/// `SeatbeltOptions` bundles a clock and telemetry primitives (OpenTelemetry +/// `Context` bundles a clock and telemetry primitives (OpenTelemetry /// meter) that resilience middleware uses to measure time, emit metrics, and /// report events. Use a single instance to configure a whole resilience /// pipeline (retries, timeouts, circuit breakers, hedging, rate limiting, /// etc.). /// -/// The [`pipeline_name`][`SeatbeltOptions::pipeline_name`] groups resilience middleware under one logical +/// The [`pipeline_name`][`Context::pipeline_name`] groups resilience middleware under one logical /// parent for telemetry correlation. When you reuse the same name across all /// policies in a pipeline, exported metrics and events will carry the same /// pipeline attribute, making dashboards and analysis easier. @@ -32,25 +32,25 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// ```rust /// # use opentelemetry::KeyValue; /// # use opentelemetry::metrics::MeterProvider; -/// # use seatbelt::SeatbeltOptions; +/// # use seatbelt::Context; /// # use seatbelt::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; /// # use tick::Clock; /// # fn example(clock: Clock, meter_provider: &dyn MeterProvider) { -/// let options = SeatbeltOptions::::new(&clock) +/// let ctx = Context::::new(&clock) /// .pipeline_name("auth_pipeline") /// .meter_provider(meter_provider); /// /// // Use the clock for timing operations -/// let start = options.get_clock().instant(); +/// let start = ctx.get_clock().instant(); /// /// // Create an event reporter -/// let resilience_counter = options.create_resilience_event_counter(); +/// let resilience_counter = ctx.create_resilience_event_counter(); /// /// // Report an event with required attributes /// resilience_counter.add( /// 1, /// &[ -/// KeyValue::new(PIPELINE_NAME, options.get_pipeline_name().clone()), +/// KeyValue::new(PIPELINE_NAME, ctx.get_pipeline_name().clone()), /// KeyValue::new(STRATEGY_NAME, "my_strategy"), /// KeyValue::new(EVENT_NAME, "my_event"), /// ], @@ -58,15 +58,15 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// # } /// ``` /// -/// # Authoring Resilience `MIddlewares` +/// # Authoring Resilience `Middlewares` /// /// This crate’s primitives are designed to make writing custom resilience middleware -/// straightforward and observable. Pass a shared [`SeatbeltOptions`] to each +/// straightforward and observable. Pass a shared [`Context`] to each /// middleware that needs telemetry. You can keep a single instance (or derive child /// scopes from it) to group related middleware under one logical parent so their /// telemetry automatically shares correlation dimensions (e.g., the pipeline name). /// -/// With `SeatbeltOptions` you can: +/// With `Context` you can: /// /// - Share a common `Clock` for consistent timing and timeouts /// - Create and use telemetry instruments via the OpenTelemetry `Meter` @@ -76,7 +76,7 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// /// Resilience middleware typically follows this pattern: /// -/// 1. Accept a [`SeatbeltOptions`] in your layer/builder to configure observability +/// 1. Accept a [`Context`] in your layer/builder to configure observability /// 2. Create instruments from the options (e.g., counters, histograms) /// 3. Wrap the next service/layer and forward requests without hidden global state /// 4. Report events at key decision points (retries, circuit breaker trips, timeouts, …) @@ -90,7 +90,7 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// > `layered` and reference it here instead of diverging. #[derive(Debug)] #[non_exhaustive] -pub struct SeatbeltOptions { +pub struct Context { clock: Clock, pipeline_name: Cow<'static, str>, meter: Meter, @@ -98,7 +98,7 @@ pub struct SeatbeltOptions { _out: std::marker::PhantomData Out>, } -impl SeatbeltOptions { +impl Context { /// Create options with a clock and the global meter provider. /// /// Initializes with `pipeline_name = "default"` and a meter from the @@ -184,10 +184,10 @@ impl SeatbeltOptions { /// # use tick::Clock; /// # use opentelemetry::KeyValue; /// # use opentelemetry::metrics::Meter; - /// # use seatbelt::SeatbeltOptions; + /// # use seatbelt::Context; /// # use seatbelt::telemetry::{PIPELINE_NAME, STRATEGY_NAME, EVENT_NAME}; - /// # fn example(seatbelt_options: &SeatbeltOptions<(), ()>) { - /// let counter = seatbelt_options.create_resilience_event_counter(); + /// # fn example(context: &Context<(), ()>) { + /// let counter = context.create_resilience_event_counter(); /// /// counter.add( /// 1, @@ -205,7 +205,7 @@ impl SeatbeltOptions { } } -impl Clone for SeatbeltOptions { +impl Clone for Context { fn clone(&self) -> Self { Self { clock: self.clock.clone(), @@ -227,18 +227,18 @@ mod tests { #[test] fn test_new_with_clock_sets_default_pipeline_name() { let clock = tick::Clock::new_frozen(); - let options = SeatbeltOptions::<(), ()>::new(clock); - assert_eq!(options.get_pipeline_name().as_ref(), DEFAULT_PIPELINE_NAME); + let ctx = Context::<(), ()>::new(clock); + assert_eq!(ctx.get_pipeline_name().as_ref(), DEFAULT_PIPELINE_NAME); // Ensure clock reference behaves (timestamp monotonic relative behaviour not required, just accessible) - let _ = options.get_clock().system_time(); + let _ = ctx.get_clock().system_time(); } #[test] fn test_pipeline_name_with_custom_value_sets_name_and_is_owned() { let clock = tick::Clock::new_frozen(); - let options = SeatbeltOptions::<(), ()>::new(clock).pipeline_name(String::from("custom_pipeline")); - assert_eq!(options.get_pipeline_name().as_ref(), "custom_pipeline"); - assert!(matches!(options.get_pipeline_name(), Cow::Owned(_))); + let ctx = Context::<(), ()>::new(clock).pipeline_name(String::from("custom_pipeline")); + assert_eq!(ctx.get_pipeline_name().as_ref(), "custom_pipeline"); + assert!(matches!(ctx.get_pipeline_name(), Cow::Owned(_))); } #[cfg(not(miri))] @@ -247,9 +247,9 @@ mod tests { let clock = tick::Clock::new_frozen(); let (provider, exporter) = test_meter_provider(); - let options = SeatbeltOptions::<(), ()>::new(clock).meter_provider(&provider); - let c1 = create_resilience_event_counter(options.get_meter()); - let c2 = create_resilience_event_counter(options.get_meter()); + let ctx = Context::<(), ()>::new(clock).meter_provider(&provider); + let c1 = create_resilience_event_counter(ctx.get_meter()); + let c2 = create_resilience_event_counter(ctx.get_meter()); c1.add(1, &[]); c2.add(2, &[]); diff --git a/crates/seatbelt/src/options/mod.rs b/crates/seatbelt/src/options/mod.rs index af728227..0c8c3443 100644 --- a/crates/seatbelt/src/options/mod.rs +++ b/crates/seatbelt/src/options/mod.rs @@ -24,8 +24,8 @@ impl EnableIf { } } -mod seatbelt_options; -pub use seatbelt_options::SeatbeltOptions; +mod context; +pub use context::Context; mod define_fn_wrapper; pub(crate) use define_fn_wrapper::define_fn_wrapper; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 1e9b4a17..d37cd44a 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -9,7 +9,7 @@ use crate::retry::backoff::BackoffOptions; use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; use crate::service::Layer; -use crate::{Backoff, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, SeatbeltOptions, Set}; +use crate::{Backoff, Context, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring retry resilience middleware. /// @@ -22,7 +22,7 @@ use crate::{Backoff, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, Seat /// For comprehensive examples, see the [retry module][crate::retry] documentation. #[derive(Debug)] pub struct RetryLayer { - options: SeatbeltOptions, + context: Context, max_attempts: MaxAttempts, backoff: BackoffOptions, clone_input: Option>, @@ -37,9 +37,9 @@ pub struct RetryLayer { impl RetryLayer { #[must_use] - pub(crate) fn new(name: StringValue, options: &SeatbeltOptions) -> Self { + pub(crate) fn new(name: StringValue, context: &Context) -> Self { Self { - options: options.clone(), + context: context.clone(), max_attempts: MaxAttempts::Finite(DEFAULT_RETRY_ATTEMPTS.saturating_add(1)), backoff: BackoffOptions::default(), clone_input: None, @@ -300,12 +300,12 @@ impl RetryLayer>::new(Clock::new_frozen()); + /// # let context = Context::>::new(Clock::new_frozen()); /// // Service with multiple endpoints that can route around unavailable services - /// let layer = Retry::layer("multi_endpoint_retry", &options) + /// let layer = Retry::layer("multi_endpoint_retry", &context) /// .clone_input_with(|input: &mut String, args| { /// let mut input = input.clone(); /// update_endpoint(&mut input, args.attempt()); // Modify input to use a different endpoint @@ -356,11 +356,11 @@ impl RetryLayer RetryLayer Layer for RetryLayer { fn layer(&self, inner: S) -> Self::Service { Retry { inner, - clock: self.options.get_clock().clone(), + clock: self.context.get_clock().clone(), max_attempts: self.max_attempts, backoff: self.backoff.clone().into(), clone_input: self.clone_input.clone().expect("clone_input must be set in Ready state"), @@ -413,8 +413,8 @@ impl Layer for RetryLayer { on_retry: self.on_retry.clone(), enable_if: self.enable_if.clone(), strategy_name: self.strategy_name.clone(), - pipeline_name: self.options.get_pipeline_name().clone().into(), - event_reporter: self.options.create_resilience_event_counter(), + pipeline_name: self.context.get_pipeline_name().clone().into(), + event_reporter: self.context.create_resilience_event_counter(), restore_input: self.restore_input.clone(), handle_unavailable: self.handle_unavailable, } @@ -445,7 +445,7 @@ impl RetryLayer } /// # struct HttpResponse { status: u16 } @@ -465,10 +465,10 @@ impl RetryLayer>::new(&clock); + /// # let context = Context::>::new(&clock); /// type HttpResult = Result; /// - /// let layer = Retry::layer("http_retry", &options).restore_input_from_error( + /// let layer = Retry::layer("http_retry", &context).restore_input_from_error( /// |error: &mut HttpError, _args| { /// // Only restore input from connection errors that contain the original request /// error.try_restore_request() @@ -488,7 +488,7 @@ impl RetryLayer RetryLayer { fn into_state(self) -> RetryLayer { RetryLayer { - options: self.options, + context: self.context, max_attempts: self.max_attempts, backoff: self.backoff, clone_input: self.clone_input, @@ -519,8 +519,8 @@ mod tests { #[test] fn new_creates_correct_initial_state() { - let options = create_test_options(); - let layer: RetryLayer<_, _, NotSet, NotSet> = RetryLayer::new(StringValue::from("test_retry"), &options); + let context = create_test_context(); + let layer: RetryLayer<_, _, NotSet, NotSet> = RetryLayer::new(StringValue::from("test_retry"), &context); assert_eq!(layer.max_attempts, MaxAttempts::Finite(4)); // 3 retries + 1 original = 4 total assert!(matches!(layer.backoff.backoff_type, Backoff::Exponential)); @@ -536,8 +536,8 @@ mod tests { #[test] fn clone_input_sets_correctly() { - let options = create_test_options(); - let layer = RetryLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = RetryLayer::new(StringValue::from("test"), &context); let layer: RetryLayer<_, _, Set, NotSet> = layer.clone_input_with(|input, _args| Some(input.clone())); @@ -553,8 +553,8 @@ mod tests { #[test] fn recovery_sets_correctly() { - let options = create_test_options(); - let layer = RetryLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = RetryLayer::new(StringValue::from("test"), &context); let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery_with(|output, _args| { if output.contains("error") { @@ -568,7 +568,7 @@ mod tests { &"error message".to_string(), RecoveryArgs { attempt: Attempt::new(1, false), - clock: options.get_clock(), + clock: context.get_clock(), }, ); assert_eq!(result, RecoveryInfo::retry()); @@ -577,7 +577,7 @@ mod tests { &"success".to_string(), RecoveryArgs { attempt: Attempt::new(1, false), - clock: options.get_clock(), + clock: context.get_clock(), }, ); assert_eq!(result, RecoveryInfo::never()); @@ -585,8 +585,8 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { - let options = SeatbeltOptions::::new(Clock::new_frozen()); - let layer = RetryLayer::new(StringValue::from("test"), &options); + let context = Context::::new(Clock::new_frozen()); + let layer = RetryLayer::new(StringValue::from("test"), &context); let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery(); @@ -594,7 +594,7 @@ mod tests { &RecoverableType::from(RecoveryInfo::retry()), RecoveryArgs { attempt: Attempt::new(1, false), - clock: options.get_clock(), + clock: context.get_clock(), }, ); assert_eq!(result, RecoveryInfo::retry()); @@ -603,7 +603,7 @@ mod tests { &RecoverableType::from(RecoveryInfo::never()), RecoveryArgs { attempt: Attempt::new(1, false), - clock: options.get_clock(), + clock: context.get_clock(), }, ); assert_eq!(result, RecoveryInfo::never()); @@ -668,8 +668,8 @@ mod tests { #[test] fn handle_unavailable_sets_correctly() { - let options = create_test_options(); - let layer = RetryLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = RetryLayer::new(StringValue::from("test"), &context); // Test default value assert!(!layer.handle_unavailable); @@ -685,8 +685,8 @@ mod tests { #[test] fn restore_input_sets_correctly() { - let options = create_test_options(); - let layer = RetryLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = RetryLayer::new(StringValue::from("test"), &context); let layer = layer.restore_input(|output: &mut String, _args| { (output == "restore_me").then(|| { @@ -737,12 +737,12 @@ mod tests { static_assertions::assert_impl_all!(RetryLayer: Debug); } - fn create_test_options() -> SeatbeltOptions { - SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context() -> Context { + Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") } fn create_ready_layer() -> RetryLayer { - RetryLayer::new(StringValue::from("test"), &create_test_options()) + RetryLayer::new(StringValue::from("test"), &create_test_context()) .clone_input_with(|input, _args| Some(input.clone())) .recovery_with(|output, _args| { if output.contains("error") { diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index efbcf0f2..9655277d 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -15,12 +15,12 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::Retry; -//! # use seatbelt::{Backoff, RecoveryInfo, SeatbeltOptions}; +//! # use seatbelt::{Backoff, RecoveryInfo, Context}; //! # async fn example(clock: Clock) -> Result<(), Box> { -//! let options = SeatbeltOptions::new(&clock).pipeline_name("my_service"); +//! let context = Context::new(&clock).pipeline_name("my_service"); //! //! let stack = ( -//! Retry::layer("retry", &options) +//! Retry::layer("retry", &context) //! .clone_input() //! .recovery_with(|result, _| match result { //! Ok(_) => RecoveryInfo::never(), @@ -89,7 +89,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted for each attempt that should be retried (including the final retry attempt) //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`SeatbeltOptions::pipeline_name`] +//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`] //! - `resilience.strategy.name`: Timeout identifier from [`Retry::layer`] //! - `resilience.event.name`: Always `retry` //! - `resilience.attempt.index`: Attempt index (0-based) @@ -106,14 +106,14 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::Retry; -//! # use seatbelt::{Backoff, RecoveryInfo, SeatbeltOptions}; +//! # use seatbelt::{Backoff, RecoveryInfo, Context}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let options = SeatbeltOptions::new(&clock).pipeline_name("example"); +//! let context = Context::new(&clock).pipeline_name("example"); //! //! let stack = ( -//! Retry::layer("my_retry", &options) +//! Retry::layer("my_retry", &context) //! // Required: how to clone inputs for retries //! .clone_input() //! // Required: determine if we should retry based on output @@ -148,13 +148,13 @@ //! # use std::io; //! # use layered::{Execute, Stack, Service}; //! # use seatbelt::retry::Retry; -//! # use seatbelt::{RecoveryInfo, SeatbeltOptions, Backoff}; +//! # use seatbelt::{RecoveryInfo, Context, Backoff}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. -//! let options = SeatbeltOptions::new(&clock); +//! let context = Context::new(&clock); //! //! let stack = ( -//! Retry::layer("advanced_retry", &options) +//! Retry::layer("advanced_retry", &context) //! .clone_input() //! .recovery_with(|output: &Result, _args| match output { //! Err(err) if err.kind() == io::ErrorKind::TimedOut => RecoveryInfo::retry().delay(Duration::from_secs(60)), @@ -193,9 +193,9 @@ //! //! ```compile_fail //! # use seatbelt::retry::Retry; -//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::Context; //! # use tick::Clock; -//! # fn example(service_options: SeatbeltOptions) { +//! # fn example(context: Context) { //! let stack = ( //! Retry::layer("test", &service_options), // Missing required configuration! //! Execute::new(|input| async move { input }) diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index df2e5be8..d5c94972 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -55,9 +55,9 @@ impl Retry { /// before it can be used to build a retry service. pub fn layer( name: impl Into>, - options: &crate::SeatbeltOptions, + context: &crate::Context, ) -> crate::retry::RetryLayer { - crate::retry::RetryLayer::new(name.into().into(), options) + crate::retry::RetryLayer::new(name.into().into(), context) } } @@ -261,12 +261,12 @@ mod tests { use crate::retry::RetryLayer; use crate::service::Layer; use crate::testing::MetricTester; - use crate::{SeatbeltOptions, Set}; + use crate::{Context, Set}; #[test] fn layer_ensure_defaults() { - let options = SeatbeltOptions::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); - let layer: RetryLayer = Retry::layer("test_retry", &options); + let context = Context::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); + let layer: RetryLayer = Retry::layer("test_retry", &context); let layer = layer.recovery_with(|_, _| RecoveryInfo::never()).clone_input(); let retry = layer.layer(Execute::new(|v: String| async move { v })); @@ -573,11 +573,11 @@ mod tests { #[tokio::test] async fn retries_exhausted_ensure_telemetry_reported() { let tester = MetricTester::new(); - let options = SeatbeltOptions::::new(ClockControl::default().auto_advance_timers(true).to_clock()) + let context = Context::::new(ClockControl::default().auto_advance_timers(true).to_clock()) .pipeline_name("test_pipeline") .meter_provider(tester.meter_provider()); - let service = create_ready_retry_layer_core(RecoveryInfo::retry(), &options) + let service = create_ready_retry_layer_core(RecoveryInfo::retry(), &context) .clone_input_with(move |input, _args| Some(input.clone())) .max_retry_attempts(2) .recovery_with(move |_input, _args| RecoveryInfo::retry()) @@ -600,15 +600,12 @@ mod tests { } fn create_ready_retry_layer(clock: &Clock, recover: RecoveryInfo) -> RetryLayer { - let options = SeatbeltOptions::new(clock.clone()).pipeline_name("test_pipeline"); - create_ready_retry_layer_core(recover, &options) + let context = Context::new(clock.clone()).pipeline_name("test_pipeline"); + create_ready_retry_layer_core(recover, &context) } - fn create_ready_retry_layer_core( - recover: RecoveryInfo, - seatbelt_options: &SeatbeltOptions, - ) -> RetryLayer { - Retry::layer("test_retry", seatbelt_options) + fn create_ready_retry_layer_core(recover: RecoveryInfo, context: &Context) -> RetryLayer { + Retry::layer("test_retry", context) .recovery_with(move |_, _| recover.clone()) .clone_input() .max_delay(Duration::from_secs(9999)) // protect against infinite backoff diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 293100f2..66825a93 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -9,7 +9,7 @@ use crate::service::Layer; use crate::timeout::{ OnTimeout, OnTimeoutArgs, Timeout, TimeoutOutput as TimeoutOutputCallback, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs, }; -use crate::{EnableIf, NotSet, SeatbeltOptions, Set}; +use crate::{Context, EnableIf, NotSet, Set}; /// Builder for configuring timeout resilience middleware. /// @@ -22,7 +22,7 @@ use crate::{EnableIf, NotSet, SeatbeltOptions, Set}; /// For comprehensive examples, see the [timeout module][crate::timeout] documentation. #[derive(Debug)] pub struct TimeoutLayer { - options: SeatbeltOptions, + context: Context, timeout: Option, timeout_output: Option>, on_timeout: Option>, @@ -34,14 +34,14 @@ pub struct TimeoutLayer { impl TimeoutLayer { #[must_use] - pub(crate) fn new(name: StringValue, options: &SeatbeltOptions) -> Self { + pub(crate) fn new(name: StringValue, context: &Context) -> Self { Self { timeout: None, timeout_output: None, on_timeout: None, enable_if: EnableIf::always(), strategy_name: name, - options: options.clone(), + context: context.clone(), timeout_override: None, _state: PhantomData, } @@ -197,14 +197,14 @@ impl Layer for TimeoutLayer { fn layer(&self, inner: S) -> Self::Service { Timeout { inner, - clock: self.options.get_clock().clone(), + clock: self.context.get_clock().clone(), timeout: self.timeout.expect("timeout must be set in Ready state"), enable_if: self.enable_if.clone(), on_timeout: self.on_timeout.clone(), timeout_output: self.timeout_output.clone().expect("timeout_result must be set in Ready state"), name: self.strategy_name.clone(), - pipeline_name: self.options.get_pipeline_name().clone().into(), - event_reporter: self.options.create_resilience_event_counter(), + pipeline_name: self.context.get_pipeline_name().clone().into(), + event_reporter: self.context.create_resilience_event_counter(), timeout_override: self.timeout_override.clone(), } } @@ -218,7 +218,7 @@ impl TimeoutLayer = TimeoutLayer::new(StringValue::from("test_timeout"), &options); + let context = create_test_context(); + let layer: TimeoutLayer<_, _, NotSet, NotSet> = TimeoutLayer::new(StringValue::from("test_timeout"), &context); assert!(layer.timeout.is_none()); assert!(layer.timeout_output.is_none()); @@ -252,8 +252,8 @@ mod tests { #[test] fn timeout_output_ensure_set_correctly() { - let options = create_test_options(); - let layer = TimeoutLayer::new(StringValue::from("test"), &options); + let context = create_test_context(); + let layer = TimeoutLayer::new(StringValue::from("test"), &context); let layer: TimeoutLayer<_, _, NotSet, Set> = layer.timeout_output(|args| format!("timeout: {}", args.timeout().as_millis())); let result = layer.timeout_output.unwrap().call(TimeoutOutputArgs { @@ -265,8 +265,8 @@ mod tests { #[test] fn timeout_error_ensure_set_correctly() { - let options = create_test_options_result(); - let layer = TimeoutLayer::new(StringValue::from("test"), &options); + let context = create_test_context_result(); + let layer = TimeoutLayer::new(StringValue::from("test"), &context); let layer: TimeoutLayer<_, _, NotSet, Set> = layer.timeout_error(|args| format!("timeout: {}", args.timeout().as_millis())); let result = layer @@ -282,7 +282,7 @@ mod tests { #[test] fn timeout_ensure_set_correctly() { - let layer: TimeoutLayer<_, _, Set, Set> = TimeoutLayer::new(StringValue::from("test"), &create_test_options()) + let layer: TimeoutLayer<_, _, Set, Set> = TimeoutLayer::new(StringValue::from("test"), &create_test_context()) .timeout_output(|_args| "timeout: ".to_string()) .timeout(Duration::from_millis(3)); @@ -390,22 +390,22 @@ mod tests { static_assertions::assert_impl_all!(TimeoutLayer: Debug); } - fn create_test_options() -> SeatbeltOptions { - SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context() -> Context { + Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") } - fn create_test_options_result() -> SeatbeltOptions> { - SeatbeltOptions::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context_result() -> Context> { + Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") } fn create_ready_layer() -> TimeoutLayer { - TimeoutLayer::new(StringValue::from("test"), &create_test_options()) + TimeoutLayer::new(StringValue::from("test"), &create_test_context()) .timeout_output(|_args| "timeout: ".to_string()) .timeout(Duration::from_millis(3)) } fn create_ready_layer_with_result() -> TimeoutLayer, Set, Set> { - TimeoutLayer::new(StringValue::from("test"), &create_test_options_result()) + TimeoutLayer::new(StringValue::from("test"), &create_test_context_result()) .timeout_error(|_args| "timeout: ".to_string()) .timeout(Duration::from_millis(3)) } diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index 81e136cd..51f86edb 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -17,12 +17,12 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::timeout::Timeout; -//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::Context; //! # async fn example(clock: Clock) -> Result<(), io::Error> { -//! let options = SeatbeltOptions::new(&clock).pipeline_name("my_service"); +//! let context = Context::new(&clock).pipeline_name("my_service"); //! //! let stack = ( -//! Timeout::layer("timeout", &options) +//! Timeout::layer("timeout", &context) //! .timeout_error(|_| io::Error::new(io::ErrorKind::TimedOut, "operation timed out")) //! .timeout(Duration::from_secs(30)), //! Execute::new(my_operation), @@ -79,7 +79,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when a timeout occurs //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`SeatbeltOptions::pipeline_name`] +//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`] //! - `resilience.strategy.name`: Timeout identifier from [`Timeout::layer`] //! - `resilience.event.name`: Always `timeout` //! @@ -93,15 +93,15 @@ //! # use std::time::Duration; //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::Context; //! # use seatbelt::timeout::Timeout; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let options = SeatbeltOptions::new(&clock); +//! let context = Context::new(&clock); //! //! let stack = ( -//! Timeout::layer("my_timeout", &options) +//! Timeout::layer("my_timeout", &context) //! // Required: timeout middleware needs to know what output to return when timeout occurs //! .timeout_output(|args| { //! format!("timeout error, duration: {}ms", args.timeout().as_millis()) @@ -133,14 +133,14 @@ //! # use std::io; //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::Context; //! # use seatbelt::timeout::Timeout; //! # async fn example(clock: Clock) -> Result<(), io::Error> { //! // Define common options for resilience middleware. -//! let options = SeatbeltOptions::new(&clock); +//! let context = Context::new(&clock); //! //! let stack = ( -//! Timeout::layer("my_timeout", &options) +//! Timeout::layer("my_timeout", &context) //! // Return an error for Result outputs on timeout //! .timeout_error(|args| io::Error::new(io::ErrorKind::TimedOut, "request timed out")) //! // Default timeout @@ -180,11 +180,11 @@ //! # use std::time::Duration; //! # use layered::{Execute, Stack}; //! # use tick::Clock; -//! # use seatbelt::SeatbeltOptions; +//! # use seatbelt::Context; //! # use seatbelt::timeout::Timeout; -//! # fn example(service_options: SeatbeltOptions) { +//! # fn example(context: Context) { //! let stack = ( -//! Timeout::layer("my_timeout", &service_options), // Missing required configuration! +//! Timeout::layer("my_timeout", &context), // Missing required configuration! //! Execute::new(|input| async move { input }) //! ); //! diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index af95ca9e..36546d16 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -11,7 +11,7 @@ use tick::{Clock, FutureExt}; use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; use crate::timeout::telemetry::TIMEOUT_EVENT_NAME; use crate::timeout::{OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs}; -use crate::{EnableIf, NotSet, SeatbeltOptions}; +use crate::{Context, EnableIf, NotSet}; /// Applies a timeout to service execution for canceling long-running operations. /// @@ -55,11 +55,11 @@ impl Timeout { /// # use std::time::Duration; /// # use layered::{Execute, Stack}; /// # use tick::Clock; - /// # use seatbelt::SeatbeltOptions; + /// # use seatbelt::Context; /// use seatbelt::timeout::Timeout; /// - /// # fn example(options: SeatbeltOptions) { - /// let timeout_layer = Timeout::layer("my_timeout", &options) + /// # fn example(context: Context) { + /// let timeout_layer = Timeout::layer("my_timeout", &context) /// .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) /// .timeout(Duration::from_secs(30)); /// # } @@ -68,8 +68,8 @@ impl Timeout { /// For comprehensive examples, see the [timeout module] documentation. /// /// [timeout module]: crate::timeout - pub fn layer(name: impl Into>, options: &SeatbeltOptions) -> TimeoutLayer { - TimeoutLayer::new(name.into().into(), options) + pub fn layer(name: impl Into>, context: &Context) -> TimeoutLayer { + TimeoutLayer::new(name.into().into(), context) } } @@ -145,10 +145,10 @@ mod tests { #[tokio::test] async fn no_timeout() { let clock = Clock::new_frozen(); - let options = SeatbeltOptions::new(clock); + let context = Context::new(clock); let stack = ( - Timeout::layer("test_timeout", &options) + Timeout::layer("test_timeout", &context) .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) .timeout(Duration::from_secs(5)), Execute::new(|input: String| async move { input }), @@ -167,12 +167,12 @@ mod tests { .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) .to_clock(); - let options = SeatbeltOptions::new(clock.clone()); + let context = Context::new(clock.clone()); let called = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); let called_clone = std::sync::Arc::clone(&called); let stack = ( - Timeout::layer("test_timeout", &options) + Timeout::layer("test_timeout", &context) .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) .timeout(Duration::from_millis(200)) .on_timeout(move |out, args| { @@ -205,7 +205,7 @@ mod tests { .to_clock(); let stack = ( - Timeout::layer("test_timeout", &SeatbeltOptions::new(clock.clone())) + Timeout::layer("test_timeout", &Context::new(clock.clone())) .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) .timeout(Duration::from_millis(200)) .timeout_override(|input, _args| { @@ -234,7 +234,7 @@ mod tests { async fn no_timeout_if_disabled() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let stack = ( - Timeout::layer("test_timeout", &SeatbeltOptions::new(&clock)) + Timeout::layer("test_timeout", &Context::new(&clock)) .timeout_output(|_args| "timed out".to_string()) .timeout(Duration::from_millis(200)) .disable(), From f4aa4759be10a402d7969e4c46619cfaf63e21e9 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 16:17:08 +0100 Subject: [PATCH 15/60] cleanup --- crates/seatbelt/benches/observability.rs | 101 ++++++----------------- crates/seatbelt/src/options/context.rs | 50 ++--------- 2 files changed, 34 insertions(+), 117 deletions(-) diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index eef05a98..908f5865 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -5,15 +5,13 @@ use std::time::Duration; use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; -use layered::{Execute, Service}; -use opentelemetry::metrics::Counter; -use opentelemetry::{KeyValue, StringValue}; +use layered::{Execute, Service, Stack}; use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::metrics::data::ResourceMetrics; use opentelemetry_sdk::metrics::exporter::PushMetricExporter; use opentelemetry_sdk::metrics::{SdkMeterProvider, Temporality}; -use seatbelt::Context; -use seatbelt::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; +use seatbelt::retry::Retry; +use seatbelt::{Context, RecoveryInfo}; use tick::Clock; #[global_allocator] @@ -23,33 +21,37 @@ fn entry(c: &mut Criterion) { let mut group = c.benchmark_group("observability"); let session = Session::new(); - // No observability - let service = NoObservability(Execute::new(|v: Input| async move { Output::from(v) })); - let operation = session.operation("no-observability"); - group.bench_function("no-observability", |b| { - b.iter(|| { - let _span = operation.measure_thread(); - _ = block_on(service.execute(Input)); - }); - }); - - // With observability + // With retry middleware (no meter provider listener) let context = Context::new(Clock::new_frozen()); - let service = Observability::new(&context, Execute::new(|v: Input| async move { Output::from(v) })); - let operation = session.operation("observability"); - group.bench_function("observability", |b| { + let service = ( + Retry::layer("bench", &context) + .clone_input() + .base_delay(Duration::ZERO) + .recovery_with(|_, _| RecoveryInfo::retry()), + Execute::new(|v: Input| async move { Output::from(v) }), + ) + .build(); + let operation = session.operation("retry-no-telemetry"); + group.bench_function("retry-no-telemetry", |b| { b.iter(|| { let _span = operation.measure_thread(); _ = block_on(service.execute(Input)); }); }); - // With observability + listener + // With retry middleware + meter provider listener let meter_provider = SdkMeterProvider::builder().with_periodic_exporter(EmptyExporter).build(); let context = Context::new(Clock::new_frozen()).meter_provider(&meter_provider); - let service = Observability::new(&context, Execute::new(|v: Input| async move { Output::from(v) })); - let operation = session.operation("observability-and-listener"); - group.bench_function("observability-and-listener", |b| { + let service = ( + Retry::layer("bench", &context) + .clone_input() + .base_delay(Duration::ZERO) + .recovery_with(|_, _| RecoveryInfo::retry()), + Execute::new(|v: Input| async move { Output::from(v) }), + ) + .build(); + let operation = session.operation("retry-telemetry"); + group.bench_function("retry-telemetry", |b| { b.iter(|| { let _span = operation.measure_thread(); _ = block_on(service.execute(Input)); @@ -63,59 +65,10 @@ fn entry(c: &mut Criterion) { criterion_group!(benches, entry); criterion_main!(benches); -struct NoObservability(S); - -impl Service for NoObservability -where - S: Service, -{ - type Out = Output; - - async fn execute(&self, input: Input) -> Self::Out { - self.0.execute(input).await - } -} - -struct Observability { - service: S, - event_reporter: Counter, - pipeline_name: StringValue, -} - -impl Observability { - pub fn new(context: &Context, service: S) -> Self { - Self { - service, - event_reporter: context.create_resilience_event_counter(), - pipeline_name: context.get_pipeline_name().clone().into(), - } - } -} - -impl Service for Observability -where - S: Service, -{ - type Out = Output; - - async fn execute(&self, input: Input) -> Self::Out { - let output = self.service.execute(input).await; - - self.event_reporter.add( - 1, - &[ - KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - KeyValue::new(STRATEGY_NAME, "benchmark.observability"), - KeyValue::new(EVENT_NAME, "custom"), - ], - ); - - output - } -} - +#[derive(Debug, Clone)] struct Input; +#[derive(Debug, Clone)] struct Output; impl From for Output { diff --git a/crates/seatbelt/src/options/context.rs b/crates/seatbelt/src/options/context.rs index 15fee623..b1e9ccb0 100644 --- a/crates/seatbelt/src/options/context.rs +++ b/crates/seatbelt/src/options/context.rs @@ -30,31 +30,17 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// Basic usage: /// /// ```rust -/// # use opentelemetry::KeyValue; /// # use opentelemetry::metrics::MeterProvider; /// # use seatbelt::Context; -/// # use seatbelt::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; /// # use tick::Clock; /// # fn example(clock: Clock, meter_provider: &dyn MeterProvider) { +/// // Create a context for a resilience pipeline /// let ctx = Context::::new(&clock) /// .pipeline_name("auth_pipeline") /// .meter_provider(meter_provider); /// -/// // Use the clock for timing operations -/// let start = ctx.get_clock().instant(); -/// -/// // Create an event reporter -/// let resilience_counter = ctx.create_resilience_event_counter(); -/// -/// // Report an event with required attributes -/// resilience_counter.add( -/// 1, -/// &[ -/// KeyValue::new(PIPELINE_NAME, ctx.get_pipeline_name().clone()), -/// KeyValue::new(STRATEGY_NAME, "my_strategy"), -/// KeyValue::new(EVENT_NAME, "my_event"), -/// ], -/// ); +/// // Pass the context to resilience middleware layers +/// // (e.g., Retry::layer("my_retry", &ctx), Timeout::layer("my_timeout", &ctx)) /// # } /// ``` /// @@ -122,7 +108,7 @@ impl Context { /// Middlewares use this to measure durations, track timeouts, and perform /// other time-related operations from a consistent source. #[must_use] - pub fn get_clock(&self) -> &Clock { + pub(crate) fn get_clock(&self) -> &Clock { &self.clock } @@ -140,7 +126,7 @@ impl Context { /// Get the configured pipeline name (`default` if not set). #[must_use] - pub fn get_pipeline_name(&self) -> &Cow<'static, str> { + pub(crate) fn get_pipeline_name(&self) -> &Cow<'static, str> { &self.pipeline_name } @@ -161,7 +147,7 @@ impl Context { /// - Version: `v0.1.0` /// - Schema URL: `https://opentelemetry.io/schemas/1.47.0` #[must_use] - pub fn get_meter(&self) -> &Meter { + pub(crate) fn get_meter(&self) -> &Meter { &self.meter } @@ -177,30 +163,8 @@ impl Context { /// - [`PIPELINE_NAME`][crate::telemetry::PIPELINE_NAME]: The name of the pipeline this middleware belongs to /// - [`STRATEGY_NAME`][crate::telemetry::STRATEGY_NAME]: The name of the specific strategy/middleware /// - [`EVENT_NAME`][crate::telemetry::EVENT_NAME]: The name of the event being reported - /// - /// # Examples - /// - /// ```rust - /// # use tick::Clock; - /// # use opentelemetry::KeyValue; - /// # use opentelemetry::metrics::Meter; - /// # use seatbelt::Context; - /// # use seatbelt::telemetry::{PIPELINE_NAME, STRATEGY_NAME, EVENT_NAME}; - /// # fn example(context: &Context<(), ()>) { - /// let counter = context.create_resilience_event_counter(); - /// - /// counter.add( - /// 1, - /// &[ - /// KeyValue::new(PIPELINE_NAME, "my_pipeline"), - /// KeyValue::new(STRATEGY_NAME, "my_strategy"), - /// KeyValue::new(EVENT_NAME, "my_event"), - /// ], - /// ); - /// # } - /// ``` #[must_use] - pub fn create_resilience_event_counter(&self) -> Counter { + pub(crate) fn create_resilience_event_counter(&self) -> Counter { create_resilience_event_counter(self.get_meter()) } } From e2fe66c42d98b7ce5e22138ff6fdee080d8e809d Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 16:33:03 +0100 Subject: [PATCH 16/60] spelling fixes --- .spelling | 2 ++ crates/seatbelt/src/circuit/constants.rs | 8 +++---- .../src/circuit/engine/probing/options.rs | 2 +- .../circuit/engine/probing/single_probe.rs | 10 ++++----- crates/seatbelt/src/circuit/layer.rs | 8 +++---- crates/seatbelt/src/circuit/mod.rs | 4 ++-- crates/seatbelt/src/lib.rs | 4 ++-- crates/seatbelt/src/options/backoff.rs | 6 ++--- crates/seatbelt/src/options/context.rs | 8 +++---- .../seatbelt/src/options/define_fn_wrapper.rs | 2 +- crates/seatbelt/src/retry/backoff.rs | 22 +++++++++---------- crates/seatbelt/src/retry/constants.rs | 6 ++--- crates/seatbelt/src/retry/layer.rs | 8 +++---- crates/seatbelt/src/retry/mod.rs | 2 +- crates/seatbelt/src/rnd.rs | 8 +++---- crates/seatbelt/src/telemetry/mod.rs | 2 +- crates/seatbelt/src/timeout/layer.rs | 2 +- 17 files changed, 53 insertions(+), 51 deletions(-) diff --git a/.spelling b/.spelling index e9ceb2a8..7b7e827b 100644 --- a/.spelling +++ b/.spelling @@ -137,6 +137,7 @@ interop Interop interoperability interoperate +jitter IOCP IP Kubernetes @@ -181,6 +182,7 @@ non-mockable ns nuget NUMA +observability ohno ok Ok diff --git a/crates/seatbelt/src/circuit/constants.rs b/crates/seatbelt/src/circuit/constants.rs index a81e40aa..7372f81c 100644 --- a/crates/seatbelt/src/circuit/constants.rs +++ b/crates/seatbelt/src/circuit/constants.rs @@ -8,27 +8,27 @@ pub(crate) const MIN_SAMPLING_DURATION: Duration = Duration::from_secs(1); /// Default minimum throughput (number of requests) in the sampling window before /// the circuit breaker can evaluate the failure rate and potentially trip the circuit. /// -/// The defaults taken from Polly V8: +/// The defaults taken from `Polly V8`: /// pub(crate) const DEFAULT_MIN_THROUGHPUT: u32 = 100; /// Default duration of the circuit breaker's sampling window. /// -/// The defaults taken from Polly V8: +/// The defaults taken from `Polly V8`: /// pub(crate) const DEFAULT_SAMPLING_DURATION: Duration = Duration::from_secs(30); /// Default failure threshold (percentage of failed requests) in the sampling window /// that will trip the circuit breaker. /// -/// The defaults taken from Polly V8: +/// The defaults taken from `Polly V8`: /// pub(crate) const DEFAULT_FAILURE_THRESHOLD: f32 = 0.1; /// Default duration that the circuit breaker remains open (broken) before /// transitioning to half-open to test if the service has recovered. /// -/// The defaults taken from Polly V8: +/// The defaults taken from `Polly V8`: /// pub(crate) const DEFAULT_BREAK_DURATION: Duration = Duration::from_secs(5); diff --git a/crates/seatbelt/src/circuit/engine/probing/options.rs b/crates/seatbelt/src/circuit/engine/probing/options.rs index c52d72c0..ec787e8d 100644 --- a/crates/seatbelt/src/circuit/engine/probing/options.rs +++ b/crates/seatbelt/src/circuit/engine/probing/options.rs @@ -14,7 +14,7 @@ const MIN_THROUGHPUT: u32 = 1; pub(crate) enum ProbeOptions { /// A single probe that allows one probe. /// - /// After the initial probe is allowed, it enters a cooldown period during which + /// After the initial probe is allowed, it enters a cool-down period during which /// no further probes are allowed. SingleProbe { cooldown: Duration }, diff --git a/crates/seatbelt/src/circuit/engine/probing/single_probe.rs b/crates/seatbelt/src/circuit/engine/probing/single_probe.rs index 4d8d2cba..69cf846a 100644 --- a/crates/seatbelt/src/circuit/engine/probing/single_probe.rs +++ b/crates/seatbelt/src/circuit/engine/probing/single_probe.rs @@ -30,13 +30,13 @@ impl SingleProbe { impl ProbeOperation for SingleProbe { fn allow_probe(&mut self, now: Instant) -> AllowProbeResult { match self.entered_at { - // First probe attempt - record the timestamp to start the cooldown period + // First probe attempt - record the timestamp to start the cool-down period None => { self.entered_at = Some(now); AllowProbeResult::Accepted } - // Cooldown has elapsed, allow the probe and reset the cooldown timer. - // We allow additional probe after the cooldown period to handle the case + // Cool-down has elapsed, allow the probe and reset the cool-down timer. + // We allow additional probe after the cool-down period to handle the case // where the probe result is not recorded due to future being dropped. Some(entered_at) if now.saturating_duration_since(entered_at) > self.probe_cooldown => { self.entered_at = Some(now); @@ -87,11 +87,11 @@ mod tests { // The first probe should be accepted assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); - // After exactly cooldown duration, the probe should still be rejected + // After exactly cool-down duration, the probe should still be rejected let later = now + Duration::from_secs(5); assert_eq!(probe.allow_probe(later), AllowProbeResult::Rejected); - // After cooldown + 1 microsecond, the probe should be accepted + // After cool-down + 1 microsecond, the probe should be accepted let later = now + Duration::from_secs(5) + Duration::from_micros(1); assert_eq!(probe.allow_probe(later), AllowProbeResult::Accepted); } diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index 581b4b79..7c038a95 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -17,7 +17,7 @@ use crate::{Context, EnableIf, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring circuit breaker resilience middleware. /// /// This type is created by calling [`Circuit::layer`] and uses the -/// typestate pattern to enforce that required properties are configured before the circuit breaker +/// type-state pattern to enforce that required properties are configured before the circuit breaker /// middleware can be built: /// /// - [`recovery`][CircuitLayer::recovery]: Required to determine if an output represents a failure @@ -159,13 +159,13 @@ impl CircuitLayer 1.0 are clamped) + /// * `threshold` - The failure threshold (0.0 to 1.0, values `>` 1.0 are clamped) #[must_use] pub fn failure_threshold(mut self, threshold: f32) -> Self { self.failure_threshold = threshold.min(1.0); diff --git a/crates/seatbelt/src/circuit/mod.rs b/crates/seatbelt/src/circuit/mod.rs index 05a8aa17..145e1c07 100644 --- a/crates/seatbelt/src/circuit/mod.rs +++ b/crates/seatbelt/src/circuit/mod.rs @@ -3,7 +3,7 @@ //! Circuit breaker resilience middleware for preventing cascading failures. //! //! This module provides automatic circuit breaking capabilities with configurable failure -//! thresholds, break durations, and comprehensive telemetry. The primary types are: +//! thresholds, break duration, and comprehensive telemetry. The primary types are: //! //! - [`Circuit`] is the middleware that wraps an inner service and monitors failure rates //! - [`CircuitLayer`] is used to configure and construct the circuit breaker middleware @@ -221,7 +221,7 @@ //! ## Advanced Usage //! //! This example demonstrates advanced usage of the circuit breaker middleware, including custom -//! failure thresholds, sampling durations, break durations, and state change callbacks. +//! failure thresholds, sampling duration, break duration, and state change callbacks. //! //! ```rust //! # use std::time::Duration; diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index ab5875fa..ecedd64b 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -21,7 +21,7 @@ //! //! # Runtime Agnostic Design //! -//! The seatbelt crate is designed to be **runtime agnostic** and works seamlessly across any +//! The `seatbelt` crate is designed to be **runtime agnostic** and works seamlessly across any //! async runtime. This flexibility allows you to use the same resilience patterns across //! different projects and migrate between runtimes without changing your resilience patterns. //! @@ -78,7 +78,7 @@ //! > operations like delays, timeouts, and backoff calculations. The clock is passed through //! > [`Context`] when creating middleware layers. //! -//! See [Built-in Middlewares](#built-in-middleware) for more details. +//! See [Built-in Middleware](#built-in-middleware) for more details. //! //! # Recovery Metadata //! diff --git a/crates/seatbelt/src/options/backoff.rs b/crates/seatbelt/src/options/backoff.rs index 748488cb..88d50971 100644 --- a/crates/seatbelt/src/options/backoff.rs +++ b/crates/seatbelt/src/options/backoff.rs @@ -9,16 +9,16 @@ pub enum Backoff { /// Constant backoff strategy that maintains consistent delays between attempts. /// - /// **Example with 2s base delay:** `2s, 2s, 2s, 2s, ...` + /// **Example with `2s` base delay:** `2s, 2s, 2s, 2s, ...` Constant, /// Linear backoff strategy that increases delays proportionally with attempt count. /// - /// **Example with 2s base delay:** `2s, 4s, 6s, 8s, 10s, ...` + /// **Example with `2s` base delay:** `2s, 4s, 6s, 8s, 10s, ...` Linear, /// Exponential backoff strategy that doubles delays with each attempt. /// - /// **Example with 2s base delay:** `2s, 4s, 8s, 16s, 32s, ...` + /// **Example with `2s` base delay:** `2s, 4s, 8s, 16s, 32s, ...` Exponential, } diff --git a/crates/seatbelt/src/options/context.rs b/crates/seatbelt/src/options/context.rs index b1e9ccb0..9fa530dc 100644 --- a/crates/seatbelt/src/options/context.rs +++ b/crates/seatbelt/src/options/context.rs @@ -23,7 +23,7 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// pipeline attribute, making dashboards and analysis easier. /// /// You can also override the meter provider via [`meter_provider`](Self::meter_provider) -/// if you need a non-global provider (e.g., tests or custom SDK wiring). +/// if you need a non-global provider (e.g., tests or custom SDKs wiring). /// /// # Examples /// @@ -46,7 +46,7 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// /// # Authoring Resilience `Middlewares` /// -/// This crate’s primitives are designed to make writing custom resilience middleware +/// This crate primitives are designed to make writing custom resilience middleware /// straightforward and observable. Pass a shared [`Context`] to each /// middleware that needs telemetry. You can keep a single instance (or derive child /// scopes from it) to group related middleware under one logical parent so their @@ -68,7 +68,7 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// 4. Report events at key decision points (retries, circuit breaker trips, timeouts, …) /// 5. Use recovery metadata from error types to make informed retry/backoff decisions /// -/// > Note: The canonical, in‑depth description of the Oxidizer middleware / service +/// > Note: The canonical, in depth description of the Oxidizer middleware / service /// > layering pattern lives in the [`layered`] crate documentation. Resilience /// > middleware in this crate is expected to follow that same pattern (layer types /// > implementing `Layer`, constructors taking the next service, no hidden global @@ -105,7 +105,7 @@ impl Context { /// Get the configured clock for timing operations. /// - /// Middlewares use this to measure durations, track timeouts, and perform + /// Middleware use this to measure duration, track timeouts, and perform /// other time-related operations from a consistent source. #[must_use] pub(crate) fn get_clock(&self) -> &Clock { diff --git a/crates/seatbelt/src/options/define_fn_wrapper.rs b/crates/seatbelt/src/options/define_fn_wrapper.rs index f30752dd..661fb955 100644 --- a/crates/seatbelt/src/options/define_fn_wrapper.rs +++ b/crates/seatbelt/src/options/define_fn_wrapper.rs @@ -4,7 +4,7 @@ /// /// This macro generates a type that wraps a function in an `Arc`, /// providing `Clone`, `Debug`, and convenient constructor methods. We need this to allow storing -/// user-provided functions (e.g., predicates) in a thread-safe, cloneable way. +/// user-provided functions (e.g., predicates) in a thread-safe, clonable way. /// /// # Syntax /// diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index 2517af73..a3997afd 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -100,13 +100,13 @@ fn apply_jitter(delay: Duration, rnd: &Rnd) -> Duration { secs_to_duration_saturating(new_ms / 1000.0) } -/// Decorrelated-jitter backoff (v2): smooth exponential growth with bounded randomization. +/// De-correlated jitter backoff (`v2`): smooth exponential growth with bounded randomization. /// -/// Decorrelated Jitter V2 spreads retries evenly while preserving exponential backoff +/// De-correlated jitter `V2` spreads retries evenly while preserving exponential backoff /// (with a configurable first-retry median), reducing synchronized spikes and tail-latency -/// compared to naïve “±random” jitter. +/// compared to naive random jitter. /// -/// What does "decorrelated" mean? +/// What does "de-correlated" mean? /// /// - Successive delays are not a direct function of the immediately previous /// delay. Instead, each step samples a random phase (`t = attempt + U[0,1)`) @@ -114,19 +114,19 @@ fn apply_jitter(delay: Duration, rnd: &Rnd) -> Duration { /// position on that curve. This weakens correlation between consecutive /// samples and reduces synchronization across many callers. /// -/// What does "v2" mean? +/// What does `v2` mean? /// /// - It refers to the second-generation formulation from /// `Polly.Contrib.WaitAndRetry` (linked below). Compared to the earlier -/// (v1) "decorrelated jitter" popularized in the AWS blog post, v2 uses a +/// (`v1`) "de-correlated jitter" popularized in the `AWS` blog post, `v2` uses a /// closed-form function combining exponential growth with a `tanh(sqrt(p*t))` -/// taper to achieve monotonic expected growth, reduced tail latencies, and a -/// tighter distribution—while still remaining decorrelated. +/// taper to achieve monotonic expected growth, reduced tail latency, and a +/// tighter distribution while still remaining de-correlated. /// /// References -/// - [Polly V8 implementation](https://github.com/App-vNext/Polly/blob/8ba1e3ba295542cbc937d0555fadfa0d23b5c568/src/Polly.Core/Retry/RetryHelper.cs#L96) -/// - [Polly V7 implementation](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry/blob/7596d2dacf22d88bbd814bc49c28424fb6e921e9/src/Polly.Contrib.WaitAndRetry/Backoff.DecorrelatedJitterV2.cs#L22) -/// - [Polly.Contrib.WaitAndRetry repo](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry) +/// - [`Polly V8` implementation](https://github.com/App-vNext/Polly/blob/8ba1e3ba295542cbc937d0555fadfa0d23b5c568/src/Polly.Core/Retry/RetryHelper.cs#L96) +/// - [`Polly V7` implementation](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry/blob/7596d2dacf22d88bbd814bc49c28424fb6e921e9/src/Polly.Contrib.WaitAndRetry/Backoff.DecorrelatedJitterV2.cs#L22) +/// - [`Polly.Contrib.WaitAndRetry` repo](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry) #[inline] fn decorrelated_jitter_backoff_v2(attempt: u32, base_delay: Duration, prev: &mut f64, rnd: &Rnd) -> Duration { // The original author/credit for this jitter formula is @george-polevoy . diff --git a/crates/seatbelt/src/retry/constants.rs b/crates/seatbelt/src/retry/constants.rs index b73ed74b..202d4cbd 100644 --- a/crates/seatbelt/src/retry/constants.rs +++ b/crates/seatbelt/src/retry/constants.rs @@ -14,12 +14,12 @@ pub(super) const DEFAULT_BACKOFF: Backoff = Backoff::Exponential; /// Base delay for the backoff schedule; conservative 2 seconds by default. /// -/// A 2s starting delay prevents aggressive retry storms during partial outages +/// A `2s` starting delay prevents aggressive retry storms during partial outages /// while still enabling fast recovery for short-lived failures. Workloads with /// different needs can override this via configuration. pub(super) const DEFAULT_BASE_DELAY: Duration = Duration::from_secs(2); -/// Enable jitter by default to desynchronize clients and reduce contention. +/// Enable jitter by default to de-synchronize clients and reduce contention. /// /// Randomizing retry delays mitigates correlated bursts and improves tail /// latency under contention. See [Exponential Backoff and Jitter](https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter) for details. @@ -27,6 +27,6 @@ pub(super) const DEFAULT_USE_JITTER: bool = true; /// Default maximum retry attempts: 3. /// -/// The default is inherited from Polly v8 which is a widely used resilience library +/// The default is inherited from `Polly v8` which is a widely used resilience library /// and also uses 3 retry attempts. pub(super) const DEFAULT_RETRY_ATTEMPTS: u32 = 3; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index d37cd44a..5324b25f 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -14,7 +14,7 @@ use crate::{Backoff, Context, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryI /// Builder for configuring retry resilience middleware. /// /// This type is created by calling [`Retry::layer`](crate::retry::Retry::layer) and uses the -/// typestate pattern to enforce that required properties are configured before the retry middleware can be built: +/// type-state pattern to enforce that required properties are configured before the retry middleware can be built: /// /// - [`clone_input`][RetryLayer::clone_input]: Required to specify how to clone inputs for retry attempts /// - [`recovery`][RetryLayer::recovery]: Required to determine if an output should trigger a retry @@ -59,7 +59,7 @@ impl RetryLayer RetryLayer Date: Mon, 19 Jan 2026 16:44:18 +0100 Subject: [PATCH 17/60] clipy and fmt --- crates/seatbelt/examples/retry_outage.rs | 2 ++ crates/seatbelt/examples/timeout.rs | 1 + crates/seatbelt/src/testing.rs | 2 -- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index efac0fb0..b81e49c0 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -1,5 +1,7 @@ // Copyright (c) Microsoft Corporation. +#![expect(clippy::unwrap_used, reason = "sample code")] + //! Demonstrates advanced retry patterns with input restoration from errors. //! //! This example showcases how to handle outage scenarios where: diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index 9f5d9724..d65e8b0a 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +#![expect(clippy::unwrap_used, reason = "sample code")] //! Simple timeout resilience middleware example. //! diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs index 37ceb748..0e952edc 100644 --- a/crates/seatbelt/src/testing.rs +++ b/crates/seatbelt/src/testing.rs @@ -35,13 +35,11 @@ impl MetricTester { } #[must_use] - #[expect(clippy::missing_panics_doc, reason = "TODO")] pub fn collect_attributes(&self) -> Vec { self.provider.force_flush().unwrap(); collect_attributes(&self.exporter) } - #[expect(clippy::missing_panics_doc, reason = "TODO")] pub fn assert_attributes(&self, key_values: &[KeyValue], expected_length: Option) { let attributes = self.collect_attributes(); From e7c2aff15aa4ec3f01b038f656001c54504a97e1 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 17:42:18 +0100 Subject: [PATCH 18/60] license headers --- crates/seatbelt/benches/circuit.rs | 1 + crates/seatbelt/benches/observability.rs | 1 + crates/seatbelt/benches/retry.rs | 1 + crates/seatbelt/benches/timeout.rs | 1 + crates/seatbelt/examples/circuit.rs | 1 + crates/seatbelt/examples/resilience_pipeline.rs | 1 + crates/seatbelt/examples/retry.rs | 1 + crates/seatbelt/examples/retry_advanced.rs | 1 + crates/seatbelt/examples/retry_outage.rs | 1 + crates/seatbelt/examples/timeout.rs | 1 + crates/seatbelt/examples/timeout_advanced.rs | 1 + crates/seatbelt/src/circuit/args.rs | 1 + crates/seatbelt/src/circuit/callbacks.rs | 1 + crates/seatbelt/src/circuit/constants.rs | 1 + crates/seatbelt/src/circuit/engine/engine_core.rs | 1 + crates/seatbelt/src/circuit/engine/engine_fake.rs | 1 + crates/seatbelt/src/circuit/engine/engine_telemetry.rs | 1 + crates/seatbelt/src/circuit/engine/engines.rs | 1 + crates/seatbelt/src/circuit/engine/mod.rs | 1 + crates/seatbelt/src/circuit/engine/probing/health_probe.rs | 1 + crates/seatbelt/src/circuit/engine/probing/mod.rs | 1 + crates/seatbelt/src/circuit/engine/probing/options.rs | 1 + crates/seatbelt/src/circuit/engine/probing/probes.rs | 1 + crates/seatbelt/src/circuit/engine/probing/single_probe.rs | 1 + crates/seatbelt/src/circuit/execution_result.rs | 1 + crates/seatbelt/src/circuit/half_open_mode.rs | 1 + crates/seatbelt/src/circuit/health.rs | 1 + crates/seatbelt/src/circuit/layer.rs | 1 + crates/seatbelt/src/circuit/mod.rs | 1 + crates/seatbelt/src/circuit/partition_key.rs | 1 + crates/seatbelt/src/circuit/service.rs | 1 + crates/seatbelt/src/circuit/telemetry.rs | 1 + crates/seatbelt/src/options/attempt.rs | 1 + crates/seatbelt/src/options/backoff.rs | 1 + crates/seatbelt/src/options/context.rs | 1 + crates/seatbelt/src/options/define_fn_wrapper.rs | 1 + crates/seatbelt/src/options/mod.rs | 1 + crates/seatbelt/src/retry/args.rs | 1 + crates/seatbelt/src/retry/backoff.rs | 1 + crates/seatbelt/src/retry/callbacks.rs | 1 + crates/seatbelt/src/retry/constants.rs | 1 + crates/seatbelt/src/retry/layer.rs | 1 + crates/seatbelt/src/retry/mod.rs | 1 + crates/seatbelt/src/retry/service.rs | 1 + crates/seatbelt/src/retry/telemetry.rs | 1 + crates/seatbelt/src/rnd.rs | 1 + crates/seatbelt/src/telemetry/metrics.rs | 1 + crates/seatbelt/src/telemetry/mod.rs | 1 + crates/seatbelt/src/testing.rs | 1 + crates/seatbelt/src/timeout/args.rs | 1 + crates/seatbelt/src/timeout/callbacks.rs | 1 + crates/seatbelt/src/timeout/layer.rs | 1 + crates/seatbelt/src/timeout/mod.rs | 1 + crates/seatbelt/src/timeout/service.rs | 1 + crates/seatbelt/src/timeout/telemetry.rs | 1 + 55 files changed, 55 insertions(+) diff --git a/crates/seatbelt/benches/circuit.rs b/crates/seatbelt/benches/circuit.rs index 2556e23f..aacbe34a 100644 --- a/crates/seatbelt/benches/circuit.rs +++ b/crates/seatbelt/benches/circuit.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. #![expect(missing_docs, reason = "benchmark code")] use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index 908f5865..2683897f 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. #![expect(missing_docs, reason = "benchmark code")] use std::time::Duration; diff --git a/crates/seatbelt/benches/retry.rs b/crates/seatbelt/benches/retry.rs index 736a6d3f..4b15a87f 100644 --- a/crates/seatbelt/benches/retry.rs +++ b/crates/seatbelt/benches/retry.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. #![expect(missing_docs, reason = "benchmark code")] use std::time::Duration; diff --git a/crates/seatbelt/benches/timeout.rs b/crates/seatbelt/benches/timeout.rs index fc370bc8..705adc29 100644 --- a/crates/seatbelt/benches/timeout.rs +++ b/crates/seatbelt/benches/timeout.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. #![expect(missing_docs, reason = "benchmark code")] use std::time::Duration; diff --git a/crates/seatbelt/examples/circuit.rs b/crates/seatbelt/examples/circuit.rs index d26e06d0..c6eaa921 100644 --- a/crates/seatbelt/examples/circuit.rs +++ b/crates/seatbelt/examples/circuit.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Circuit breaker example that simulates a major service outage and tripping of the //! circuit breaker by: diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index 0b2bfcf6..2d1706ea 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! This example demonstrates how to combine multiple resilience middlewares //! using the `seatbelt` crate to create a robust execution pipeline with basic diff --git a/crates/seatbelt/examples/retry.rs b/crates/seatbelt/examples/retry.rs index 653f3a48..ea09764a 100644 --- a/crates/seatbelt/examples/retry.rs +++ b/crates/seatbelt/examples/retry.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Basic retry middleware example with automatic input cloning and simple recovery logic. diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index f3fbb8e4..cd0c0d1c 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Advanced retry middleware demonstrating custom input cloning and attempt info forwarding. //! diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index b81e49c0..670979dc 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. #![expect(clippy::unwrap_used, reason = "sample code")] diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index d65e8b0a..51a4fae4 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. #![expect(clippy::unwrap_used, reason = "sample code")] //! Simple timeout resilience middleware example. diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index 23df2158..558a8d16 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Advanced timeout resilience middleware example. //! diff --git a/crates/seatbelt/src/circuit/args.rs b/crates/seatbelt/src/circuit/args.rs index 1a8c6baa..d2e9d89f 100644 --- a/crates/seatbelt/src/circuit/args.rs +++ b/crates/seatbelt/src/circuit/args.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; diff --git a/crates/seatbelt/src/circuit/callbacks.rs b/crates/seatbelt/src/circuit/callbacks.rs index b7157f8f..da93cfad 100644 --- a/crates/seatbelt/src/circuit/callbacks.rs +++ b/crates/seatbelt/src/circuit/callbacks.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use super::{OnClosedArgs, OnOpenedArgs, OnProbingArgs, PartitionKey, RecoveryArgs, RejectedInputArgs}; use crate::RecoveryInfo; diff --git a/crates/seatbelt/src/circuit/constants.rs b/crates/seatbelt/src/circuit/constants.rs index 7372f81c..1030ed01 100644 --- a/crates/seatbelt/src/circuit/constants.rs +++ b/crates/seatbelt/src/circuit/constants.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; diff --git a/crates/seatbelt/src/circuit/engine/engine_core.rs b/crates/seatbelt/src/circuit/engine/engine_core.rs index e0032690..85bdef9a 100644 --- a/crates/seatbelt/src/circuit/engine/engine_core.rs +++ b/crates/seatbelt/src/circuit/engine/engine_core.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::sync::Mutex; use std::time::{Duration, Instant}; diff --git a/crates/seatbelt/src/circuit/engine/engine_fake.rs b/crates/seatbelt/src/circuit/engine/engine_fake.rs index 696894d4..c59930ef 100644 --- a/crates/seatbelt/src/circuit/engine/engine_fake.rs +++ b/crates/seatbelt/src/circuit/engine/engine_fake.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use crate::circuit::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; diff --git a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs index 9d85872f..cc5eecf4 100644 --- a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use opentelemetry::StringValue; use opentelemetry::metrics::Counter; diff --git a/crates/seatbelt/src/circuit/engine/engines.rs b/crates/seatbelt/src/circuit/engine/engines.rs index 4f3884b6..e22b96d9 100644 --- a/crates/seatbelt/src/circuit/engine/engines.rs +++ b/crates/seatbelt/src/circuit/engine/engines.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::collections::HashMap; use std::sync::{Arc, Mutex}; diff --git a/crates/seatbelt/src/circuit/engine/mod.rs b/crates/seatbelt/src/circuit/engine/mod.rs index d2679d7b..dc03a23e 100644 --- a/crates/seatbelt/src/circuit/engine/mod.rs +++ b/crates/seatbelt/src/circuit/engine/mod.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::fmt::Debug; use std::time::Duration; diff --git a/crates/seatbelt/src/circuit/engine/probing/health_probe.rs b/crates/seatbelt/src/circuit/engine/probing/health_probe.rs index 20faf95c..84adfd01 100644 --- a/crates/seatbelt/src/circuit/engine/probing/health_probe.rs +++ b/crates/seatbelt/src/circuit/engine/probing/health_probe.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Instant; diff --git a/crates/seatbelt/src/circuit/engine/probing/mod.rs b/crates/seatbelt/src/circuit/engine/probing/mod.rs index 4d64932c..56564610 100644 --- a/crates/seatbelt/src/circuit/engine/probing/mod.rs +++ b/crates/seatbelt/src/circuit/engine/probing/mod.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Probing mechanisms for circuit breakers. //! diff --git a/crates/seatbelt/src/circuit/engine/probing/options.rs b/crates/seatbelt/src/circuit/engine/probing/options.rs index ec787e8d..ec3f6aba 100644 --- a/crates/seatbelt/src/circuit/engine/probing/options.rs +++ b/crates/seatbelt/src/circuit/engine/probing/options.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; use std::vec::IntoIter; diff --git a/crates/seatbelt/src/circuit/engine/probing/probes.rs b/crates/seatbelt/src/circuit/engine/probing/probes.rs index 46e9c8b6..561f216b 100644 --- a/crates/seatbelt/src/circuit/engine/probing/probes.rs +++ b/crates/seatbelt/src/circuit/engine/probing/probes.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Instant; use std::vec; diff --git a/crates/seatbelt/src/circuit/engine/probing/single_probe.rs b/crates/seatbelt/src/circuit/engine/probing/single_probe.rs index 69cf846a..fc67d160 100644 --- a/crates/seatbelt/src/circuit/engine/probing/single_probe.rs +++ b/crates/seatbelt/src/circuit/engine/probing/single_probe.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::{Duration, Instant}; diff --git a/crates/seatbelt/src/circuit/execution_result.rs b/crates/seatbelt/src/circuit/execution_result.rs index b653a518..4d76a3f9 100644 --- a/crates/seatbelt/src/circuit/execution_result.rs +++ b/crates/seatbelt/src/circuit/execution_result.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use crate::RecoveryInfo; diff --git a/crates/seatbelt/src/circuit/half_open_mode.rs b/crates/seatbelt/src/circuit/half_open_mode.rs index 6566c05d..57c696f3 100644 --- a/crates/seatbelt/src/circuit/half_open_mode.rs +++ b/crates/seatbelt/src/circuit/half_open_mode.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; diff --git a/crates/seatbelt/src/circuit/health.rs b/crates/seatbelt/src/circuit/health.rs index b7fd617b..b6cc049f 100644 --- a/crates/seatbelt/src/circuit/health.rs +++ b/crates/seatbelt/src/circuit/health.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::collections::VecDeque; use std::time::{Duration, Instant}; diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index 7c038a95..dca6be77 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::marker::PhantomData; use std::time::Duration; diff --git a/crates/seatbelt/src/circuit/mod.rs b/crates/seatbelt/src/circuit/mod.rs index 145e1c07..79d7006d 100644 --- a/crates/seatbelt/src/circuit/mod.rs +++ b/crates/seatbelt/src/circuit/mod.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Circuit breaker resilience middleware for preventing cascading failures. //! diff --git a/crates/seatbelt/src/circuit/partition_key.rs b/crates/seatbelt/src/circuit/partition_key.rs index b912e91f..d15de192 100644 --- a/crates/seatbelt/src/circuit/partition_key.rs +++ b/crates/seatbelt/src/circuit/partition_key.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::borrow::Cow; use std::collections::hash_map::DefaultHasher; diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index cfeb4aed..127fb580 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::ops::ControlFlow; diff --git a/crates/seatbelt/src/circuit/telemetry.rs b/crates/seatbelt/src/circuit/telemetry.rs index 47238709..a6ded27a 100644 --- a/crates/seatbelt/src/circuit/telemetry.rs +++ b/crates/seatbelt/src/circuit/telemetry.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. pub(super) const CIRCUIT_OPENED_EVENT_NAME: &str = "circuit_opened"; pub(super) const CIRCUIT_CLOSED_EVENT_NAME: &str = "circuit_closed"; diff --git a/crates/seatbelt/src/options/attempt.rs b/crates/seatbelt/src/options/attempt.rs index 7e305efe..6e753fcd 100644 --- a/crates/seatbelt/src/options/attempt.rs +++ b/crates/seatbelt/src/options/attempt.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::fmt::Display; diff --git a/crates/seatbelt/src/options/backoff.rs b/crates/seatbelt/src/options/backoff.rs index 88d50971..1d149ff7 100644 --- a/crates/seatbelt/src/options/backoff.rs +++ b/crates/seatbelt/src/options/backoff.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. /// Defines the backoff strategy used by resilience middleware for retry operations. /// diff --git a/crates/seatbelt/src/options/context.rs b/crates/seatbelt/src/options/context.rs index 9fa530dc..3d65d3ff 100644 --- a/crates/seatbelt/src/options/context.rs +++ b/crates/seatbelt/src/options/context.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::borrow::Cow; diff --git a/crates/seatbelt/src/options/define_fn_wrapper.rs b/crates/seatbelt/src/options/define_fn_wrapper.rs index 661fb955..73097a35 100644 --- a/crates/seatbelt/src/options/define_fn_wrapper.rs +++ b/crates/seatbelt/src/options/define_fn_wrapper.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. /// A macro to generate `Fn` like wrapper types with consistent patterns. /// diff --git a/crates/seatbelt/src/options/mod.rs b/crates/seatbelt/src/options/mod.rs index 0c8c3443..8fc99568 100644 --- a/crates/seatbelt/src/options/mod.rs +++ b/crates/seatbelt/src/options/mod.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. /// A flag indicating that the required property is set. #[non_exhaustive] diff --git a/crates/seatbelt/src/retry/args.rs b/crates/seatbelt/src/retry/args.rs index 131c8034..d600950b 100644 --- a/crates/seatbelt/src/retry/args.rs +++ b/crates/seatbelt/src/retry/args.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index a3997afd..68bdbf69 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::cmp::min; use std::time::Duration; diff --git a/crates/seatbelt/src/retry/callbacks.rs b/crates/seatbelt/src/retry/callbacks.rs index c7f14330..4f009cf9 100644 --- a/crates/seatbelt/src/retry/callbacks.rs +++ b/crates/seatbelt/src/retry/callbacks.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use super::{CloneArgs, OnRetryArgs, RecoveryArgs, RestoreInputArgs}; use crate::RecoveryInfo; diff --git a/crates/seatbelt/src/retry/constants.rs b/crates/seatbelt/src/retry/constants.rs index 202d4cbd..2aa8815f 100644 --- a/crates/seatbelt/src/retry/constants.rs +++ b/crates/seatbelt/src/retry/constants.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 5324b25f..5aac517b 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::marker::PhantomData; use std::time::Duration; diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index bd1d67ce..a767b4af 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Retry resilience middleware for services, applications, and libraries. //! diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index d5c94972..711e57af 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::ops::ControlFlow; use std::time::Duration; diff --git a/crates/seatbelt/src/retry/telemetry.rs b/crates/seatbelt/src/retry/telemetry.rs index 7494f06f..05fc4a38 100644 --- a/crates/seatbelt/src/retry/telemetry.rs +++ b/crates/seatbelt/src/retry/telemetry.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. /// The name of the retry event for telemetry reporting. pub(super) const RETRY_EVENT: &str = "retry"; diff --git a/crates/seatbelt/src/rnd.rs b/crates/seatbelt/src/rnd.rs index 41971b77..70b92484 100644 --- a/crates/seatbelt/src/rnd.rs +++ b/crates/seatbelt/src/rnd.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::fmt::Debug; diff --git a/crates/seatbelt/src/telemetry/metrics.rs b/crates/seatbelt/src/telemetry/metrics.rs index 55d84e4c..558673f0 100644 --- a/crates/seatbelt/src/telemetry/metrics.rs +++ b/crates/seatbelt/src/telemetry/metrics.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use opentelemetry::InstrumentationScope; use opentelemetry::metrics::{Counter, Meter, MeterProvider}; diff --git a/crates/seatbelt/src/telemetry/mod.rs b/crates/seatbelt/src/telemetry/mod.rs index 945a9edf..30cc6040 100644 --- a/crates/seatbelt/src/telemetry/mod.rs +++ b/crates/seatbelt/src/telemetry/mod.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! # Service Middleware Telemetry //! diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs index 0e952edc..9929a5db 100644 --- a/crates/seatbelt/src/testing.rs +++ b/crates/seatbelt/src/testing.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use opentelemetry::KeyValue; use opentelemetry_sdk::metrics::data::{AggregatedMetrics, Metric, MetricData}; diff --git a/crates/seatbelt/src/timeout/args.rs b/crates/seatbelt/src/timeout/args.rs index ad70b0f3..a88c381b 100644 --- a/crates/seatbelt/src/timeout/args.rs +++ b/crates/seatbelt/src/timeout/args.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; diff --git a/crates/seatbelt/src/timeout/callbacks.rs b/crates/seatbelt/src/timeout/callbacks.rs index f714d1d1..f0f2ff00 100644 --- a/crates/seatbelt/src/timeout/callbacks.rs +++ b/crates/seatbelt/src/timeout/callbacks.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::time::Duration; diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index fb04fbc5..d1f0cc4f 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::marker::PhantomData; use std::time::Duration; diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index 51f86edb..c3e7ac8c 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. //! Timeout resilience middleware for services, applications, and libraries. //! diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 36546d16..fc17094c 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. use std::borrow::Cow; use std::time::Duration; diff --git a/crates/seatbelt/src/timeout/telemetry.rs b/crates/seatbelt/src/timeout/telemetry.rs index 314df271..989a3ec4 100644 --- a/crates/seatbelt/src/timeout/telemetry.rs +++ b/crates/seatbelt/src/timeout/telemetry.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. /// The name of the timeout event for telemetry reporting. pub(super) const TIMEOUT_EVENT_NAME: &str = "timeout"; From ae01de470cab3c575c67b0522bd019acba9173ab Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 17:42:38 +0100 Subject: [PATCH 19/60] fixes --- crates/seatbelt/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 0e28e3a7..12003fbe 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -21,7 +21,7 @@ that can automatically handle timeouts, retries, and other failure scenarios. ## Runtime Agnostic Design -The seatbelt crate is designed to be **runtime agnostic** and works seamlessly across any +The `seatbelt` crate is designed to be **runtime agnostic** and works seamlessly across any async runtime. This flexibility allows you to use the same resilience patterns across different projects and migrate between runtimes without changing your resilience patterns. @@ -67,7 +67,7 @@ let result = service.execute("input data".to_string()).await; > operations like delays, timeouts, and backoff calculations. The clock is passed through > [`Context`][__link4] when creating middleware layers. -See [Built-in Middlewares](#built-in-middleware) for more details. +See [Built-in Middleware](#built-in-middleware) for more details. ## Recovery Metadata @@ -105,7 +105,7 @@ This crate supports several optional features that can be enabled to extend func This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG2dhk5l0Ky--G-zcUMSmDnfBG6s2B3IYE_rxGzkEJPiyOJeqYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG1hGrYSwKcJlG5sEnPybj4Z2G2k1NmMcyogWG2e6c2EE42AfYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI [__link0]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link1]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link10]: https://crates.io/crates/tick/0.1.2 From 320d404c529b13e43cddb4aba5d10bbe0bd92085 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 17:47:50 +0100 Subject: [PATCH 20/60] fixes --- Cargo.lock | 1 - crates/seatbelt/Cargo.toml | 1 - crates/seatbelt/src/circuit/args.rs | 10 +++++----- crates/seatbelt/src/circuit/half_open_mode.rs | 4 ++-- crates/seatbelt/src/circuit/mod.rs | 2 +- crates/seatbelt/src/retry/mod.rs | 2 +- crates/seatbelt/src/timeout/mod.rs | 2 +- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 305f3f3b..bc142145 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1360,7 +1360,6 @@ name = "seatbelt" version = "0.1.0" dependencies = [ "alloc_tracker", - "anyhow", "criterion", "fastrand", "futures", diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index f9e9fc5d..98bc71b4 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -42,7 +42,6 @@ tracing = { workspace = true, optional = true } [dev-dependencies] alloc_tracker.workspace = true -anyhow.workspace = true criterion.workspace = true fastrand.workspace = true futures = { workspace = true, features = ["executor"] } diff --git a/crates/seatbelt/src/circuit/args.rs b/crates/seatbelt/src/circuit/args.rs index d2e9d89f..4c512b50 100644 --- a/crates/seatbelt/src/circuit/args.rs +++ b/crates/seatbelt/src/circuit/args.rs @@ -7,7 +7,7 @@ use tick::Clock; use crate::circuit::PartitionKey; -/// Arguments for the [`recovery_with`][super::CircuitBreakerLayer::recovery_with] callback function. +/// Arguments for the [`recovery_with`][super::CircuitLayer::recovery_with] callback function. /// /// Provides context for recovery classification in the circuit breaker. #[derive(Debug)] @@ -31,7 +31,7 @@ impl RecoveryArgs<'_> { } } -/// Arguments for the [`rejected_input`][super::CircuitBreakerLayer::rejected_input] callback function. +/// Arguments for the [`rejected_input`][super::CircuitLayer::rejected_input] callback function. /// /// Provides context for generating outputs when the inputs are rejected by the circuit breaker. #[derive(Debug)] @@ -47,7 +47,7 @@ impl RejectedInputArgs<'_> { } } -/// Arguments for the [`on_probing`][super::CircuitBreakerLayer::on_probing] callback function. +/// Arguments for the [`on_probing`][super::CircuitLayer::on_probing] callback function. /// /// Provides context when the circuit breaker enters the probing state to test if the service has recovered. #[derive(Debug)] @@ -64,7 +64,7 @@ impl OnProbingArgs<'_> { } } -/// Arguments for the [`on_closed`][super::CircuitBreakerLayer::on_closed] callback function. +/// Arguments for the [`on_closed`][super::CircuitLayer::on_closed] callback function. /// /// Provides context when the circuit breaker transitions to the closed state, allowing normal operation. #[derive(Debug)] @@ -88,7 +88,7 @@ impl OnClosedArgs<'_> { } } -/// Arguments for the [`on_opened`][super::CircuitBreakerLayer::on_opened] callback function. +/// Arguments for the [`on_opened`][super::CircuitLayer::on_opened] callback function. /// /// Provides context when the circuit breaker transitions to the open state, blocking requests due to failures. #[derive(Debug)] diff --git a/crates/seatbelt/src/circuit/half_open_mode.rs b/crates/seatbelt/src/circuit/half_open_mode.rs index 57c696f3..364cd4ce 100644 --- a/crates/seatbelt/src/circuit/half_open_mode.rs +++ b/crates/seatbelt/src/circuit/half_open_mode.rs @@ -15,7 +15,7 @@ use crate::circuit::engine::probing::ProbesOptions; /// Currently, two modes are supported: /// /// - [`HalfOpenMode::quick`]: Allows a single probe request to determine if the service has recovered. -/// - [`HalfOpenMode::reliable`](default): Gradually increases the percentage of probing requests over multiple stages. +/// - [`HalfOpenMode::reliable`]: Gradually increases the percentage of probing requests over multiple stages (default). #[derive(Debug, Clone, PartialEq)] pub struct HalfOpenMode { inner: Mode, @@ -52,7 +52,7 @@ impl HalfOpenMode { /// # Arguments /// /// - `stage_duration` - Optional custom stage duration for each probing stage. If not provided, - /// the value of [`break_duration`][CircuitBreakerLayer::break_duration] is used. The provided stage + /// the value of [`break_duration`][crate::circuit::CircuitLayer::break_duration] is used. The provided stage /// duration is clamped to a minimum of 1 second. #[must_use] pub fn reliable(stage_duration: impl Into>) -> Self { diff --git a/crates/seatbelt/src/circuit/mod.rs b/crates/seatbelt/src/circuit/mod.rs index 79d7006d..afcef32f 100644 --- a/crates/seatbelt/src/circuit/mod.rs +++ b/crates/seatbelt/src/circuit/mod.rs @@ -166,7 +166,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when circuit state transitions occur and when requests are rejected //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`] +//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`][crate::Context::pipeline_name] //! - `resilience.strategy.name`: Circuit breaker identifier from [`Circuit::layer`] //! - `resilience.event.name`: One of: //! - `circuit_opened`: When the circuit transitions to open state due to failure threshold being exceeded diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index a767b4af..c94f7ca6 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -90,7 +90,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted for each attempt that should be retried (including the final retry attempt) //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`] +//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`][crate::Context::pipeline_name] //! - `resilience.strategy.name`: Timeout identifier from [`Retry::layer`] //! - `resilience.event.name`: Always `retry` //! - `resilience.attempt.index`: Attempt index (0-based) diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index c3e7ac8c..793e511b 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -80,7 +80,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when a timeout occurs //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`] +//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`][crate::Context::pipeline_name] //! - `resilience.strategy.name`: Timeout identifier from [`Timeout::layer`] //! - `resilience.event.name`: Always `timeout` //! From 6d40b711a1efb23945c75fcd846b9e923835d143 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 17:56:16 +0100 Subject: [PATCH 21/60] cleanup --- crates/seatbelt/src/circuit/layer.rs | 2 +- crates/seatbelt/src/circuit/service.rs | 2 +- crates/seatbelt/src/lib.rs | 7 ++----- crates/seatbelt/src/retry/layer.rs | 2 +- crates/seatbelt/src/retry/service.rs | 2 +- crates/seatbelt/src/timeout/layer.rs | 2 +- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index dca6be77..5dc66349 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -12,7 +12,7 @@ use super::{ PartionKeyProvider, PartitionKey, RejectedInput, RejectedInputArgs, ShouldRecover, }; use crate::circuit::engine::probing::ProbesOptions; -use crate::service::Layer; +use crate::Layer; use crate::{Context, EnableIf, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring circuit breaker resilience middleware. diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index 127fb580..d75fae4c 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -172,7 +172,7 @@ mod tests { use super::*; use crate::circuit::constants::DEFAULT_BREAK_DURATION; use crate::circuit::{EngineFake, HalfOpenMode, HealthInfo, Stats}; - use crate::service::Layer; + use crate::Layer; use crate::{Context, RecoveryInfo, Set}; #[test] diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index ecedd64b..9e3dba52 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -138,12 +138,9 @@ pub mod retry; #[cfg(any(feature = "circuit", test))] pub mod circuit; -/// Re-exported types from the [`layered`] crate. +#[doc(inline)] #[cfg(any(feature = "service", test))] -pub mod service { - #[doc(inline)] - pub use layered::{Layer, Service, Stack}; -} +pub use layered::{Layer, Service, Stack}; #[cfg(any(feature = "retry", feature = "circuit", test))] mod rnd; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 5aac517b..dee772e9 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -9,7 +9,7 @@ use opentelemetry::StringValue; use crate::retry::backoff::BackoffOptions; use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; -use crate::service::Layer; +use crate::Layer; use crate::{Backoff, Context, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring retry resilience middleware. diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 711e57af..e338c749 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -260,7 +260,7 @@ mod tests { use super::*; use crate::options::Backoff; use crate::retry::RetryLayer; - use crate::service::Layer; + use crate::Layer; use crate::testing::MetricTester; use crate::{Context, Set}; diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index d1f0cc4f..3fbc898b 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -6,7 +6,7 @@ use std::time::Duration; use opentelemetry::StringValue; -use crate::service::Layer; +use crate::Layer; use crate::timeout::{ OnTimeout, OnTimeoutArgs, Timeout, TimeoutOutput as TimeoutOutputCallback, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs, }; From 5057b59bacc02d3d18cfc8852d731019e8ab0385 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 18:00:43 +0100 Subject: [PATCH 22/60] refine the features --- crates/seatbelt/Cargo.toml | 8 +++----- crates/seatbelt/src/lib.rs | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 98bc71b4..8a431cd9 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -21,12 +21,10 @@ all-features = true [features] default = [] -service = ["dep:layered"] -telemetry = [] -metrics = ["dep:opentelemetry", "opentelemetry/metrics", "telemetry"] -logs = ["telemetry", "dep:tracing"] +metrics = ["dep:opentelemetry", "opentelemetry/metrics"] +logs = ["dep:tracing"] # This exposes a common APIs for building resilience middlewares. -options = ["dep:tick", "telemetry", "service", "metrics", "logs"] +options = ["dep:tick", "dep:layered", "metrics", "logs"] timeout = ["options"] retry = ["options", "dep:fastrand"] circuit = ["options", "dep:fastrand"] diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 9e3dba52..12f1d50d 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -126,7 +126,7 @@ pub(crate) use crate::options::MaxAttempts; #[cfg(any(feature = "options", test))] pub use crate::options::{Attempt, Backoff, Context, NotSet, Set}; -#[cfg(any(feature = "telemetry", test))] +#[cfg(any(feature = "options", test))] pub mod telemetry; #[cfg(any(feature = "timeout", test))] @@ -139,7 +139,7 @@ pub mod retry; pub mod circuit; #[doc(inline)] -#[cfg(any(feature = "service", test))] +#[cfg(any(feature = "options", test))] pub use layered::{Layer, Service, Stack}; #[cfg(any(feature = "retry", feature = "circuit", test))] From ba8723140218155cf4d788d9046dbb2be140a4e6 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 18:04:53 +0100 Subject: [PATCH 23/60] refine the features --- crates/seatbelt/Cargo.toml | 11 +++++------ crates/seatbelt/src/lib.rs | 21 +++++++++------------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 8a431cd9..3db69e6d 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -21,13 +21,12 @@ all-features = true [features] default = [] +timeout = ["_middleware"] +retry = ["_middleware", "dep:fastrand"] +circuit = ["_middleware", "dep:fastrand"] metrics = ["dep:opentelemetry", "opentelemetry/metrics"] logs = ["dep:tracing"] -# This exposes a common APIs for building resilience middlewares. -options = ["dep:tick", "dep:layered", "metrics", "logs"] -timeout = ["options"] -retry = ["options", "dep:fastrand"] -circuit = ["options", "dep:fastrand"] +_middleware = ["dep:tick", "dep:layered", "metrics", "logs"] [dependencies] fastrand = { workspace = true, optional = true } @@ -87,7 +86,7 @@ required-features = ["circuit"] [[bench]] name = "observability" harness = false -required-features = ["options"] +required-features = ["_middleware"] [[bench]] name = "timeout" diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 12f1d50d..a8ab951c 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -99,34 +99,31 @@ //! //! This crate supports several optional features that can be enabled to extend functionality: //! -//! - `options`: Enables common APIs for building resilience middleware, including [`Context`]. -//! Requires [`tick`] for timing operations. -//! - `service`: Re-exports common types for building middleware from [`layered`] crate. -//! - `telemetry`: Enables telemetry and observability features using OpenTelemetry for monitoring -//! resilience operations. -//! - `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. + //! - `timeout`: Enables the [`timeout`] middleware for canceling long-running operations. //! - `retry`: Enables the [`retry`] middleware for automatically retrying failed operations with //! configurable backoff strategies, jitter, and recovery classification. //! - `circuit`: Enables the [`circuit`] middleware for preventing cascading failures. +//! - `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. +//! - `logs`: Enables structured logging for resilience middleware using the `tracing` crate. #[doc(inline)] pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; -#[cfg(any(feature = "options", test))] +#[cfg(any(feature = "_middleware", test))] pub(crate) mod options; -#[cfg(any(feature = "options", test))] +#[cfg(any(feature = "_middleware", test))] pub(crate) use options::EnableIf; -#[cfg(any(feature = "options", test))] +#[cfg(any(feature = "_middleware", test))] pub(crate) use options::define_fn_wrapper; #[cfg(any(feature = "retry", test))] pub(crate) use crate::options::MaxAttempts; -#[cfg(any(feature = "options", test))] +#[cfg(any(feature = "_middleware", test))] pub use crate::options::{Attempt, Backoff, Context, NotSet, Set}; -#[cfg(any(feature = "options", test))] +#[cfg(any(feature = "_middleware", test))] pub mod telemetry; #[cfg(any(feature = "timeout", test))] @@ -139,7 +136,7 @@ pub mod retry; pub mod circuit; #[doc(inline)] -#[cfg(any(feature = "options", test))] +#[cfg(any(feature = "_middleware", test))] pub use layered::{Layer, Service, Stack}; #[cfg(any(feature = "retry", feature = "circuit", test))] From cc1aa9cdac1ccdc2fbf2304c57f826f4235538e6 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 18:07:43 +0100 Subject: [PATCH 24/60] PR comments --- README.md | 2 +- crates/seatbelt/Cargo.toml | 2 +- crates/seatbelt/README.md | 25 +++++++++---------------- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 1f7aca25..fdb23bac 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ These are the crates built out of this repo: - [`ohno`](./crates/ohno/README.md) - High-quality Rust error handling. - [`ohno_macros`](./crates/ohno_macros/README.md) - Macros for the `ohno` crate. - [`recoverable`](./crates/recoverable/README.md) - Recovery information and classification for resilience patterns. -- [`seatbelt`](./crates/seatbelt/README.md) - Resilience and recovery mechanisms for services, applications, and libraries. +- [`seatbelt`](./crates/seatbelt/README.md) - Resilience and recovery mechanisms for fallible operations. - [`thread_aware`](./crates/thread_aware/README.md) - Facilities to support thread-isolated state. - [`thread_aware_macros`](./crates/thread_aware_macros/README.md) - Macros for the `thread_aware` crate. - [`thread_aware_macros_impl`](./crates/thread_aware_macros_impl/README.md) - Macros for the `thread_aware` crate. diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 3db69e6d..49d6c51a 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -3,7 +3,7 @@ [package] name = "seatbelt" -description = "Resilience and recovery mechanisms for services, applications, and libraries." +description = "Resilience and recovery mechanisms for fallible operations." version = "0.1.0" readme = "README.md" keywords = ["oxidizer", "resilience", "tower", "recovery", "retry", "circuit-breaker"] diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 12003fbe..72408611 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -88,16 +88,12 @@ for each module for details on how to use them. This crate supports several optional features that can be enabled to extend functionality: -* `options`: Enables common APIs for building resilience middleware, including [`Context`][__link9]. - Requires [`tick`][__link10] for timing operations. -* `service`: Re-exports common types for building middleware from [`layered`][__link11] crate. -* `telemetry`: Enables telemetry and observability features using OpenTelemetry for monitoring - resilience operations. -* `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. -* `timeout`: Enables the [`timeout`][__link12] middleware for canceling long-running operations. -* `retry`: Enables the [`retry`][__link13] middleware for automatically retrying failed operations with +* `timeout`: Enables the [`timeout`][__link9] middleware for canceling long-running operations. +* `retry`: Enables the [`retry`][__link10] middleware for automatically retrying failed operations with configurable backoff strategies, jitter, and recovery classification. -* `circuit`: Enables the [`circuit`][__link14] middleware for preventing cascading failures. +* `circuit`: Enables the [`circuit`][__link11] middleware for preventing cascading failures. +* `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. +* `logs`: Enables structured logging for resilience middleware using the `tracing` crate.
@@ -105,14 +101,11 @@ This crate supports several optional features that can be enabled to extend func This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG1hGrYSwKcJlG5sEnPybj4Z2G2k1NmMcyogWG2e6c2EE42AfYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG0Hjz9kfeI26GzMvGpstB3vOG0ewSOFsNqPIG2iqBNy7tUwXYWSDgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI [__link0]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link1]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery - [__link10]: https://crates.io/crates/tick/0.1.2 - [__link11]: https://crates.io/crates/layered/0.1.0 - [__link12]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html - [__link13]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html - [__link14]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html + [__link10]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html + [__link11]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html [__link2]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link3]: https://crates.io/crates/tick/0.1.2 [__link4]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context @@ -120,4 +113,4 @@ This crate was developed as part of The Oxidizer Project. Br [__link6]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html [__link7]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html - [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context + [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html From cc28b68659d5f670c605f7f76559ddcf99feba01 Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 18:17:18 +0100 Subject: [PATCH 25/60] fixes --- Cargo.toml | 10 +++++----- crates/seatbelt/Cargo.toml | 7 +++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 77971005..a8476857 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,9 +52,11 @@ criterion = { version = "0.7.0", default-features = false } derive_more = { version = "2.0.1", default-features = false } duct = { version = "1.1.1", default-features = false } dynosaur = { version = "0.3.0", default-features = false } +fastrand = { version = "2.3.0", default-features = false, features = ["std"] } futures = { version = "0.3.31", default-features = false } futures-core = { version = "0.3.31", default-features = false } futures-util = { version = "0.3.31", default-features = false } +http = { version = "1.2.0", default-features = false, features = ["std"] } infinity_pool = { version = "0.8.1", default-features = false } insta = { version = "1.44.1", default-features = false } jiff = { version = "0.2.16", default-features = false } @@ -66,6 +68,9 @@ new_zealand = { version = "1.0.1", default-features = false } nm = { version = "0.1.21", default-features = false } num-traits = { version = "0.2.19", default-features = false } once_cell = { version = "1.21.3", default-features = false } +opentelemetry = { version = "0.31.0", default-features = false } +opentelemetry-stdout = { version = "0.31.0", default-features = false } +opentelemetry_sdk = { version = "0.31.0", default-features = false } pin-project-lite = { version = "0.2.13", default-features = false } pretty_assertions = { version = "1.4.1", default-features = false } prettyplease = { version = "0.2.37", default-features = false } @@ -95,11 +100,6 @@ trybuild = { version = "1.0.114", default-features = false } typeid = { version = "1.0.3", default-features = false } windows-sys = { version = "0.61.2", default-features = false } xxhash-rust = { version = "0.8.15", default-features = false } -fastrand = { version = "2.3.0", default-features = false, features = ["std"] } -opentelemetry = { version = "0.31.0", default-features = false } -opentelemetry_sdk = { version = "0.31.0", default-features = false } -opentelemetry-stdout = { version = "0.31.0", default-features = false } -http = { version = "1.2.0", default-features = false, features = ["std"] } [workspace.lints.rust] ambiguous_negative_literals = "warn" diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 49d6c51a..2bc4a642 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -16,6 +16,13 @@ license.workspace = true homepage.workspace = true repository.workspace = true +[package.metadata.cargo_check_external_types] +allowed_external_types = [ + "tick::Clock", + "layered::{Stack, Layer, Service}", + "opentelemetry::metrics::MeterProvider", +] + [package.metadata.docs.rs] all-features = true From 45155d43cb2d608467b8ae3474b55c4d606bbf1d Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 18:27:41 +0100 Subject: [PATCH 26/60] update library docs --- crates/seatbelt/README.md | 78 +++++++++++++++++++------------------- crates/seatbelt/src/lib.rs | 63 +++++++++++++++--------------- 2 files changed, 68 insertions(+), 73 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 72408611..4b662150 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -13,22 +13,7 @@ -Resilience and fault handling for applications and libraries. - -This crate helps applications handle transient faults gracefully through composable -resilience patterns. It provides resilience middleware for building robust distributed systems -that can automatically handle timeouts, retries, and other failure scenarios. - -## Runtime Agnostic Design - -The `seatbelt` crate is designed to be **runtime agnostic** and works seamlessly across any -async runtime. This flexibility allows you to use the same resilience patterns across -different projects and migrate between runtimes without changing your resilience patterns. - -## Core Types - -* [`RecoveryInfo`][__link0]: Classifies errors as recoverable (transient) or non-recoverable (permanent). -* [`Recovery`][__link1]: A trait for types that can determine their recoverability. +Resilience and recovery mechanisms for fallible operations. ## Quick Start @@ -63,37 +48,50 @@ let result = service.execute("input data".to_string()).await; ``` > - > **Note**: Resilience middleware requires [`Clock`][__link2] from the [`tick`][__link3] crate for timing + > **Note**: Resilience middleware requires [`Clock`][__link0] from the [`tick`][__link1] crate for timing > operations like delays, timeouts, and backoff calculations. The clock is passed through - > [`Context`][__link4] when creating middleware layers. + > [`Context`][__link2] when creating middleware layers. + +## Why? + +This crate provides production-ready resilience middleware with excellent telemetry for building +robust distributed systems that can automatically handle timeouts, retries, and other failure +scenarios. + +* **Runtime agnostic** - Works seamlessly across any async runtime. Use the same resilience + patterns across different projects and migrate between runtimes without changes. +* **Production-ready** - Battle-tested middleware with sensible defaults and comprehensive + configuration options. +* **Excellent telemetry** - Built-in support for metrics and structured logging to monitor + resilience behavior in production. -See [Built-in Middleware](#built-in-middleware) for more details. +## Overview -## Recovery Metadata +### Core Types -Error types can implement [`Recovery`][__link5] to provide additional metadata about their retry characteristics. -This enables callers to use a unified, streamlined approach when determining whether to retry an -operation, regardless of the underlying error type or source. +* [`Context`][__link3] - Holds shared state for resilience middleware, including the clock. +* [`RecoveryInfo`][__link4] - Classifies errors as recoverable (transient) or non-recoverable (permanent). +* [`Recovery`][__link5] - A trait for types that can determine their recoverability. -## Built-in Middleware +### Built-in Middleware This crate provides built-in resilience middleware that you can use out of the box. See the documentation for each module for details on how to use them. -* [`timeout`][__link6]: Cancels long-running operations. -* [`retry`][__link7]: Automatically retries failed operations with configurable backoff strategies. -* [`circuit`][__link8]: Prevents cascading failures by stopping requests to unhealthy services. +* [`timeout`][__link6] - Middleware that cancels long-running operations. +* [`retry`][__link7] - Middleware that automatically retries failed operations. +* [`circuit`][__link8] - Middleware that prevents cascading failures. -### Features +## Features -This crate supports several optional features that can be enabled to extend functionality: +This crate provides several optional features that can be enabled in your `Cargo.toml`: -* `timeout`: Enables the [`timeout`][__link9] middleware for canceling long-running operations. -* `retry`: Enables the [`retry`][__link10] middleware for automatically retrying failed operations with +* **`timeout`** - Enables the [`timeout`][__link9] middleware for canceling long-running operations. +* **`retry`** - Enables the [`retry`][__link10] middleware for automatically retrying failed operations with configurable backoff strategies, jitter, and recovery classification. -* `circuit`: Enables the [`circuit`][__link11] middleware for preventing cascading failures. -* `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. -* `logs`: Enables structured logging for resilience middleware using the `tracing` crate. +* **`circuit`** - Enables the [`circuit`][__link11] middleware for preventing cascading failures. +* **`metrics`** - Exposes the OpenTelemetry metrics API for collecting and reporting metrics. +* **`logs`** - Enables structured logging for resilience middleware using the `tracing` crate.
@@ -101,14 +99,14 @@ This crate supports several optional features that can be enabled to extend func This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG0Hjz9kfeI26GzMvGpstB3vOG0ewSOFsNqPIG2iqBNy7tUwXYWSDgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI - [__link0]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo - [__link1]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG-il1i73I0eIG01mMeTMhRSaGy-RA787VSNNG1P5sUqhvHhWYWSDgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI + [__link0]: https://docs.rs/tick/0.1.2/tick/?search=Clock + [__link1]: https://crates.io/crates/tick/0.1.2 [__link10]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html [__link11]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html - [__link2]: https://docs.rs/tick/0.1.2/tick/?search=Clock - [__link3]: https://crates.io/crates/tick/0.1.2 - [__link4]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context + [__link2]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context + [__link3]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context + [__link4]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link5]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link6]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html [__link7]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index a8ab951c..12ef641c 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -13,22 +13,7 @@ ) )] -//! Resilience and fault handling for applications and libraries. -//! -//! This crate helps applications handle transient faults gracefully through composable -//! resilience patterns. It provides resilience middleware for building robust distributed systems -//! that can automatically handle timeouts, retries, and other failure scenarios. -//! -//! # Runtime Agnostic Design -//! -//! The `seatbelt` crate is designed to be **runtime agnostic** and works seamlessly across any -//! async runtime. This flexibility allows you to use the same resilience patterns across -//! different projects and migrate between runtimes without changing your resilience patterns. -//! -//! # Core Types -//! -//! - [`RecoveryInfo`]: Classifies errors as recoverable (transient) or non-recoverable (permanent). -//! - [`Recovery`]: A trait for types that can determine their recoverability. +//! Resilience and recovery mechanisms for fallible operations. //! //! # Quick Start //! @@ -78,34 +63,46 @@ //! > operations like delays, timeouts, and backoff calculations. The clock is passed through //! > [`Context`] when creating middleware layers. //! -//! See [Built-in Middleware](#built-in-middleware) for more details. +//! # Why? //! -//! # Recovery Metadata +//! This crate provides production-ready resilience middleware with excellent telemetry for building +//! robust distributed systems that can automatically handle timeouts, retries, and other failure +//! scenarios. //! -//! Error types can implement [`Recovery`] to provide additional metadata about their retry characteristics. -//! This enables callers to use a unified, streamlined approach when determining whether to retry an -//! operation, regardless of the underlying error type or source. +//! - **Runtime agnostic** - Works seamlessly across any async runtime. Use the same resilience +//! patterns across different projects and migrate between runtimes without changes. +//! - **Production-ready** - Battle-tested middleware with sensible defaults and comprehensive +//! configuration options. +//! - **Excellent telemetry** - Built-in support for metrics and structured logging to monitor +//! resilience behavior in production. //! -//! # Built-in Middleware +//! # Overview +//! +//! ## Core Types +//! +//! - [`Context`] - Holds shared state for resilience middleware, including the clock. +//! - [`RecoveryInfo`] - Classifies errors as recoverable (transient) or non-recoverable (permanent). +//! - [`Recovery`] - A trait for types that can determine their recoverability. +//! +//! ## Built-in Middleware //! //! This crate provides built-in resilience middleware that you can use out of the box. See the documentation //! for each module for details on how to use them. //! -//! - [`timeout`]: Cancels long-running operations. -//! - [`retry`]: Automatically retries failed operations with configurable backoff strategies. -//! - [`circuit`]: Prevents cascading failures by stopping requests to unhealthy services. +//! - [`timeout`] - Middleware that cancels long-running operations. +//! - [`retry`] - Middleware that automatically retries failed operations. +//! - [`circuit`] - Middleware that prevents cascading failures. //! -//! ## Features +//! # Features //! -//! This crate supports several optional features that can be enabled to extend functionality: +//! This crate provides several optional features that can be enabled in your `Cargo.toml`: //! - -//! - `timeout`: Enables the [`timeout`] middleware for canceling long-running operations. -//! - `retry`: Enables the [`retry`] middleware for automatically retrying failed operations with +//! - **`timeout`** - Enables the [`timeout`] middleware for canceling long-running operations. +//! - **`retry`** - Enables the [`retry`] middleware for automatically retrying failed operations with //! configurable backoff strategies, jitter, and recovery classification. -//! - `circuit`: Enables the [`circuit`] middleware for preventing cascading failures. -//! - `metrics`: Exposes the OpenTelemetry metrics API for collecting and reporting metrics. -//! - `logs`: Enables structured logging for resilience middleware using the `tracing` crate. +//! - **`circuit`** - Enables the [`circuit`] middleware for preventing cascading failures. +//! - **`metrics`** - Exposes the OpenTelemetry metrics API for collecting and reporting metrics. +//! - **`logs`** - Enables structured logging for resilience middleware using the `tracing` crate. #[doc(inline)] pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; From 6cecc790d9a4fdbfd7c77f8d4a7c809cea50e46e Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 18:30:14 +0100 Subject: [PATCH 27/60] improvements --- crates/seatbelt/README.md | 44 ++++++++++++++++++++++---------------- crates/seatbelt/src/lib.rs | 3 +++ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 4b662150..e46e3d06 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -52,6 +52,10 @@ let result = service.execute("input data".to_string()).await; > operations like delays, timeouts, and backoff calculations. The clock is passed through > [`Context`][__link2] when creating middleware layers. + > + > **Note**: This crate uses the [`layered`][__link3] crate for composing middleware. The middleware layers + > can be stacked together using tuples and built into a service using the [`Stack`][__link4] trait. + ## Why? This crate provides production-ready resilience middleware with excellent telemetry for building @@ -69,27 +73,27 @@ scenarios. ### Core Types -* [`Context`][__link3] - Holds shared state for resilience middleware, including the clock. -* [`RecoveryInfo`][__link4] - Classifies errors as recoverable (transient) or non-recoverable (permanent). -* [`Recovery`][__link5] - A trait for types that can determine their recoverability. +* [`Context`][__link5] - Holds shared state for resilience middleware, including the clock. +* [`RecoveryInfo`][__link6] - Classifies errors as recoverable (transient) or non-recoverable (permanent). +* [`Recovery`][__link7] - A trait for types that can determine their recoverability. ### Built-in Middleware This crate provides built-in resilience middleware that you can use out of the box. See the documentation for each module for details on how to use them. -* [`timeout`][__link6] - Middleware that cancels long-running operations. -* [`retry`][__link7] - Middleware that automatically retries failed operations. -* [`circuit`][__link8] - Middleware that prevents cascading failures. +* [`timeout`][__link8] - Middleware that cancels long-running operations. +* [`retry`][__link9] - Middleware that automatically retries failed operations. +* [`circuit`][__link10] - Middleware that prevents cascading failures. ## Features This crate provides several optional features that can be enabled in your `Cargo.toml`: -* **`timeout`** - Enables the [`timeout`][__link9] middleware for canceling long-running operations. -* **`retry`** - Enables the [`retry`][__link10] middleware for automatically retrying failed operations with +* **`timeout`** - Enables the [`timeout`][__link11] middleware for canceling long-running operations. +* **`retry`** - Enables the [`retry`][__link12] middleware for automatically retrying failed operations with configurable backoff strategies, jitter, and recovery classification. -* **`circuit`** - Enables the [`circuit`][__link11] middleware for preventing cascading failures. +* **`circuit`** - Enables the [`circuit`][__link13] middleware for preventing cascading failures. * **`metrics`** - Exposes the OpenTelemetry metrics API for collecting and reporting metrics. * **`logs`** - Enables structured logging for resilience middleware using the `tracing` crate. @@ -99,16 +103,18 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG-il1i73I0eIG01mMeTMhRSaGy-RA787VSNNG1P5sUqhvHhWYWSDgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG8-jxRLAdSUYGz8tFJ1hv_siG0AGq6ysYdCHGyn4hzr6iA3-YWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI [__link0]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link1]: https://crates.io/crates/tick/0.1.2 - [__link10]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html - [__link11]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html + [__link10]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html + [__link11]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html + [__link12]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html + [__link13]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html [__link2]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context - [__link3]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context - [__link4]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo - [__link5]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery - [__link6]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html - [__link7]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html - [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html - [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html + [__link3]: https://crates.io/crates/layered/0.1.0 + [__link4]: https://docs.rs/layered/0.1.0/layered/?search=Stack + [__link5]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context + [__link6]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo + [__link7]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery + [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html + [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 12ef641c..34acda14 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -63,6 +63,9 @@ //! > operations like delays, timeouts, and backoff calculations. The clock is passed through //! > [`Context`] when creating middleware layers. //! +//! > **Note**: This crate uses the [`layered`] crate for composing middleware. The middleware layers +//! > can be stacked together using tuples and built into a service using the [`Stack`] trait. +//! //! # Why? //! //! This crate provides production-ready resilience middleware with excellent telemetry for building From 92ec61ce98a74f1b4b0651bba3a18bb3614f24cc Mon Sep 17 00:00:00 2001 From: martintomka Date: Mon, 19 Jan 2026 18:33:31 +0100 Subject: [PATCH 28/60] fixes --- crates/seatbelt/Cargo.toml | 11 ++++++++--- crates/seatbelt/src/circuit/layer.rs | 2 +- crates/seatbelt/src/circuit/service.rs | 2 +- crates/seatbelt/src/retry/layer.rs | 2 +- crates/seatbelt/src/retry/service.rs | 2 +- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 2bc4a642..135aea63 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -18,9 +18,14 @@ repository.workspace = true [package.metadata.cargo_check_external_types] allowed_external_types = [ - "tick::Clock", - "layered::{Stack, Layer, Service}", - "opentelemetry::metrics::MeterProvider", + "layered::layer::stack::Stack", + "layered::service::Service", + "opentelemetry::metrics::meter::MeterProvider", + "recoverable::Recovery", + "recoverable::RecoveryInfo", + "recoverable::RecoveryKind", + "tick::clock::Clock", + "tower_layer::Layer", ] [package.metadata.docs.rs] diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index 5dc66349..dc36ca70 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -11,8 +11,8 @@ use super::{ Circuit, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RejectedInput, RejectedInputArgs, ShouldRecover, }; -use crate::circuit::engine::probing::ProbesOptions; use crate::Layer; +use crate::circuit::engine::probing::ProbesOptions; use crate::{Context, EnableIf, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring circuit breaker resilience middleware. diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index d75fae4c..30e8a7df 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -170,9 +170,9 @@ mod tests { use tick::ClockControl; use super::*; + use crate::Layer; use crate::circuit::constants::DEFAULT_BREAK_DURATION; use crate::circuit::{EngineFake, HalfOpenMode, HealthInfo, Stats}; - use crate::Layer; use crate::{Context, RecoveryInfo, Set}; #[test] diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index dee772e9..159d36ef 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -6,10 +6,10 @@ use std::time::Duration; use opentelemetry::StringValue; +use crate::Layer; use crate::retry::backoff::BackoffOptions; use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; -use crate::Layer; use crate::{Backoff, Context, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring retry resilience middleware. diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index e338c749..081bbad3 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -258,9 +258,9 @@ mod tests { use tick::ClockControl; use super::*; + use crate::Layer; use crate::options::Backoff; use crate::retry::RetryLayer; - use crate::Layer; use crate::testing::MetricTester; use crate::{Context, Set}; From cf70a2c5849ee48d86be4ef90f57a5902655f535 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 11:00:34 +0100 Subject: [PATCH 29/60] reorganize the crate modules --- crates/seatbelt/Cargo.toml | 11 ++- crates/seatbelt/src/lib.rs | 24 ++---- .../seatbelt/src/{telemetry => }/metrics.rs | 5 +- crates/seatbelt/src/retry/callbacks.rs | 8 +- crates/seatbelt/src/retry/layer.rs | 11 +-- crates/seatbelt/src/retry/service.rs | 45 ++++++----- .../src/{options => shared}/attempt.rs | 0 .../src/{options => shared}/backoff.rs | 0 .../src/{options => shared}/context.rs | 76 ++++++++----------- .../seatbelt/src/{options => shared}/mod.rs | 17 ----- crates/seatbelt/src/timeout/callbacks.rs | 6 +- crates/seatbelt/src/timeout/layer.rs | 23 +++--- crates/seatbelt/src/timeout/mod.rs | 2 + crates/seatbelt/src/timeout/service.rs | 49 ++++++------ .../{telemetry/mod.rs => utils/attributes.rs} | 31 ++------ .../{options => utils}/define_fn_wrapper.rs | 26 +++---- crates/seatbelt/src/utils/mod.rs | 27 +++++++ crates/seatbelt/src/utils/telemetry_helper.rs | 10 +++ 18 files changed, 180 insertions(+), 191 deletions(-) rename crates/seatbelt/src/{telemetry => }/metrics.rs (91%) rename crates/seatbelt/src/{options => shared}/attempt.rs (100%) rename crates/seatbelt/src/{options => shared}/backoff.rs (100%) rename crates/seatbelt/src/{options => shared}/context.rs (81%) rename crates/seatbelt/src/{options => shared}/mod.rs (53%) rename crates/seatbelt/src/{telemetry/mod.rs => utils/attributes.rs} (56%) rename crates/seatbelt/src/{options => utils}/define_fn_wrapper.rs (80%) create mode 100644 crates/seatbelt/src/utils/mod.rs create mode 100644 crates/seatbelt/src/utils/telemetry_helper.rs diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 135aea63..a6d9f69b 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -33,20 +33,19 @@ all-features = true [features] default = [] -timeout = ["_middleware"] -retry = ["_middleware", "dep:fastrand"] -circuit = ["_middleware", "dep:fastrand"] +timeout = [] +retry = ["dep:fastrand"] +circuit = ["dep:fastrand"] metrics = ["dep:opentelemetry", "opentelemetry/metrics"] logs = ["dep:tracing"] -_middleware = ["dep:tick", "dep:layered", "metrics", "logs"] [dependencies] fastrand = { workspace = true, optional = true } futures = { workspace = true } -layered = { workspace = true, optional = true } +layered = { workspace = true } opentelemetry = { workspace = true, optional = true } recoverable = { workspace = true } -tick = { workspace = true, optional = true } +tick = { workspace = true } tracing = { workspace = true, optional = true } [dev-dependencies] diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 34acda14..4c67a0c5 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -110,21 +110,8 @@ #[doc(inline)] pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; -#[cfg(any(feature = "_middleware", test))] -pub(crate) mod options; - -#[cfg(any(feature = "_middleware", test))] -pub(crate) use options::EnableIf; -#[cfg(any(feature = "_middleware", test))] -pub(crate) use options::define_fn_wrapper; - -#[cfg(any(feature = "retry", test))] -pub(crate) use crate::options::MaxAttempts; -#[cfg(any(feature = "_middleware", test))] -pub use crate::options::{Attempt, Backoff, Context, NotSet, Set}; - -#[cfg(any(feature = "_middleware", test))] -pub mod telemetry; +pub(crate) mod shared; +pub use crate::shared::{Attempt, Backoff, Context, NotSet, Set}; #[cfg(any(feature = "timeout", test))] pub mod timeout; @@ -136,12 +123,17 @@ pub mod retry; pub mod circuit; #[doc(inline)] -#[cfg(any(feature = "_middleware", test))] pub use layered::{Layer, Service, Stack}; #[cfg(any(feature = "retry", feature = "circuit", test))] mod rnd; +#[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] +pub(crate) mod utils; + +#[cfg(any(feature = "metrics", test))] +mod metrics; + #[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] pub(crate) mod testing; diff --git a/crates/seatbelt/src/telemetry/metrics.rs b/crates/seatbelt/src/metrics.rs similarity index 91% rename from crates/seatbelt/src/telemetry/metrics.rs rename to crates/seatbelt/src/metrics.rs index 558673f0..3339a0a0 100644 --- a/crates/seatbelt/src/telemetry/metrics.rs +++ b/crates/seatbelt/src/metrics.rs @@ -2,7 +2,7 @@ // Licensed under the MIT License. use opentelemetry::InstrumentationScope; -use opentelemetry::metrics::{Counter, Meter, MeterProvider}; +use opentelemetry::metrics::{Meter, MeterProvider}; const METER_NAME: &str = "seatbelt"; const VERSION: &str = "v0.1.0"; @@ -17,7 +17,8 @@ pub(crate) fn create_meter(meter_provider: &dyn MeterProvider) -> Meter { ) } -pub(crate) fn create_resilience_event_counter(meter: &Meter) -> Counter { +#[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] +pub(crate) fn create_resilience_event_counter(meter: &Meter) -> opentelemetry::metrics::Counter { meter .u64_counter("resilience.event") .with_description("Emitted upon the occurrence of a resilience event.") diff --git a/crates/seatbelt/src/retry/callbacks.rs b/crates/seatbelt/src/retry/callbacks.rs index 4f009cf9..bedcd076 100644 --- a/crates/seatbelt/src/retry/callbacks.rs +++ b/crates/seatbelt/src/retry/callbacks.rs @@ -4,7 +4,7 @@ use super::{CloneArgs, OnRetryArgs, RecoveryArgs, RestoreInputArgs}; use crate::RecoveryInfo; -crate::define_fn_wrapper!(CloneInput(Fn(&mut In, CloneArgs) -> Option)); -crate::define_fn_wrapper!(ShouldRecover(Fn(&Out, RecoveryArgs) -> RecoveryInfo)); -crate::define_fn_wrapper!(OnRetry(Fn(&Out, OnRetryArgs))); -crate::define_fn_wrapper!(RestoreInput(Fn(&mut Out, RestoreInputArgs) -> Option)); +crate::utils::define_fn_wrapper!(CloneInput(Fn(&mut In, CloneArgs) -> Option)); +crate::utils::define_fn_wrapper!(ShouldRecover(Fn(&Out, RecoveryArgs) -> RecoveryInfo)); +crate::utils::define_fn_wrapper!(OnRetry(Fn(&Out, OnRetryArgs))); +crate::utils::define_fn_wrapper!(RestoreInput(Fn(&mut Out, RestoreInputArgs) -> Option)); diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 159d36ef..ae7ee638 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -1,16 +1,17 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use std::borrow::Cow; use std::marker::PhantomData; use std::time::Duration; -use opentelemetry::StringValue; - use crate::Layer; use crate::retry::backoff::BackoffOptions; use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; -use crate::{Backoff, Context, EnableIf, MaxAttempts, NotSet, Recovery, RecoveryInfo, Set}; +use crate::shared::MaxAttempts; +use crate::utils::EnableIf; +use crate::{Backoff, Context, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring retry resilience middleware. /// @@ -30,7 +31,7 @@ pub struct RetryLayer { should_recover: Option>, on_retry: Option>, enable_if: EnableIf, - strategy_name: StringValue, + strategy_name: Cow<'static, str>, restore_input: Option>, handle_unavailable: bool, _state: PhantomData Out>, @@ -38,7 +39,7 @@ pub struct RetryLayer { impl RetryLayer { #[must_use] - pub(crate) fn new(name: StringValue, context: &Context) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &Context) -> Self { Self { context: context.clone(), max_attempts: MaxAttempts::Finite(DEFAULT_RETRY_ATTEMPTS.saturating_add(1)), diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 081bbad3..577265bb 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -1,21 +1,21 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use std::borrow::Cow; use std::ops::ControlFlow; use std::time::Duration; use layered::Service; -use opentelemetry::StringValue; -use opentelemetry::metrics::Counter; use tick::Clock; -use crate::options::MaxAttempts; use crate::retry::telemetry::{ATTEMPT_INDEX, ATTEMPT_NUMBER_IS_LAST, RETRY_EVENT}; use crate::retry::{ CloneArgs, CloneInput, DelayBackoff, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, ShouldRecover, }; +use crate::shared::MaxAttempts; use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; -use crate::{Attempt, EnableIf, NotSet, RecoveryInfo, RecoveryKind}; +use crate::utils::EnableIf; +use crate::{Attempt, NotSet, RecoveryInfo, RecoveryKind}; /// Applies retry logic to service execution for transient error handling. /// @@ -42,9 +42,10 @@ pub struct Retry { pub(super) should_recover: ShouldRecover, pub(super) on_retry: Option>, pub(super) enable_if: EnableIf, - pub(super) strategy_name: StringValue, - pub(super) pipeline_name: StringValue, - pub(super) event_reporter: Counter, + pub(super) strategy_name: Cow<'static, str>, + pub(super) pipeline_name: Cow<'static, str>, + #[cfg(any(feature = "metrics", test))] + pub(super) event_reporter: Option>, pub(super) restore_input: Option>, pub(super) handle_unavailable: bool, } @@ -176,26 +177,30 @@ enum RecoverableKind { impl Retry { fn emit_attempt_telemetry(&self, attempt: Attempt, retry_delay: Duration) { + #[cfg(any(feature = "logs", test))] tracing::event!( name: "seatbelt.retry", tracing::Level::WARN, - pipeline.name = self.pipeline_name.as_str(), - strategy.name = self.strategy_name.as_str(), + pipeline.name = %self.pipeline_name, + strategy.name = %self.strategy_name, resilience.attempt.index = attempt.index(), resilience.attempt.is_last = attempt.is_last(), resilience.retry.delay = retry_delay.as_secs_f32(), ); - self.event_reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, RETRY_EVENT), - opentelemetry::KeyValue::new(ATTEMPT_INDEX, i64::from(attempt.index())), - opentelemetry::KeyValue::new(ATTEMPT_NUMBER_IS_LAST, attempt.is_last()), - ], - ); + #[cfg(any(feature = "metrics", test))] + if let Some(reporter) = &self.event_reporter { + reporter.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, RETRY_EVENT), + opentelemetry::KeyValue::new(ATTEMPT_INDEX, i64::from(attempt.index())), + opentelemetry::KeyValue::new(ATTEMPT_NUMBER_IS_LAST, attempt.is_last()), + ], + ); + } } #[inline] @@ -259,8 +264,8 @@ mod tests { use super::*; use crate::Layer; - use crate::options::Backoff; use crate::retry::RetryLayer; + use crate::shared::Backoff; use crate::testing::MetricTester; use crate::{Context, Set}; diff --git a/crates/seatbelt/src/options/attempt.rs b/crates/seatbelt/src/shared/attempt.rs similarity index 100% rename from crates/seatbelt/src/options/attempt.rs rename to crates/seatbelt/src/shared/attempt.rs diff --git a/crates/seatbelt/src/options/backoff.rs b/crates/seatbelt/src/shared/backoff.rs similarity index 100% rename from crates/seatbelt/src/options/backoff.rs rename to crates/seatbelt/src/shared/backoff.rs diff --git a/crates/seatbelt/src/options/context.rs b/crates/seatbelt/src/shared/context.rs similarity index 81% rename from crates/seatbelt/src/options/context.rs rename to crates/seatbelt/src/shared/context.rs index 3d65d3ff..fdf7ff05 100644 --- a/crates/seatbelt/src/options/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -3,11 +3,8 @@ use std::borrow::Cow; -use opentelemetry::metrics::*; use tick::Clock; -use crate::telemetry::metrics::*; - pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// Shared options for resilience middleware pipelines. @@ -80,7 +77,9 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; pub struct Context { clock: Clock, pipeline_name: Cow<'static, str>, - meter: Meter, + #[cfg(any(feature = "metrics", test))] + meter: Option, + logs_enabled: bool, _in: std::marker::PhantomData In>, _out: std::marker::PhantomData Out>, } @@ -92,13 +91,12 @@ impl Context { /// global provider. Override the provider later via /// [`meter_provider`](Self::meter_provider) if needed. pub fn new(clock: impl AsRef) -> Self { - #[cfg(any(feature = "metrics", test))] - let meter = create_meter(opentelemetry::global::meter_provider().as_ref()); - Self { clock: clock.as_ref().clone(), pipeline_name: Cow::Borrowed(DEFAULT_PIPELINE_NAME), - meter, + #[cfg(any(feature = "metrics", test))] + meter: None, + logs_enabled: false, _in: std::marker::PhantomData, _out: std::marker::PhantomData, } @@ -109,6 +107,7 @@ impl Context { /// Middleware use this to measure duration, track timeouts, and perform /// other time-related operations from a consistent source. #[must_use] + #[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] pub(crate) fn get_clock(&self) -> &Clock { &self.clock } @@ -125,48 +124,31 @@ impl Context { self } - /// Get the configured pipeline name (`default` if not set). - #[must_use] - pub(crate) fn get_pipeline_name(&self) -> &Cow<'static, str> { - &self.pipeline_name - } - /// Override the global meter provider with a custom one. #[must_use] - pub fn meter_provider(self, provider: &dyn MeterProvider) -> Self { - let meter = create_meter(provider); - - Self { meter, ..self } - } - - /// Get the configured OpenTelemetry meter. - /// - /// Use this to create counters, histograms, and gauges for middleware - /// observability. The meter uses: - /// - /// - Name: `seatbelt` - /// - Version: `v0.1.0` - /// - Schema URL: `https://opentelemetry.io/schemas/1.47.0` - #[must_use] - pub(crate) fn get_meter(&self) -> &Meter { - &self.meter + #[cfg(any(feature = "metrics", test))] + pub fn meter_provider(self, provider: &dyn opentelemetry::metrics::MeterProvider) -> Self { + Self { + meter: Some(crate::metrics::create_meter(provider)), + ..self + } } - /// Creates standardized counter for resilience events. - /// - /// Returns a counter that can be used to report events occurring within - /// resilience middleware. - /// - /// # Required Attributes - /// - /// When reporting events, the following attributes MUST be added: - /// - /// - [`PIPELINE_NAME`][crate::telemetry::PIPELINE_NAME]: The name of the pipeline this middleware belongs to - /// - [`STRATEGY_NAME`][crate::telemetry::STRATEGY_NAME]: The name of the specific strategy/middleware - /// - [`EVENT_NAME`][crate::telemetry::EVENT_NAME]: The name of the event being reported - #[must_use] - pub(crate) fn create_resilience_event_counter(&self) -> Counter { - create_resilience_event_counter(self.get_meter()) + #[cfg_attr( + not(any(feature = "metrics", feature = "logs")), + expect(unused_variables, reason = "unused when logs nor metrics are used") + )] + #[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] + pub(crate) fn create_telemetry(&self, strategy_name: Cow<'static, str>) -> crate::utils::TelemetryHelper { + crate::utils::TelemetryHelper { + #[cfg(any(feature = "metrics", test))] + event_reporter: self.meter.as_ref().map(crate::metrics::create_resilience_event_counter), + #[cfg(any(feature = "metrics", feature = "logs", test))] + pipeline_name: self.pipeline_name.clone(), + #[cfg(any(feature = "metrics", feature = "logs", test))] + strategy_name, + logs_enabled: self.logs_enabled, + } } } @@ -175,9 +157,11 @@ impl Clone for Context { Self { clock: self.clock.clone(), pipeline_name: self.pipeline_name.clone(), + #[cfg(any(feature = "metrics", test))] meter: self.meter.clone(), _in: std::marker::PhantomData, _out: std::marker::PhantomData, + logs_enabled: self.logs_enabled, } } } diff --git a/crates/seatbelt/src/options/mod.rs b/crates/seatbelt/src/shared/mod.rs similarity index 53% rename from crates/seatbelt/src/options/mod.rs rename to crates/seatbelt/src/shared/mod.rs index 8fc99568..c966582a 100644 --- a/crates/seatbelt/src/options/mod.rs +++ b/crates/seatbelt/src/shared/mod.rs @@ -11,26 +11,9 @@ pub struct Set; #[derive(Debug)] pub struct NotSet; -crate::define_fn_wrapper!(EnableIf(Fn(&In) -> bool)); - -impl EnableIf { - /// Creates a new `EnableIf` instance that always returns `true`. - pub fn always() -> Self { - Self::new(|_| true) - } - - /// Creates a new `EnableIf` instance that always returns `false`. - pub fn never() -> Self { - Self::new(|_| false) - } -} - mod context; pub use context::Context; -mod define_fn_wrapper; -pub(crate) use define_fn_wrapper::define_fn_wrapper; - mod attempt; mod backoff; diff --git a/crates/seatbelt/src/timeout/callbacks.rs b/crates/seatbelt/src/timeout/callbacks.rs index f0f2ff00..9c25c69f 100644 --- a/crates/seatbelt/src/timeout/callbacks.rs +++ b/crates/seatbelt/src/timeout/callbacks.rs @@ -5,6 +5,6 @@ use std::time::Duration; use super::{OnTimeoutArgs, TimeoutOutputArgs, TimeoutOverrideArgs}; -crate::define_fn_wrapper!(TimeoutOutput(Fn(TimeoutOutputArgs) -> Out)); -crate::define_fn_wrapper!(OnTimeout(Fn(&Out, OnTimeoutArgs))); -crate::define_fn_wrapper!(TimeoutOverride(Fn(&In, TimeoutOverrideArgs) -> Option)); +crate::utils::define_fn_wrapper!(TimeoutOutput(Fn(TimeoutOutputArgs) -> Out)); +crate::utils::define_fn_wrapper!(OnTimeout(Fn(&Out, OnTimeoutArgs))); +crate::utils::define_fn_wrapper!(TimeoutOverride(Fn(&In, TimeoutOverrideArgs) -> Option)); diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 3fbc898b..94a4b4a6 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -1,16 +1,17 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use std::borrow::Cow; use std::marker::PhantomData; use std::time::Duration; -use opentelemetry::StringValue; - use crate::Layer; use crate::timeout::{ OnTimeout, OnTimeoutArgs, Timeout, TimeoutOutput as TimeoutOutputCallback, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs, }; -use crate::{Context, EnableIf, NotSet, Set}; +use crate::utils::EnableIf; +use crate::utils::TelemetryHelper; +use crate::{Context, NotSet, Set}; /// Builder for configuring timeout resilience middleware. /// @@ -28,20 +29,20 @@ pub struct TimeoutLayer { timeout_output: Option>, on_timeout: Option>, enable_if: EnableIf, - strategy_name: StringValue, + telemetry: TelemetryHelper, timeout_override: Option>, _state: PhantomData Out>, } impl TimeoutLayer { #[must_use] - pub(crate) fn new(name: StringValue, context: &Context) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &Context) -> Self { Self { timeout: None, timeout_output: None, on_timeout: None, enable_if: EnableIf::always(), - strategy_name: name, + telemetry: context.create_telemetry(name), context: context.clone(), timeout_override: None, _state: PhantomData, @@ -203,10 +204,8 @@ impl Layer for TimeoutLayer { enable_if: self.enable_if.clone(), on_timeout: self.on_timeout.clone(), timeout_output: self.timeout_output.clone().expect("timeout_result must be set in Ready state"), - name: self.strategy_name.clone(), - pipeline_name: self.context.get_pipeline_name().clone().into(), - event_reporter: self.context.create_resilience_event_counter(), timeout_override: self.timeout_override.clone(), + telemetry: self.telemetry.clone(), } } } @@ -218,7 +217,7 @@ impl TimeoutLayer = layer.timeout_output(|args| format!("timeout: {}", args.timeout().as_millis())); let result = layer.timeout_output.unwrap().call(TimeoutOutputArgs { diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index 793e511b..7126fd8f 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -197,6 +197,8 @@ mod args; mod callbacks; mod layer; mod service; + +#[cfg(any(feature = "metrics", test))] mod telemetry; pub use args::{OnTimeoutArgs, TimeoutOutputArgs, TimeoutOverrideArgs}; diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index fc17094c..1e5d1477 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -6,13 +6,12 @@ use std::time::Duration; use futures::future::Either; use layered::Service; -use opentelemetry::StringValue; use tick::{Clock, FutureExt}; -use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; -use crate::timeout::telemetry::TIMEOUT_EVENT_NAME; use crate::timeout::{OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs}; -use crate::{Context, EnableIf, NotSet}; +use crate::utils::EnableIf; +use crate::utils::TelemetryHelper; +use crate::{Context, NotSet}; /// Applies a timeout to service execution for canceling long-running operations. /// @@ -37,9 +36,7 @@ pub struct Timeout { pub(super) on_timeout: Option>, pub(super) timeout_override: Option>, pub(super) timeout_output: TimeoutOutput, - pub(super) name: StringValue, - pub(super) pipeline_name: StringValue, - pub(super) event_reporter: opentelemetry::metrics::Counter, + pub(super) telemetry: TelemetryHelper, } impl Timeout { @@ -104,24 +101,32 @@ where match self.inner.execute(input).timeout(&self.clock, timeout).await { Ok(output) => output, Err(_error) => { - self.event_reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, TIMEOUT_EVENT_NAME), - ], - ); + #[cfg(any(feature = "metrics", test))] + if let Some(reporter) = &self.telemetry.event_reporter { + use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; + + reporter.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, super::telemetry::TIMEOUT_EVENT_NAME), + ], + ); + } let output = self.timeout_output.call(TimeoutOutputArgs { timeout }); - tracing::event!( - name: "seatbelt.timeout", - tracing::Level::WARN, - pipeline.name = self.pipeline_name.as_str(), - strategy.name = self.name.as_str(), - timeout.ms = timeout.as_millis(), - ); + if self.telemetry.logs_enabled { + #[cfg(any(feature = "logs", test))] + tracing::event!( + name: "seatbelt.timeout", + tracing::Level::WARN, + pipeline.name = %self.telemetry.pipeline_name, + strategy.name = %self.telemetry.strategy_name, + timeout.ms = timeout.as_millis(), + ); + } if let Some(on_timeout) = &self.on_timeout { on_timeout.call(&output, OnTimeoutArgs { timeout }); diff --git a/crates/seatbelt/src/telemetry/mod.rs b/crates/seatbelt/src/utils/attributes.rs similarity index 56% rename from crates/seatbelt/src/telemetry/mod.rs rename to crates/seatbelt/src/utils/attributes.rs index 30cc6040..51c1e46e 100644 --- a/crates/seatbelt/src/telemetry/mod.rs +++ b/crates/seatbelt/src/utils/attributes.rs @@ -1,45 +1,26 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! # Service Middleware Telemetry -//! -//! This module defines well known attribute / label keys used when emitting -//! telemetry (metrics, traces, logs) from middleware components such as retry, -//! timeout, bulkhead, circuit breaker, and fallback pipelines. -//! -//! The constants are stable keys you can attach to: -//! -//! - metrics (e.g. counters, histograms) -//! - tracing spans / events -//! - structured log records -//! -//! # Conventions -//! -//! Names follow the [OpenTelemetry naming guidelines](https://opentelemetry.io/docs/specs/semconv/general/naming/#general-naming-considerations). -//! -//! - Keys should be dot-separated (e.g., `pipeline.name`, `strategy.name`) -//! - Values should be concise and short, preferably using `snake_case` - -#[cfg(any(feature = "metrics", test))] -pub(crate) mod metrics; - /// Key used to annotate the name of a resilience pipeline. /// /// Values reported under this dimension should be short and concise, preferably in `snake_case`. /// Examples: `user_auth`, `data_processing`, `payment_flow`. -pub const PIPELINE_NAME: &str = "resilience.pipeline.name"; +#[cfg(any(feature = "metrics", test))] +pub(crate) const PIPELINE_NAME: &str = "resilience.pipeline.name"; /// Key used to annotate the name of a resilience strategy. /// /// Values reported under this dimension should be short and concise, preferably in `snake_case`. /// Examples: `retry`, `circuit_breaker`, `timeout`, `bulkhead`. -pub const STRATEGY_NAME: &str = "resilience.strategy.name"; +#[cfg(any(feature = "metrics", test))] +pub(crate) const STRATEGY_NAME: &str = "resilience.strategy.name"; /// Key used to annotate the specific resilience event being emitted. /// /// Values reported under this dimension should be short and concise, preferably in `snake_case`. /// Examples: `retry`, `timeout`, `circuit_opened`. -pub const EVENT_NAME: &str = "resilience.event.name"; +#[cfg(any(feature = "metrics", test))] +pub(crate) const EVENT_NAME: &str = "resilience.event.name"; #[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] diff --git a/crates/seatbelt/src/options/define_fn_wrapper.rs b/crates/seatbelt/src/utils/define_fn_wrapper.rs similarity index 80% rename from crates/seatbelt/src/options/define_fn_wrapper.rs rename to crates/seatbelt/src/utils/define_fn_wrapper.rs index 73097a35..01a4f576 100644 --- a/crates/seatbelt/src/options/define_fn_wrapper.rs +++ b/crates/seatbelt/src/utils/define_fn_wrapper.rs @@ -56,49 +56,49 @@ macro_rules! define_fn_wrapper { // Match pattern without return type (defaults to unit) ($name:ident<$($generics:ident),*>(Fn($($param_name:ident: $param_ty:ty),*))) => { - $crate::define_fn_wrapper!($name<$($generics),*>(Fn($($param_name: $param_ty),*) -> ())); + crate::utils::define_fn_wrapper!($name<$($generics),*>(Fn($($param_name: $param_ty),*) -> ())); }; // Alternative match for simple cases without explicit parameter names // For two parameters ($name:ident<$($generics:ident),*>(Fn($param1:ty, $param2:ty) -> $return_ty:ty)) => { - $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1, arg2: $param2) -> $return_ty)); + crate::utils::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1, arg2: $param2) -> $return_ty)); }; // For two parameters without return type ($name:ident<$($generics:ident),*>(Fn($param1:ty, $param2:ty))) => { - $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1, arg2: $param2) -> ())); + crate::utils::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1, arg2: $param2) -> ())); }; // For one parameter ($name:ident<$($generics:ident),*>(Fn($param1:ty) -> $return_ty:ty)) => { - $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1) -> $return_ty)); + crate::utils::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1) -> $return_ty)); }; // For one parameter without return type ($name:ident<$($generics:ident),*>(Fn($param1:ty))) => { - $crate::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1) -> ())); + crate::utils::define_fn_wrapper!($name<$($generics),*>(Fn(arg1: $param1) -> ())); }; // For zero parameters ($name:ident<$($generics:ident),*>(Fn() -> $return_ty:ty)) => { - $crate::define_fn_wrapper!($name<$($generics),*>(Fn() -> $return_ty)); + $crate::utils::define_fn_wrapper!($name<$($generics),*>(Fn() -> $return_ty)); }; // For zero parameters without return type ($name:ident<$($generics:ident),*>(Fn())) => { - $crate::define_fn_wrapper!($name<$($generics),*>(Fn() -> ())); + $crate::utils::define_fn_wrapper!($name<$($generics),*>(Fn() -> ())); }; // Match pattern without return type (defaults to unit) ($name:ident(Fn($($param_name:ident: $param_ty:ty),*))) => { - $crate::define_fn_wrapper!($name(Fn($($param_name: $param_ty),*) -> ())); + $crate::utils::define_fn_wrapper!($name(Fn($($param_name: $param_ty),*) -> ())); }; // Alternative match for simple cases without explicit parameter names // For two parameters ($name:ident(Fn($param1:ty, $param2:ty) -> $return_ty:ty)) => { - $crate::define_fn_wrapper!($name(Fn(arg1: $param1, arg2: $param2) -> $return_ty)); + $crate::utils::define_fn_wrapper!($name(Fn(arg1: $param1, arg2: $param2) -> $return_ty)); }; // For two parameters without return type @@ -108,22 +108,22 @@ macro_rules! define_fn_wrapper { // For one parameter ($name:ident(Fn($param1:ty) -> $return_ty:ty)) => { - $crate::define_fn_wrapper!($name(Fn(arg1: $param1) -> $return_ty)); + $crate::utils::define_fn_wrapper!($name(Fn(arg1: $param1) -> $return_ty)); }; // For one parameter without return type ($name:ident(Fn($param1:ty))) => { - $crate::define_fn_wrapper!($name(Fn(arg1: $param1) -> ())); + $crate::utils::define_fn_wrapper!($name(Fn(arg1: $param1) -> ())); }; // For zero parameters ($name:ident(Fn() -> $return_ty:ty)) => { - $crate::define_fn_wrapper!($name(Fn() -> $return_ty)); + $crate::utils::define_fn_wrapper!($name(Fn() -> $return_ty)); }; // For zero parameters without return type ($name:ident(Fn())) => { - $crate::define_fn_wrapper!($name(Fn() -> ())); + $crate::utils::define_fn_wrapper!($name(Fn() -> ())); }; } diff --git a/crates/seatbelt/src/utils/mod.rs b/crates/seatbelt/src/utils/mod.rs new file mode 100644 index 00000000..456492c4 --- /dev/null +++ b/crates/seatbelt/src/utils/mod.rs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod define_fn_wrapper; +pub(crate) use define_fn_wrapper::define_fn_wrapper; + +#[cfg(any(feature = "metrics", test))] +mod attributes; +#[cfg(any(feature = "metrics", test))] +pub(crate) use attributes::*; + +mod telemetry_helper; +pub(crate) use telemetry_helper::TelemetryHelper; + +define_fn_wrapper!(EnableIf(Fn(&In) -> bool)); + +impl EnableIf { + /// Creates a new `EnableIf` instance that always returns `true`. + pub fn always() -> Self { + Self::new(|_| true) + } + + /// Creates a new `EnableIf` instance that always returns `false`. + pub fn never() -> Self { + Self::new(|_| false) + } +} diff --git a/crates/seatbelt/src/utils/telemetry_helper.rs b/crates/seatbelt/src/utils/telemetry_helper.rs new file mode 100644 index 00000000..8f415710 --- /dev/null +++ b/crates/seatbelt/src/utils/telemetry_helper.rs @@ -0,0 +1,10 @@ +#[derive(Debug, Clone)] +pub(crate) struct TelemetryHelper { + #[cfg(any(feature = "metrics", feature = "logs", test))] + pub(crate) pipeline_name: std::borrow::Cow<'static, str>, + #[cfg(any(feature = "metrics", feature = "logs", test))] + pub(crate) strategy_name: std::borrow::Cow<'static, str>, + #[cfg(any(feature = "metrics", test))] + pub(crate) event_reporter: Option>, + pub(crate) logs_enabled: bool, +} From 3a1ce82062fb07c8f198bb34aca919a78fffa92e Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 11:32:00 +0100 Subject: [PATCH 30/60] fixes --- crates/seatbelt/src/circuit/callbacks.rs | 12 +- .../src/circuit/engine/engine_telemetry.rs | 236 ++++++++++-------- crates/seatbelt/src/circuit/engine/engines.rs | 25 +- crates/seatbelt/src/circuit/engine/mod.rs | 1 + crates/seatbelt/src/circuit/health.rs | 2 + crates/seatbelt/src/circuit/layer.rs | 18 +- crates/seatbelt/src/circuit/mod.rs | 2 + crates/seatbelt/src/circuit/partition_key.rs | 10 + crates/seatbelt/src/circuit/service.rs | 2 +- crates/seatbelt/src/retry/layer.rs | 13 +- crates/seatbelt/src/retry/mod.rs | 2 + crates/seatbelt/src/retry/service.rs | 48 ++-- 12 files changed, 198 insertions(+), 173 deletions(-) diff --git a/crates/seatbelt/src/circuit/callbacks.rs b/crates/seatbelt/src/circuit/callbacks.rs index da93cfad..ebb579f7 100644 --- a/crates/seatbelt/src/circuit/callbacks.rs +++ b/crates/seatbelt/src/circuit/callbacks.rs @@ -4,9 +4,9 @@ use super::{OnClosedArgs, OnOpenedArgs, OnProbingArgs, PartitionKey, RecoveryArgs, RejectedInputArgs}; use crate::RecoveryInfo; -crate::define_fn_wrapper!(PartionKeyProvider(Fn(&In) -> PartitionKey)); -crate::define_fn_wrapper!(ShouldRecover(Fn(&Out, RecoveryArgs) -> RecoveryInfo)); -crate::define_fn_wrapper!(RejectedInput(Fn(In, RejectedInputArgs) -> Out)); -crate::define_fn_wrapper!(OnProbing(Fn(&mut In, OnProbingArgs))); -crate::define_fn_wrapper!(OnOpened(Fn(&Out, OnOpenedArgs))); -crate::define_fn_wrapper!(OnClosed(Fn(&Out, OnClosedArgs))); +crate::utils::define_fn_wrapper!(PartionKeyProvider(Fn(&In) -> PartitionKey)); +crate::utils::define_fn_wrapper!(ShouldRecover(Fn(&Out, RecoveryArgs) -> RecoveryInfo)); +crate::utils::define_fn_wrapper!(RejectedInput(Fn(In, RejectedInputArgs) -> Out)); +crate::utils::define_fn_wrapper!(OnProbing(Fn(&mut In, OnProbingArgs))); +crate::utils::define_fn_wrapper!(OnOpened(Fn(&Out, OnOpenedArgs))); +crate::utils::define_fn_wrapper!(OnClosed(Fn(&Out, OnClosedArgs))); diff --git a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs index cc5eecf4..5891d726 100644 --- a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs @@ -1,39 +1,32 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -use opentelemetry::StringValue; -use opentelemetry::metrics::Counter; +use std::borrow::Cow; + use tick::Clock; +#[cfg(any(feature = "metrics", test))] use crate::circuit::telemetry::*; use crate::circuit::{CircuitEngine, CircuitState, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; -use crate::telemetry::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; + +use crate::utils::TelemetryHelper; +#[cfg(any(feature = "metrics", test))] +use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; /// Wrapper around a circuit engine to add telemetry capabilities. #[derive(Debug)] pub(crate) struct EngineTelemetry { inner: T, - pub(super) strategy_name: StringValue, - pub(super) pipeline_name: StringValue, - pub(super) resilience_events: Counter, - pub(super) partition_key: StringValue, + pub(super) telemetry: TelemetryHelper, + pub(super) partition_key: Cow<'static, str>, pub(super) clock: Clock, } impl EngineTelemetry { - pub fn new( - inner: T, - strategy_name: StringValue, - pipeline_name: StringValue, - partition_key: StringValue, - resilience_events: Counter, - clock: Clock, - ) -> Self { + pub fn new(inner: T, telemetry: TelemetryHelper, partition_key: Cow<'static, str>, clock: Clock) -> Self { Self { inner, - strategy_name, - pipeline_name, - resilience_events, + telemetry, partition_key, clock, } @@ -45,110 +38,138 @@ impl CircuitEngine for EngineTelemetry { let enter_result = self.inner.enter(); if matches!(enter_result, EnterCircuitResult::Rejected) { - self.resilience_events.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_REJECTED_EVENT_NAME), - opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), - opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), - ], - ); - - tracing::event!( - name: "seatbelt.circuit_breaker.rejected", - tracing::Level::WARN, - pipeline.name = self.pipeline_name.as_str(), - strategy.name = self.strategy_name.as_str(), - circuit_breaker.state = CircuitState::Open.as_str(), - circuit_breaker.partition = self.partition_key.as_str(), - ); - } - - enter_result - } - - fn exit(&self, result: ExecutionResult, mode: ExecutionMode) -> ExitCircuitResult { - if mode == ExecutionMode::Probe { - self.resilience_events.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_PROBE_EVENT_NAME), - opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::HalfOpen.as_str()), - opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), - opentelemetry::KeyValue::new(CIRCUIT_PROBE_RESULT, result.as_str()), - ], - ); - - tracing::event!( - name: "seatbelt.circuit_breaker.probe", - tracing::Level::INFO, - pipeline.name = self.pipeline_name.as_str(), - strategy.name = self.strategy_name.as_str(), - circuit_breaker.state = CircuitState::HalfOpen.as_str(), - circuit_breaker.partition = self.partition_key.as_str(), - circuit_breaker.probe.result = result.as_str(), - ); - } - - let exit_result = self.inner.exit(result, mode); - - // Emit telemetry events for circuit state changes - match exit_result { - ExitCircuitResult::Opened(health) => { - self.resilience_events.add( + #[cfg(any(feature = "metrics", test))] + if let Some(reporter) = &self.telemetry.event_reporter { + reporter.add( 1, &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_OPENED_EVENT_NAME), + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_REJECTED_EVENT_NAME), opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), ], ); + } + if self.telemetry.logs_enabled { + #[cfg(any(feature = "logs", test))] tracing::event!( - name: "seatbelt.circuit_breaker.opened", + name: "seatbelt.circuit_breaker.rejected", tracing::Level::WARN, - pipeline.name = self.pipeline_name.as_str(), - strategy.name = self.strategy_name.as_str(), + pipeline.name = %self.telemetry.pipeline_name, + strategy.name = %self.telemetry.strategy_name, circuit_breaker.state = CircuitState::Open.as_str(), - circuit_breaker.partition = self.partition_key.as_str(), - circuit_breaker.health.failure_rate = health.failure_rate(), - circuit_breaker.health.throughput = health.throughput(), + circuit_breaker.partition = %self.partition_key, ); } - ExitCircuitResult::Closed(ref stats) => { - self.resilience_events.add( + } + + enter_result + } + + fn exit(&self, result: ExecutionResult, mode: ExecutionMode) -> ExitCircuitResult { + if mode == ExecutionMode::Probe { + #[cfg(any(feature = "metrics", test))] + if let Some(reporter) = &self.telemetry.event_reporter { + reporter.add( 1, &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_CLOSED_EVENT_NAME), - opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Closed.as_str()), + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_PROBE_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::HalfOpen.as_str()), opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + opentelemetry::KeyValue::new(CIRCUIT_PROBE_RESULT, result.as_str()), ], ); + } + if self.telemetry.logs_enabled { + #[cfg(any(feature = "logs", test))] tracing::event!( - name: "seatbelt.circuit_breaker.closed", + name: "seatbelt.circuit_breaker.probe", tracing::Level::INFO, - pipeline.name = self.pipeline_name.as_str(), - strategy.name = self.strategy_name.as_str(), - circuit_breaker.state = CircuitState::Closed.as_str(), - circuit_breaker.open.duration = stats.opened_duration(self.clock.instant()).as_secs(), - circuit_breaker.partition = self.partition_key.as_str(), - circuit_breaker.probes.total = stats.probes_total, - circuit_breaker.probes.successfull = stats.probes_successes, - circuit_breaker.probes.failed = stats.probes_failures, - circuit_breaker.probes.lost = stats.probes_lost, - circuit_breaker.rejections = stats.rejected, - circuit_breaker.re_opened = stats.re_opened, + pipeline.name = %self.telemetry.pipeline_name, + strategy.name = %self.telemetry.strategy_name, + circuit_breaker.state = CircuitState::HalfOpen.as_str(), + circuit_breaker.partition = %self.partition_key, + circuit_breaker.probe.result = result.as_str(), ); } + } + + let exit_result = self.inner.exit(result, mode); + + // Emit telemetry events for circuit state changes + match exit_result { + ExitCircuitResult::Opened(health) => { + #[cfg(any(feature = "metrics", test))] + if let Some(reporter) = &self.telemetry.event_reporter { + reporter.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_OPENED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ], + ); + } + + if self.telemetry.logs_enabled { + #[cfg(any(feature = "logs", test))] + tracing::event!( + name: "seatbelt.circuit_breaker.opened", + tracing::Level::WARN, + pipeline.name = %self.telemetry.pipeline_name, + strategy.name = %self.telemetry.strategy_name, + circuit_breaker.state = CircuitState::Open.as_str(), + circuit_breaker.partition = %self.partition_key, + circuit_breaker.health.failure_rate = health.failure_rate(), + circuit_breaker.health.throughput = health.throughput(), + ); + } + + _ = health; + } + ExitCircuitResult::Closed(ref stats) => { + #[cfg(any(feature = "metrics", test))] + if let Some(reporter) = &self.telemetry.event_reporter { + reporter.add( + 1, + &[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_CLOSED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Closed.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ], + ); + } + + if self.telemetry.logs_enabled { + #[cfg(any(feature = "logs", test))] + tracing::event!( + name: "seatbelt.circuit_breaker.closed", + tracing::Level::INFO, + pipeline.name = %self.telemetry.pipeline_name, + strategy.name = %self.telemetry.strategy_name, + circuit_breaker.state = CircuitState::Closed.as_str(), + circuit_breaker.open.duration = stats.opened_duration(self.clock.instant()).as_secs(), + circuit_breaker.partition = %self.partition_key, + circuit_breaker.probes.total = stats.probes_total, + circuit_breaker.probes.successfull = stats.probes_successes, + circuit_breaker.probes.failed = stats.probes_failures, + circuit_breaker.probes.lost = stats.probes_lost, + circuit_breaker.rejections = stats.rejected, + circuit_breaker.re_opened = stats.re_opened, + ); + } + + _ = stats; + } ExitCircuitResult::Reopened | ExitCircuitResult::Unchanged => { // We do not report a telemetry event for reopening the circuit // as it is redundant because it is always preceded by an "opened" @@ -265,14 +286,13 @@ mod tests { fn create_engine(engine: EngineFake) -> (MetricTester, EngineTelemetry) { let tester = MetricTester::new(); - let telemetry_engine = EngineTelemetry::new( - engine, - "test_strategy".into(), - "test_pipeline".into(), - "test_partition".into(), - create_resilience_event_counter(&create_meter(tester.meter_provider())), - Clock::new_frozen(), - ); + let telemetry = TelemetryHelper { + pipeline_name: "test_pipeline".into(), + strategy_name: "test_strategy".into(), + event_reporter: Some(create_resilience_event_counter(&create_meter(tester.meter_provider()))), + logs_enabled: true, + }; + let telemetry_engine = EngineTelemetry::new(engine, telemetry, "test_partition".into(), Clock::new_frozen()); (tester, telemetry_engine) } } diff --git a/crates/seatbelt/src/circuit/engine/engines.rs b/crates/seatbelt/src/circuit/engine/engines.rs index e22b96d9..323b10e1 100644 --- a/crates/seatbelt/src/circuit/engine/engines.rs +++ b/crates/seatbelt/src/circuit/engine/engines.rs @@ -4,12 +4,11 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; -use opentelemetry::StringValue; -use opentelemetry::metrics::Counter; use tick::Clock; use crate::circuit::constants::ERR_POISONED_LOCK; use crate::circuit::{Engine, EngineCore, EngineOptions, EngineTelemetry, PartitionKey}; +use crate::utils::TelemetryHelper; /// Manages circuit breaker engines for different partition keys. #[derive(Debug)] @@ -17,26 +16,16 @@ pub(crate) struct Engines { map: Mutex>>, engine_options: EngineOptions, clock: Clock, - strategy_name: StringValue, - pipeline_name: StringValue, - resilience_event_counter: Counter, + telemetry: TelemetryHelper, } impl Engines { - pub fn new( - engine_options: EngineOptions, - clock: Clock, - strategy_name: StringValue, - pipeline_name: StringValue, - resilience_event_counter: Counter, - ) -> Self { + pub fn new(engine_options: EngineOptions, clock: Clock, telemetry: TelemetryHelper) -> Self { Self { map: Mutex::new(HashMap::new()), engine_options, clock, - strategy_name, - pipeline_name, - resilience_event_counter, + telemetry, } } @@ -61,10 +50,8 @@ impl Engines { fn create_engine(&self, key: &PartitionKey) -> Engine { EngineTelemetry::new( EngineCore::new(self.engine_options.clone(), self.clock.clone()), - self.strategy_name.clone(), - self.pipeline_name.clone(), - key.to_string().into(), - self.resilience_event_counter.clone(), + self.telemetry.clone(), + key.clone().into(), self.clock.clone(), ) } diff --git a/crates/seatbelt/src/circuit/engine/mod.rs b/crates/seatbelt/src/circuit/engine/mod.rs index dc03a23e..aba0e014 100644 --- a/crates/seatbelt/src/circuit/engine/mod.rs +++ b/crates/seatbelt/src/circuit/engine/mod.rs @@ -16,6 +16,7 @@ pub(crate) enum CircuitState { } impl CircuitState { + #[cfg(any(feature = "metrics", feature = "logs", test))] pub fn as_str(self) -> &'static str { match self { Self::Closed => "closed", diff --git a/crates/seatbelt/src/circuit/health.rs b/crates/seatbelt/src/circuit/health.rs index b6cc049f..2d253c0b 100644 --- a/crates/seatbelt/src/circuit/health.rs +++ b/crates/seatbelt/src/circuit/health.rs @@ -51,10 +51,12 @@ impl HealthInfo { } } + #[cfg(any(feature = "metrics", feature = "logs", test))] pub fn throughput(&self) -> u32 { self.throughput } + #[cfg(any(feature = "metrics", feature = "logs", test))] pub fn failure_rate(&self) -> f32 { self.failure_rate } diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index dc36ca70..ec0e7104 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -1,11 +1,10 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use std::borrow::Cow; use std::marker::PhantomData; use std::time::Duration; -use opentelemetry::StringValue; - use super::constants::{DEFAULT_BREAK_DURATION, DEFAULT_FAILURE_THRESHOLD, DEFAULT_MIN_THROUGHPUT, DEFAULT_SAMPLING_DURATION}; use super::{ Circuit, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, @@ -13,7 +12,8 @@ use super::{ }; use crate::Layer; use crate::circuit::engine::probing::ProbesOptions; -use crate::{Context, EnableIf, NotSet, Recovery, RecoveryInfo, Set}; +use crate::utils::{EnableIf, TelemetryHelper}; +use crate::{Context, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring circuit breaker resilience middleware. /// @@ -35,7 +35,7 @@ pub struct CircuitLayer on_probing: Option>, partition_key: Option>, enable_if: EnableIf, - strategy_name: StringValue, + telemetry: TelemetryHelper, failure_threshold: f32, min_throughput: u32, sampling_duration: Duration, @@ -46,7 +46,7 @@ pub struct CircuitLayer impl CircuitLayer { #[must_use] - pub(crate) fn new(name: StringValue, context: &Context) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &Context) -> Self { Self { context: context.clone(), recovery: None, @@ -56,7 +56,7 @@ impl CircuitLayer { on_probing: None, partition_key: None, enable_if: EnableIf::always(), - strategy_name: name, + telemetry: context.create_telemetry(name), failure_threshold: DEFAULT_FAILURE_THRESHOLD, min_throughput: DEFAULT_MIN_THROUGHPUT, sampling_duration: DEFAULT_SAMPLING_DURATION, @@ -423,9 +423,7 @@ impl CircuitLayer CircuitLayer for PartitionKey { } } +impl From for Cow<'static, str> { + fn from(value: PartitionKey) -> Self { + match value.0 { + PartitionKeyValue::Number(n) => Cow::Owned(n.to_string()), + PartitionKeyValue::String(s) => s, + PartitionKeyValue::Hashed(_, label) => Cow::Borrowed(label), + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] enum PartitionKeyValue { Number(u64), diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index 30e8a7df..19a49544 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -11,7 +11,7 @@ use super::{ OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RecoveryArgs, RejectedInput, RejectedInputArgs, ShouldRecover, }; -use crate::{EnableIf, NotSet}; +use crate::{NotSet, utils::EnableIf}; /// Applies circuit breaker logic to prevent cascading failures. /// diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index ae7ee638..e7f6a8e3 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -11,6 +11,7 @@ use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; use crate::shared::MaxAttempts; use crate::utils::EnableIf; +use crate::utils::TelemetryHelper; use crate::{Backoff, Context, NotSet, Recovery, RecoveryInfo, Set}; /// Builder for configuring retry resilience middleware. @@ -31,7 +32,7 @@ pub struct RetryLayer { should_recover: Option>, on_retry: Option>, enable_if: EnableIf, - strategy_name: Cow<'static, str>, + telemetry: TelemetryHelper, restore_input: Option>, handle_unavailable: bool, _state: PhantomData Out>, @@ -48,7 +49,7 @@ impl RetryLayer { should_recover: None, on_retry: None, enable_if: EnableIf::always(), - strategy_name: name, + telemetry: context.create_telemetry(name), restore_input: None, handle_unavailable: false, _state: PhantomData, @@ -414,9 +415,7 @@ impl Layer for RetryLayer { should_recover: self.should_recover.clone().expect("should_recover must be set in Ready state"), on_retry: self.on_retry.clone(), enable_if: self.enable_if.clone(), - strategy_name: self.strategy_name.clone(), - pipeline_name: self.context.get_pipeline_name().clone().into(), - event_reporter: self.context.create_resilience_event_counter(), + telemetry: self.telemetry.clone(), restore_input: self.restore_input.clone(), handle_unavailable: self.handle_unavailable, } @@ -497,7 +496,7 @@ impl RetryLayer { pub(super) should_recover: ShouldRecover, pub(super) on_retry: Option>, pub(super) enable_if: EnableIf, - pub(super) strategy_name: Cow<'static, str>, - pub(super) pipeline_name: Cow<'static, str>, - #[cfg(any(feature = "metrics", test))] - pub(super) event_reporter: Option>, + pub(super) telemetry: TelemetryHelper, pub(super) restore_input: Option>, pub(super) handle_unavailable: bool, } @@ -176,25 +171,34 @@ enum RecoverableKind { } impl Retry { + #[cfg_attr( + not(feature = "logs"), + expect(unused_variables, reason = "unused when logs feature not used") + )] fn emit_attempt_telemetry(&self, attempt: Attempt, retry_delay: Duration) { - #[cfg(any(feature = "logs", test))] - tracing::event!( - name: "seatbelt.retry", - tracing::Level::WARN, - pipeline.name = %self.pipeline_name, - strategy.name = %self.strategy_name, - resilience.attempt.index = attempt.index(), - resilience.attempt.is_last = attempt.is_last(), - resilience.retry.delay = retry_delay.as_secs_f32(), - ); + if self.telemetry.logs_enabled { + #[cfg(any(feature = "logs", test))] + tracing::event!( + name: "seatbelt.retry", + tracing::Level::WARN, + pipeline.name = %self.telemetry.pipeline_name, + strategy.name = %self.telemetry.strategy_name, + resilience.attempt.index = attempt.index(), + resilience.attempt.is_last = attempt.is_last(), + resilience.retry.delay = retry_delay.as_secs_f32(), + ); + } #[cfg(any(feature = "metrics", test))] - if let Some(reporter) = &self.event_reporter { + if let Some(reporter) = &self.telemetry.event_reporter { + use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; + use super::telemetry::{ATTEMPT_INDEX, ATTEMPT_NUMBER_IS_LAST, RETRY_EVENT}; + reporter.add( 1, &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.strategy_name.clone()), + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), opentelemetry::KeyValue::new(EVENT_NAME, RETRY_EVENT), opentelemetry::KeyValue::new(ATTEMPT_INDEX, i64::from(attempt.index())), opentelemetry::KeyValue::new(ATTEMPT_NUMBER_IS_LAST, attempt.is_last()), @@ -277,8 +281,8 @@ mod tests { let retry = layer.layer(Execute::new(|v: String| async move { v })); - assert_eq!(retry.pipeline_name.to_string(), "test_pipeline"); - assert_eq!(retry.strategy_name.to_string(), "test_retry"); + assert_eq!(retry.telemetry.pipeline_name.to_string(), "test_pipeline"); + assert_eq!(retry.telemetry.strategy_name.to_string(), "test_retry"); assert_eq!(retry.max_attempts, MaxAttempts::Finite(4)); assert_eq!(retry.backoff.0.base_delay, Duration::from_secs(2)); assert_eq!(retry.backoff.0.backoff_type, Backoff::Exponential); From ac69bad1a69ed19ee6c76c4df3fdd7e0a452d0b1 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 14:16:33 +0100 Subject: [PATCH 31/60] test fixes --- .../src/circuit/engine/engine_telemetry.rs | 2 +- crates/seatbelt/src/circuit/engine/engines.rs | 12 ++++++++---- crates/seatbelt/src/circuit/layer.rs | 14 +++++++------- crates/seatbelt/src/retry/layer.rs | 16 ++++++++-------- crates/seatbelt/src/shared/context.rs | 19 +++++++++++++------ crates/seatbelt/src/timeout/layer.rs | 12 ++++++------ 6 files changed, 43 insertions(+), 32 deletions(-) diff --git a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs index 5891d726..16d460f3 100644 --- a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs @@ -191,7 +191,7 @@ mod tests { use super::*; use crate::circuit::{EngineFake, HealthInfo, Stats}; - use crate::telemetry::metrics::{create_meter, create_resilience_event_counter}; + use crate::metrics::{create_meter, create_resilience_event_counter}; use crate::testing::MetricTester; #[test] diff --git a/crates/seatbelt/src/circuit/engine/engines.rs b/crates/seatbelt/src/circuit/engine/engines.rs index 323b10e1..7bfda31e 100644 --- a/crates/seatbelt/src/circuit/engine/engines.rs +++ b/crates/seatbelt/src/circuit/engine/engines.rs @@ -65,10 +65,16 @@ mod tests { use super::*; use crate::circuit::HealthMetricsBuilder; use crate::circuit::engine::probing::ProbesOptions; - use crate::telemetry::metrics::create_resilience_event_counter; + use crate::metrics::create_resilience_event_counter; #[test] fn get_engine_ok() { + let telemetry = TelemetryHelper { + pipeline_name: "pipeline".into(), + strategy_name: "strategy".into(), + event_reporter: Some(create_resilience_event_counter(&opentelemetry::global::meter("test"))), + logs_enabled: false, + }; let engines = Engines::new( EngineOptions { break_duration: Duration::from_secs(60), @@ -76,9 +82,7 @@ mod tests { probes: ProbesOptions::quick(Duration::from_secs(1)), }, Clock::new_frozen(), - StringValue::from("strategy"), - StringValue::from("pipeline"), - create_resilience_event_counter(&opentelemetry::global::meter("test")), + telemetry, ); assert!(Arc::ptr_eq( diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index ec0e7104..d7d2cf2f 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -465,11 +465,11 @@ mod tests { #[expect(clippy::float_cmp, reason = "Test")] fn new_creates_correct_initial_state() { let context = create_test_context(); - let layer: CircuitLayer<_, _, NotSet, NotSet> = CircuitLayer::new(StringValue::from("test_breaker"), &context); + let layer: CircuitLayer<_, _, NotSet, NotSet> = CircuitLayer::new("test_breaker".into(), &context); assert!(layer.recovery.is_none()); assert!(layer.rejected_input.is_none()); - assert_eq!(layer.strategy_name, StringValue::from("test_breaker")); + assert_eq!(layer.telemetry.strategy_name.as_ref(), "test_breaker"); assert!(layer.enable_if.call(&"test_input".to_string())); assert_eq!(layer.failure_threshold, 0.1); assert_eq!(layer.min_throughput, 100); @@ -479,7 +479,7 @@ mod tests { #[test] fn recovery_sets_correctly() { let context = create_test_context(); - let layer = CircuitLayer::new(StringValue::from("test"), &context); + let layer = CircuitLayer::new("test".into(), &context); let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery_with(|output, _args| { if output.contains("error") { @@ -511,7 +511,7 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { let context = Context::::new(Clock::new_frozen()); - let layer = CircuitLayer::new(StringValue::from("test"), &context); + let layer = CircuitLayer::new("test".into(), &context); let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery(); @@ -537,7 +537,7 @@ mod tests { #[test] fn rejected_input_sets_correctly() { let context = create_test_context(); - let layer = CircuitLayer::new(StringValue::from("test"), &context); + let layer = CircuitLayer::new("test".into(), &context); let layer: CircuitLayer<_, _, NotSet, Set> = layer.rejected_input(|_, _| "rejected".to_string()); @@ -640,7 +640,7 @@ mod tests { #[expect(clippy::float_cmp, reason = "Test")] fn default_values_are_correct() { let context = create_test_context(); - let layer = CircuitLayer::new(StringValue::from("test"), &context); + let layer = CircuitLayer::new("test".into(), &context); assert_eq!(layer.failure_threshold, DEFAULT_FAILURE_THRESHOLD); assert_eq!(layer.min_throughput, DEFAULT_MIN_THROUGHPUT); @@ -686,7 +686,7 @@ mod tests { } fn create_ready_layer() -> CircuitLayer { - CircuitLayer::new(StringValue::from("test"), &create_test_context()) + CircuitLayer::new("test".into(), &create_test_context()) .recovery_with(|output, _args| { if output.contains("error") { RecoveryInfo::retry() diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index e7f6a8e3..7d088fb5 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -521,7 +521,7 @@ mod tests { #[test] fn new_creates_correct_initial_state() { let context = create_test_context(); - let layer: RetryLayer<_, _, NotSet, NotSet> = RetryLayer::new(StringValue::from("test_retry"), &context); + let layer: RetryLayer<_, _, NotSet, NotSet> = RetryLayer::new("test_retry".into(), &context); assert_eq!(layer.max_attempts, MaxAttempts::Finite(4)); // 3 retries + 1 original = 4 total assert!(matches!(layer.backoff.backoff_type, Backoff::Exponential)); @@ -531,14 +531,14 @@ mod tests { assert!(layer.clone_input.is_none()); assert!(layer.should_recover.is_none()); assert!(layer.on_retry.is_none()); - assert_eq!(layer.telemetry.strategy_name, StringValue::from("test_retry")); + assert_eq!(layer.telemetry.strategy_name.as_ref(), "test_retry"); assert!(layer.enable_if.call(&"test_input".to_string())); } #[test] fn clone_input_sets_correctly() { let context = create_test_context(); - let layer = RetryLayer::new(StringValue::from("test"), &context); + let layer = RetryLayer::new("test".into(), &context); let layer: RetryLayer<_, _, Set, NotSet> = layer.clone_input_with(|input, _args| Some(input.clone())); @@ -555,7 +555,7 @@ mod tests { #[test] fn recovery_sets_correctly() { let context = create_test_context(); - let layer = RetryLayer::new(StringValue::from("test"), &context); + let layer = RetryLayer::new("test".into(), &context); let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery_with(|output, _args| { if output.contains("error") { @@ -587,7 +587,7 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { let context = Context::::new(Clock::new_frozen()); - let layer = RetryLayer::new(StringValue::from("test"), &context); + let layer = RetryLayer::new("test".into(), &context); let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery(); @@ -670,7 +670,7 @@ mod tests { #[test] fn handle_unavailable_sets_correctly() { let context = create_test_context(); - let layer = RetryLayer::new(StringValue::from("test"), &context); + let layer = RetryLayer::new("test".into(), &context); // Test default value assert!(!layer.handle_unavailable); @@ -687,7 +687,7 @@ mod tests { #[test] fn restore_input_sets_correctly() { let context = create_test_context(); - let layer = RetryLayer::new(StringValue::from("test"), &context); + let layer = RetryLayer::new("test".into(), &context); let layer = layer.restore_input(|output: &mut String, _args| { (output == "restore_me").then(|| { @@ -743,7 +743,7 @@ mod tests { } fn create_ready_layer() -> RetryLayer { - RetryLayer::new(StringValue::from("test"), &create_test_context()) + RetryLayer::new("test".into(), &create_test_context()) .clone_input_with(|input, _args| Some(input.clone())) .recovery_with(|output, _args| { if output.contains("error") { diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index fdf7ff05..9c82d77e 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -28,10 +28,12 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// Basic usage: /// /// ```rust +/// # #[cfg(feature = "metrics")] +/// # fn example() { /// # use opentelemetry::metrics::MeterProvider; /// # use seatbelt::Context; /// # use tick::Clock; -/// # fn example(clock: Clock, meter_provider: &dyn MeterProvider) { +/// # fn inner(clock: Clock, meter_provider: &dyn MeterProvider) { /// // Create a context for a resilience pipeline /// let ctx = Context::::new(&clock) /// .pipeline_name("auth_pipeline") @@ -40,6 +42,7 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// // Pass the context to resilience middleware layers /// // (e.g., Retry::layer("my_retry", &ctx), Timeout::layer("my_timeout", &ctx)) /// # } +/// # } /// ``` /// /// # Authoring Resilience `Middlewares` @@ -177,7 +180,8 @@ mod tests { fn test_new_with_clock_sets_default_pipeline_name() { let clock = tick::Clock::new_frozen(); let ctx = Context::<(), ()>::new(clock); - assert_eq!(ctx.get_pipeline_name().as_ref(), DEFAULT_PIPELINE_NAME); + let telemetry = ctx.create_telemetry("test".into()); + assert_eq!(telemetry.pipeline_name.as_ref(), DEFAULT_PIPELINE_NAME); // Ensure clock reference behaves (timestamp monotonic relative behaviour not required, just accessible) let _ = ctx.get_clock().system_time(); } @@ -186,8 +190,9 @@ mod tests { fn test_pipeline_name_with_custom_value_sets_name_and_is_owned() { let clock = tick::Clock::new_frozen(); let ctx = Context::<(), ()>::new(clock).pipeline_name(String::from("custom_pipeline")); - assert_eq!(ctx.get_pipeline_name().as_ref(), "custom_pipeline"); - assert!(matches!(ctx.get_pipeline_name(), Cow::Owned(_))); + let telemetry = ctx.create_telemetry("test".into()); + assert_eq!(telemetry.pipeline_name.as_ref(), "custom_pipeline"); + assert!(matches!(telemetry.pipeline_name, Cow::Owned(_))); } #[cfg(not(miri))] @@ -197,8 +202,10 @@ mod tests { let (provider, exporter) = test_meter_provider(); let ctx = Context::<(), ()>::new(clock).meter_provider(&provider); - let c1 = create_resilience_event_counter(ctx.get_meter()); - let c2 = create_resilience_event_counter(ctx.get_meter()); + let telemetry1 = ctx.create_telemetry("test1".into()); + let telemetry2 = ctx.create_telemetry("test2".into()); + let c1 = telemetry1.event_reporter.unwrap(); + let c2 = telemetry2.event_reporter.unwrap(); c1.add(1, &[]); c2.add(2, &[]); diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 94a4b4a6..96e78268 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -240,13 +240,13 @@ mod tests { #[test] fn new_needs_timeout_output() { let context = create_test_context(); - let layer: TimeoutLayer<_, _, NotSet, NotSet> = TimeoutLayer::new(StringValue::from("test_timeout"), &context); + let layer: TimeoutLayer<_, _, NotSet, NotSet> = TimeoutLayer::new("test_timeout".into(), &context); assert!(layer.timeout.is_none()); assert!(layer.timeout_output.is_none()); assert!(layer.on_timeout.is_none()); assert!(layer.timeout_override.is_none()); - assert_eq!(layer.telemetry.strategy_name, StringValue::from("test_timeout")); + assert_eq!(layer.telemetry.strategy_name.as_ref(), "test_timeout"); assert!(layer.enable_if.call(&"test_input".to_string())); } @@ -266,7 +266,7 @@ mod tests { #[test] fn timeout_error_ensure_set_correctly() { let context = create_test_context_result(); - let layer = TimeoutLayer::new(StringValue::from("test"), &context); + let layer = TimeoutLayer::new("test".into(), &context); let layer: TimeoutLayer<_, _, NotSet, Set> = layer.timeout_error(|args| format!("timeout: {}", args.timeout().as_millis())); let result = layer @@ -282,7 +282,7 @@ mod tests { #[test] fn timeout_ensure_set_correctly() { - let layer: TimeoutLayer<_, _, Set, Set> = TimeoutLayer::new(StringValue::from("test"), &create_test_context()) + let layer: TimeoutLayer<_, _, Set, Set> = TimeoutLayer::new("test".into(), &create_test_context()) .timeout_output(|_args| "timeout: ".to_string()) .timeout(Duration::from_millis(3)); @@ -399,13 +399,13 @@ mod tests { } fn create_ready_layer() -> TimeoutLayer { - TimeoutLayer::new(StringValue::from("test"), &create_test_context()) + TimeoutLayer::new("test".into(), &create_test_context()) .timeout_output(|_args| "timeout: ".to_string()) .timeout(Duration::from_millis(3)) } fn create_ready_layer_with_result() -> TimeoutLayer, Set, Set> { - TimeoutLayer::new(StringValue::from("test"), &create_test_context_result()) + TimeoutLayer::new("test".into(), &create_test_context_result()) .timeout_error(|_args| "timeout: ".to_string()) .timeout(Duration::from_millis(3)) } From 1ab07316038d1fc8b0024f4c0afc5fe473cd5d48 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 14:28:02 +0100 Subject: [PATCH 32/60] cleanup --- .../src/circuit/engine/engine_telemetry.rs | 23 ++++++++----------- crates/seatbelt/src/utils/telemetry_helper.rs | 19 +++++++++++++++ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs index 16d460f3..49f7f88d 100644 --- a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs @@ -71,22 +71,19 @@ impl CircuitEngine for EngineTelemetry { fn exit(&self, result: ExecutionResult, mode: ExecutionMode) -> ExitCircuitResult { if mode == ExecutionMode::Probe { #[cfg(any(feature = "metrics", test))] - if let Some(reporter) = &self.telemetry.event_reporter { - reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_PROBE_EVENT_NAME), - opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::HalfOpen.as_str()), - opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), - opentelemetry::KeyValue::new(CIRCUIT_PROBE_RESULT, result.as_str()), - ], - ); + if self.telemetry.metrics_enabled() { + self.telemetry.report_metrics(&[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_PROBE_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::HalfOpen.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + opentelemetry::KeyValue::new(CIRCUIT_PROBE_RESULT, result.as_str()), + ]); } + #[cfg(any(feature = "logs", test))] if self.telemetry.logs_enabled { - #[cfg(any(feature = "logs", test))] tracing::event!( name: "seatbelt.circuit_breaker.probe", tracing::Level::INFO, diff --git a/crates/seatbelt/src/utils/telemetry_helper.rs b/crates/seatbelt/src/utils/telemetry_helper.rs index 8f415710..4c434f99 100644 --- a/crates/seatbelt/src/utils/telemetry_helper.rs +++ b/crates/seatbelt/src/utils/telemetry_helper.rs @@ -8,3 +8,22 @@ pub(crate) struct TelemetryHelper { pub(crate) event_reporter: Option>, pub(crate) logs_enabled: bool, } + +impl TelemetryHelper { + #[cfg(any(feature = "metrics", test))] + pub fn metrics_enabled(&self) -> bool { + self.event_reporter.is_some() + } + + #[cfg(any(feature = "metrics", test))] + pub fn report_metrics(&self, attributes: &[opentelemetry::KeyValue]) { + if let Some(reporter) = &self.event_reporter { + reporter.add(1, attributes); + } + } + + #[cfg(not(any(feature = "metrics", test)))] + pub fn metrics_enabled(&self) -> bool { + false + } +} From 7c11eccd393896ebff004fbf84e65fb55e2cae8e Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 14:32:12 +0100 Subject: [PATCH 33/60] fixes --- .../src/circuit/engine/engine_telemetry.rs | 57 ++++++++----------- crates/seatbelt/src/circuit/service.rs | 2 +- crates/seatbelt/src/retry/service.rs | 28 ++++----- crates/seatbelt/src/timeout/service.rs | 17 +++--- 4 files changed, 43 insertions(+), 61 deletions(-) diff --git a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs index 49f7f88d..d6918597 100644 --- a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs @@ -39,17 +39,14 @@ impl CircuitEngine for EngineTelemetry { if matches!(enter_result, EnterCircuitResult::Rejected) { #[cfg(any(feature = "metrics", test))] - if let Some(reporter) = &self.telemetry.event_reporter { - reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_REJECTED_EVENT_NAME), - opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), - opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), - ], - ); + if self.telemetry.metrics_enabled() { + self.telemetry.report_metrics(&[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_REJECTED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ]); } if self.telemetry.logs_enabled { @@ -102,17 +99,14 @@ impl CircuitEngine for EngineTelemetry { match exit_result { ExitCircuitResult::Opened(health) => { #[cfg(any(feature = "metrics", test))] - if let Some(reporter) = &self.telemetry.event_reporter { - reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_OPENED_EVENT_NAME), - opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), - opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), - ], - ); + if self.telemetry.metrics_enabled() { + self.telemetry.report_metrics(&[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_OPENED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Open.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ]); } if self.telemetry.logs_enabled { @@ -133,17 +127,14 @@ impl CircuitEngine for EngineTelemetry { } ExitCircuitResult::Closed(ref stats) => { #[cfg(any(feature = "metrics", test))] - if let Some(reporter) = &self.telemetry.event_reporter { - reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_CLOSED_EVENT_NAME), - opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Closed.as_str()), - opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), - ], - ); + if self.telemetry.metrics_enabled() { + self.telemetry.report_metrics(&[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, CIRCUIT_CLOSED_EVENT_NAME), + opentelemetry::KeyValue::new(CIRCUIT_STATE, CircuitState::Closed.as_str()), + opentelemetry::KeyValue::new(CIRCUIT_PARTITION, self.partition_key.clone()), + ]); } if self.telemetry.logs_enabled { diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index 19a49544..bfaf7a72 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -49,7 +49,7 @@ impl Circuit { name: impl Into>, context: &crate::Context, ) -> CircuitLayer { - CircuitLayer::new(name.into().into(), context) + CircuitLayer::new(name.into(), context) } } diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index c64bdfdc..4fe66258 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -54,7 +54,7 @@ impl Retry { name: impl Into>, context: &crate::Context, ) -> crate::retry::RetryLayer { - crate::retry::RetryLayer::new(name.into().into(), context) + crate::retry::RetryLayer::new(name.into(), context) } } @@ -171,10 +171,7 @@ enum RecoverableKind { } impl Retry { - #[cfg_attr( - not(feature = "logs"), - expect(unused_variables, reason = "unused when logs feature not used") - )] + #[cfg_attr(not(feature = "logs"), expect(unused_variables, reason = "unused when logs feature not used"))] fn emit_attempt_telemetry(&self, attempt: Attempt, retry_delay: Duration) { if self.telemetry.logs_enabled { #[cfg(any(feature = "logs", test))] @@ -190,20 +187,17 @@ impl Retry { } #[cfg(any(feature = "metrics", test))] - if let Some(reporter) = &self.telemetry.event_reporter { - use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; + if self.telemetry.metrics_enabled() { use super::telemetry::{ATTEMPT_INDEX, ATTEMPT_NUMBER_IS_LAST, RETRY_EVENT}; + use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; - reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, RETRY_EVENT), - opentelemetry::KeyValue::new(ATTEMPT_INDEX, i64::from(attempt.index())), - opentelemetry::KeyValue::new(ATTEMPT_NUMBER_IS_LAST, attempt.is_last()), - ], - ); + self.telemetry.report_metrics(&[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, RETRY_EVENT), + opentelemetry::KeyValue::new(ATTEMPT_INDEX, i64::from(attempt.index())), + opentelemetry::KeyValue::new(ATTEMPT_NUMBER_IS_LAST, attempt.is_last()), + ]); } } diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 1e5d1477..6811b53a 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -67,7 +67,7 @@ impl Timeout { /// /// [timeout module]: crate::timeout pub fn layer(name: impl Into>, context: &Context) -> TimeoutLayer { - TimeoutLayer::new(name.into().into(), context) + TimeoutLayer::new(name.into(), context) } } @@ -102,17 +102,14 @@ where Ok(output) => output, Err(_error) => { #[cfg(any(feature = "metrics", test))] - if let Some(reporter) = &self.telemetry.event_reporter { + if self.telemetry.metrics_enabled() { use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; - reporter.add( - 1, - &[ - opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), - opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), - opentelemetry::KeyValue::new(EVENT_NAME, super::telemetry::TIMEOUT_EVENT_NAME), - ], - ); + self.telemetry.report_metrics(&[ + opentelemetry::KeyValue::new(PIPELINE_NAME, self.telemetry.pipeline_name.clone()), + opentelemetry::KeyValue::new(STRATEGY_NAME, self.telemetry.strategy_name.clone()), + opentelemetry::KeyValue::new(EVENT_NAME, super::telemetry::TIMEOUT_EVENT_NAME), + ]); } let output = self.timeout_output.call(TimeoutOutputArgs { timeout }); From 1fa6edc4f9bab3927b233ae7b2b02976989d650f Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 14:47:37 +0100 Subject: [PATCH 34/60] fixes --- crates/seatbelt/Cargo.toml | 2 +- .../src/circuit/engine/engine_telemetry.rs | 19 +++++++++++++++---- crates/seatbelt/src/circuit/engine/mod.rs | 3 ++- .../seatbelt/src/circuit/execution_result.rs | 1 + crates/seatbelt/src/circuit/health.rs | 10 ++++++++-- crates/seatbelt/src/retry/service.rs | 2 +- crates/seatbelt/src/shared/context.rs | 1 + crates/seatbelt/src/timeout/service.rs | 2 +- crates/seatbelt/src/utils/telemetry_helper.rs | 6 +----- 9 files changed, 31 insertions(+), 15 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index a6d9f69b..a7b7d47c 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -92,7 +92,7 @@ required-features = ["retry", "timeout"] [[example]] name = "circuit" -required-features = ["circuit"] +required-features = ["circuit", "metrics"] [[bench]] name = "observability" diff --git a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs index d6918597..f283f677 100644 --- a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit/engine/engine_telemetry.rs @@ -5,9 +5,11 @@ use std::borrow::Cow; use tick::Clock; +#[cfg(any(feature = "metrics", feature = "logs", test))] +use crate::circuit::CircuitState; #[cfg(any(feature = "metrics", test))] use crate::circuit::telemetry::*; -use crate::circuit::{CircuitEngine, CircuitState, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; +use crate::circuit::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; use crate::utils::TelemetryHelper; #[cfg(any(feature = "metrics", test))] @@ -17,12 +19,16 @@ use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; #[derive(Debug)] pub(crate) struct EngineTelemetry { inner: T, + #[cfg(any(feature = "metrics", feature = "logs", test))] pub(super) telemetry: TelemetryHelper, + #[cfg(any(feature = "metrics", feature = "logs", test))] pub(super) partition_key: Cow<'static, str>, + #[cfg(any(feature = "metrics", feature = "logs", test))] pub(super) clock: Clock, } impl EngineTelemetry { + #[cfg(any(feature = "metrics", feature = "logs", test))] pub fn new(inner: T, telemetry: TelemetryHelper, partition_key: Cow<'static, str>, clock: Clock) -> Self { Self { inner, @@ -31,6 +37,11 @@ impl EngineTelemetry { clock, } } + + #[cfg(not(any(feature = "metrics", feature = "logs", test)))] + pub fn new(inner: T, _telemetry: TelemetryHelper, _partition_key: Cow<'static, str>, _clock: Clock) -> Self { + Self { inner } + } } impl CircuitEngine for EngineTelemetry { @@ -49,8 +60,8 @@ impl CircuitEngine for EngineTelemetry { ]); } + #[cfg(any(feature = "logs", test))] if self.telemetry.logs_enabled { - #[cfg(any(feature = "logs", test))] tracing::event!( name: "seatbelt.circuit_breaker.rejected", tracing::Level::WARN, @@ -109,8 +120,8 @@ impl CircuitEngine for EngineTelemetry { ]); } + #[cfg(any(feature = "logs", test))] if self.telemetry.logs_enabled { - #[cfg(any(feature = "logs", test))] tracing::event!( name: "seatbelt.circuit_breaker.opened", tracing::Level::WARN, @@ -137,8 +148,8 @@ impl CircuitEngine for EngineTelemetry { ]); } + #[cfg(any(feature = "logs", test))] if self.telemetry.logs_enabled { - #[cfg(any(feature = "logs", test))] tracing::event!( name: "seatbelt.circuit_breaker.closed", tracing::Level::INFO, diff --git a/crates/seatbelt/src/circuit/engine/mod.rs b/crates/seatbelt/src/circuit/engine/mod.rs index aba0e014..ce089311 100644 --- a/crates/seatbelt/src/circuit/engine/mod.rs +++ b/crates/seatbelt/src/circuit/engine/mod.rs @@ -8,6 +8,7 @@ use crate::circuit::{ExecutionResult, HealthInfo, HealthMetricsBuilder}; pub(super) mod probing; +#[cfg(any(feature = "metrics", feature = "logs", test))] #[derive(Debug, Copy, Clone)] pub(crate) enum CircuitState { Closed, @@ -15,8 +16,8 @@ pub(crate) enum CircuitState { HalfOpen, } +#[cfg(any(feature = "metrics", feature = "logs", test))] impl CircuitState { - #[cfg(any(feature = "metrics", feature = "logs", test))] pub fn as_str(self) -> &'static str { match self { Self::Closed => "closed", diff --git a/crates/seatbelt/src/circuit/execution_result.rs b/crates/seatbelt/src/circuit/execution_result.rs index 4d76a3f9..2d38f8f7 100644 --- a/crates/seatbelt/src/circuit/execution_result.rs +++ b/crates/seatbelt/src/circuit/execution_result.rs @@ -13,6 +13,7 @@ pub(crate) enum ExecutionResult { Failure, } +#[cfg(any(feature = "logs", feature = "metrics", test))] impl ExecutionResult { pub fn as_str(self) -> &'static str { match self { diff --git a/crates/seatbelt/src/circuit/health.rs b/crates/seatbelt/src/circuit/health.rs index 2d253c0b..2ec726b4 100644 --- a/crates/seatbelt/src/circuit/health.rs +++ b/crates/seatbelt/src/circuit/health.rs @@ -51,12 +51,18 @@ impl HealthInfo { } } - #[cfg(any(feature = "metrics", feature = "logs", test))] + #[cfg_attr( + not(any(feature = "logs", test)), + expect(dead_code, reason = "trying to avoid dead code here leads to too much conditionals") + )] pub fn throughput(&self) -> u32 { self.throughput } - #[cfg(any(feature = "metrics", feature = "logs", test))] + #[cfg_attr( + not(any(feature = "logs", test)), + expect(dead_code, reason = "trying to avoid dead code here leads to too much conditionals") + )] pub fn failure_rate(&self) -> f32 { self.failure_rate } diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 4fe66258..c770316b 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -173,8 +173,8 @@ enum RecoverableKind { impl Retry { #[cfg_attr(not(feature = "logs"), expect(unused_variables, reason = "unused when logs feature not used"))] fn emit_attempt_telemetry(&self, attempt: Attempt, retry_delay: Duration) { + #[cfg(any(feature = "logs", test))] if self.telemetry.logs_enabled { - #[cfg(any(feature = "logs", test))] tracing::event!( name: "seatbelt.retry", tracing::Level::WARN, diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index 9c82d77e..b0132e68 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -150,6 +150,7 @@ impl Context { pipeline_name: self.pipeline_name.clone(), #[cfg(any(feature = "metrics", feature = "logs", test))] strategy_name, + #[cfg(any(feature = "logs", test))] logs_enabled: self.logs_enabled, } } diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 6811b53a..b8ea1ba6 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -114,8 +114,8 @@ where let output = self.timeout_output.call(TimeoutOutputArgs { timeout }); + #[cfg(any(feature = "logs", test))] if self.telemetry.logs_enabled { - #[cfg(any(feature = "logs", test))] tracing::event!( name: "seatbelt.timeout", tracing::Level::WARN, diff --git a/crates/seatbelt/src/utils/telemetry_helper.rs b/crates/seatbelt/src/utils/telemetry_helper.rs index 4c434f99..f01e5096 100644 --- a/crates/seatbelt/src/utils/telemetry_helper.rs +++ b/crates/seatbelt/src/utils/telemetry_helper.rs @@ -6,6 +6,7 @@ pub(crate) struct TelemetryHelper { pub(crate) strategy_name: std::borrow::Cow<'static, str>, #[cfg(any(feature = "metrics", test))] pub(crate) event_reporter: Option>, + #[cfg(any(feature = "logs", test))] pub(crate) logs_enabled: bool, } @@ -21,9 +22,4 @@ impl TelemetryHelper { reporter.add(1, attributes); } } - - #[cfg(not(any(feature = "metrics", test)))] - pub fn metrics_enabled(&self) -> bool { - false - } } From a646f4e4690b68c3b07524c47aead5cf2660952d Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 14:57:35 +0100 Subject: [PATCH 35/60] cleanup --- crates/seatbelt/Cargo.toml | 2 +- crates/seatbelt/benches/observability.rs | 28 +++++++++++--- crates/seatbelt/examples/circuit.rs | 2 +- .../seatbelt/examples/resilience_pipeline.rs | 2 +- crates/seatbelt/examples/retry_advanced.rs | 2 +- crates/seatbelt/examples/retry_outage.rs | 2 +- crates/seatbelt/examples/timeout.rs | 2 +- crates/seatbelt/examples/timeout_advanced.rs | 2 +- crates/seatbelt/src/retry/service.rs | 2 +- crates/seatbelt/src/shared/context.rs | 37 ++++++++++++++----- 10 files changed, 58 insertions(+), 23 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index a7b7d47c..fb228566 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -97,7 +97,7 @@ required-features = ["circuit", "metrics"] [[bench]] name = "observability" harness = false -required-features = ["_middleware"] +required-features = ["retry", "logs", "metrics"] [[bench]] name = "timeout" diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index 2683897f..37f15435 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -22,7 +22,7 @@ fn entry(c: &mut Criterion) { let mut group = c.benchmark_group("observability"); let session = Session::new(); - // With retry middleware (no meter provider listener) + // No telemetry let context = Context::new(Clock::new_frozen()); let service = ( Retry::layer("bench", &context) @@ -40,9 +40,9 @@ fn entry(c: &mut Criterion) { }); }); - // With retry middleware + meter provider listener + // Metrics let meter_provider = SdkMeterProvider::builder().with_periodic_exporter(EmptyExporter).build(); - let context = Context::new(Clock::new_frozen()).meter_provider(&meter_provider); + let context = Context::new(Clock::new_frozen()).enable_metrics(&meter_provider); let service = ( Retry::layer("bench", &context) .clone_input() @@ -51,8 +51,26 @@ fn entry(c: &mut Criterion) { Execute::new(|v: Input| async move { Output::from(v) }), ) .build(); - let operation = session.operation("retry-telemetry"); - group.bench_function("retry-telemetry", |b| { + let operation = session.operation("retry-metrics"); + group.bench_function("retry-metrics", |b| { + b.iter(|| { + let _span = operation.measure_thread(); + _ = block_on(service.execute(Input)); + }); + }); + + // Logs + let context = Context::new(Clock::new_frozen()).enable_logs(); + let service = ( + Retry::layer("bench", &context) + .clone_input() + .base_delay(Duration::ZERO) + .recovery_with(|_, _| RecoveryInfo::retry()), + Execute::new(|v: Input| async move { Output::from(v) }), + ) + .build(); + let operation = session.operation("retry-logs"); + group.bench_function("retry-logs", |b| { b.iter(|| { let _span = operation.measure_thread(); _ = block_on(service.execute(Input)); diff --git a/crates/seatbelt/examples/circuit.rs b/crates/seatbelt/examples/circuit.rs index c6eaa921..3365a270 100644 --- a/crates/seatbelt/examples/circuit.rs +++ b/crates/seatbelt/examples/circuit.rs @@ -26,7 +26,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = Context::new(&clock).meter_provider(&meter_provider); + let context = Context::new(&clock).enable_metrics(&meter_provider); // Define stack with circuit breaker layer let stack = ( diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index 2d1706ea..414b01f3 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -25,7 +25,7 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Shared options for resilience middleware - let context = Context::new(&clock).meter_provider(&meter_provider).pipeline_name("my_pipeline"); + let context = Context::new(&clock).enable_metrics(&meter_provider).pipeline_name("my_pipeline"); // Define stack with retry and timeout middlewares let stack = ( diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index cd0c0d1c..4f6166bc 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -25,7 +25,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = Context::new(&clock).pipeline_name("retry_advanced").meter_provider(&meter_provider); + let context = Context::new(&clock).pipeline_name("retry_advanced").enable_metrics(&meter_provider); // Define stack with retry layer let stack = ( diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index 670979dc..a7d65c6c 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -32,7 +32,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = Context::new(&clock).meter_provider(&meter_provider); + let context = Context::new(&clock).enable_metrics(&meter_provider); // Configure retry layer for outage handling with input restoration let stack = ( diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index 51a4fae4..eba4a0ed 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -29,7 +29,7 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create common options - let context = Context::new(&clock).meter_provider(&meter_provider); + let context = Context::new(&clock).enable_metrics(&meter_provider); // Define stack with timeout layer let stack = ( diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index 558a8d16..41375087 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -29,7 +29,7 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create service options - let context: Context> = Context::new(&clock).pipeline_name("my_pipeline").meter_provider(&meter_provider); + let context: Context> = Context::new(&clock).pipeline_name("my_pipeline").enable_metrics(&meter_provider); // Define stack with timeout layer let stack = ( diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index c770316b..de432ad9 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -579,7 +579,7 @@ mod tests { let tester = MetricTester::new(); let context = Context::::new(ClockControl::default().auto_advance_timers(true).to_clock()) .pipeline_name("test_pipeline") - .meter_provider(tester.meter_provider()); + .enable_metrics(tester.meter_provider()); let service = create_ready_retry_layer_core(RecoveryInfo::retry(), &context) .clone_input_with(move |input, _args| Some(input.clone())) diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index b0132e68..f4b2fe38 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -20,8 +20,8 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// policies in a pipeline, exported metrics and events will carry the same /// pipeline attribute, making dashboards and analysis easier. /// -/// You can also override the meter provider via [`meter_provider`](Self::meter_provider) -/// if you need a non-global provider (e.g., tests or custom SDKs wiring). +/// You can enable metrics via [`enable_metrics`](Self::enable_metrics) and logs via +/// [`enable_logs`](Self::enable_logs) if you need telemetry for observability. /// /// # Examples /// @@ -37,7 +37,7 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// // Create a context for a resilience pipeline /// let ctx = Context::::new(&clock) /// .pipeline_name("auth_pipeline") -/// .meter_provider(meter_provider); +/// .enable_metrics(meter_provider); /// /// // Pass the context to resilience middleware layers /// // (e.g., Retry::layer("my_retry", &ctx), Timeout::layer("my_timeout", &ctx)) @@ -88,11 +88,11 @@ pub struct Context { } impl Context { - /// Create options with a clock and the global meter provider. + /// Create options with a clock. /// - /// Initializes with `pipeline_name = "default"` and a meter from the - /// global provider. Override the provider later via - /// [`meter_provider`](Self::meter_provider) if needed. + /// Initializes with `pipeline_name = "default"`. Enable metrics via + /// [`enable_metrics`](Self::enable_metrics) and logs via + /// [`enable_logs`](Self::enable_logs) if needed. pub fn new(clock: impl AsRef) -> Self { Self { clock: clock.as_ref().clone(), @@ -127,16 +127,33 @@ impl Context { self } - /// Override the global meter provider with a custom one. + /// Enable metrics reporting with a custom meter provider. + /// + /// Metrics are disabled by default. Call this method to enable metrics + /// reporting using the provided OpenTelemetry meter provider. #[must_use] #[cfg(any(feature = "metrics", test))] - pub fn meter_provider(self, provider: &dyn opentelemetry::metrics::MeterProvider) -> Self { + pub fn enable_metrics(self, provider: &dyn opentelemetry::metrics::MeterProvider) -> Self { Self { meter: Some(crate::metrics::create_meter(provider)), ..self } } + /// Enable structured logging for resilience events. + /// + /// Logs are disabled by default. Call this method to enable structured + /// logging for resilience events like retries, timeouts, and circuit breaker + /// state changes. + #[must_use] + #[cfg(any(feature = "logs", test))] + pub fn enable_logs(self) -> Self { + Self { + logs_enabled: true, + ..self + } + } + #[cfg_attr( not(any(feature = "metrics", feature = "logs")), expect(unused_variables, reason = "unused when logs nor metrics are used") @@ -202,7 +219,7 @@ mod tests { let clock = tick::Clock::new_frozen(); let (provider, exporter) = test_meter_provider(); - let ctx = Context::<(), ()>::new(clock).meter_provider(&provider); + let ctx = Context::<(), ()>::new(clock).enable_metrics(&provider); let telemetry1 = ctx.create_telemetry("test1".into()); let telemetry2 = ctx.create_telemetry("test2".into()); let c1 = telemetry1.event_reporter.unwrap(); From dfb2bb2ed0d98b14a37231ee02c93154e1ec32b4 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 15:06:24 +0100 Subject: [PATCH 36/60] fixes --- crates/seatbelt/src/retry/layer.rs | 1 + crates/seatbelt/src/retry/service.rs | 9 ++++++--- crates/seatbelt/src/shared/context.rs | 2 +- crates/seatbelt/src/timeout/layer.rs | 1 + crates/seatbelt/src/timeout/service.rs | 4 ++-- crates/seatbelt/src/utils/telemetry_helper.rs | 3 +++ 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 7d088fb5..0932a5e0 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -415,6 +415,7 @@ impl Layer for RetryLayer { should_recover: self.should_recover.clone().expect("should_recover must be set in Ready state"), on_retry: self.on_retry.clone(), enable_if: self.enable_if.clone(), + #[cfg(any(feature = "logs", feature = "metrics", test))] telemetry: self.telemetry.clone(), restore_input: self.restore_input.clone(), handle_unavailable: self.handle_unavailable, diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index de432ad9..6fc7a683 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -12,7 +12,6 @@ use crate::retry::{ }; use crate::shared::MaxAttempts; use crate::utils::EnableIf; -use crate::utils::TelemetryHelper; use crate::{Attempt, NotSet, RecoveryInfo, RecoveryKind}; /// Applies retry logic to service execution for transient error handling. @@ -40,7 +39,8 @@ pub struct Retry { pub(super) should_recover: ShouldRecover, pub(super) on_retry: Option>, pub(super) enable_if: EnableIf, - pub(super) telemetry: TelemetryHelper, + #[cfg(any(feature = "logs", feature = "metrics", test))] + pub(super) telemetry: crate::utils::TelemetryHelper, pub(super) restore_input: Option>, pub(super) handle_unavailable: bool, } @@ -171,7 +171,10 @@ enum RecoverableKind { } impl Retry { - #[cfg_attr(not(feature = "logs"), expect(unused_variables, reason = "unused when logs feature not used"))] + #[cfg_attr( + not(any(feature = "logs", test)), + expect(unused_variables, reason = "unused when logs feature not used") + )] fn emit_attempt_telemetry(&self, attempt: Attempt, retry_delay: Duration) { #[cfg(any(feature = "logs", test))] if self.telemetry.logs_enabled { diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index f4b2fe38..84d65732 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -155,7 +155,7 @@ impl Context { } #[cfg_attr( - not(any(feature = "metrics", feature = "logs")), + not(any(feature = "metrics", feature = "logs", test)), expect(unused_variables, reason = "unused when logs nor metrics are used") )] #[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 96e78268..047f6e8e 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -205,6 +205,7 @@ impl Layer for TimeoutLayer { on_timeout: self.on_timeout.clone(), timeout_output: self.timeout_output.clone().expect("timeout_result must be set in Ready state"), timeout_override: self.timeout_override.clone(), + #[cfg(any(feature = "logs", feature = "metrics", test))] telemetry: self.telemetry.clone(), } } diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index b8ea1ba6..fa024336 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -10,7 +10,6 @@ use tick::{Clock, FutureExt}; use crate::timeout::{OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs}; use crate::utils::EnableIf; -use crate::utils::TelemetryHelper; use crate::{Context, NotSet}; /// Applies a timeout to service execution for canceling long-running operations. @@ -36,7 +35,8 @@ pub struct Timeout { pub(super) on_timeout: Option>, pub(super) timeout_override: Option>, pub(super) timeout_output: TimeoutOutput, - pub(super) telemetry: TelemetryHelper, + #[cfg(any(feature = "logs", feature = "metrics", test))] + pub(super) telemetry: crate::utils::TelemetryHelper, } impl Timeout { diff --git a/crates/seatbelt/src/utils/telemetry_helper.rs b/crates/seatbelt/src/utils/telemetry_helper.rs index f01e5096..59e7baa0 100644 --- a/crates/seatbelt/src/utils/telemetry_helper.rs +++ b/crates/seatbelt/src/utils/telemetry_helper.rs @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + #[derive(Debug, Clone)] pub(crate) struct TelemetryHelper { #[cfg(any(feature = "metrics", feature = "logs", test))] From 9298bbfb61d9f06d5203d31c59b6a0f662193e65 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 15:14:00 +0100 Subject: [PATCH 37/60] fixes --- crates/seatbelt/src/circuit/args.rs | 55 +++++++++++++++++++++++++++++ crates/seatbelt/src/retry/layer.rs | 24 +++++++++++++ 2 files changed, 79 insertions(+) diff --git a/crates/seatbelt/src/circuit/args.rs b/crates/seatbelt/src/circuit/args.rs index 4c512b50..7bb6c825 100644 --- a/crates/seatbelt/src/circuit/args.rs +++ b/crates/seatbelt/src/circuit/args.rs @@ -104,3 +104,58 @@ impl OnOpenedArgs<'_> { self.partition_key } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn recovery_args_accessors() { + let key = PartitionKey::from("test"); + let clock = Clock::new_frozen(); + let args = RecoveryArgs { + partition_key: &key, + clock: &clock, + }; + assert_eq!(args.partition_key(), &key); + let _ = args.clock(); + assert!(format!("{args:?}").contains("RecoveryArgs")); + } + + #[test] + fn rejected_input_args_accessors() { + let key = PartitionKey::from("rejected"); + let args = RejectedInputArgs { partition_key: &key }; + assert_eq!(args.partition_key(), &key); + assert!(format!("{args:?}").contains("RejectedInputArgs")); + } + + #[test] + fn on_probing_args_accessors() { + let key = PartitionKey::from("probing"); + let args = OnProbingArgs { partition_key: &key }; + assert_eq!(args.partition_key(), &key); + assert!(format!("{args:?}").contains("OnProbingArgs")); + } + + #[test] + fn on_closed_args_accessors() { + let key = PartitionKey::from("closed"); + let duration = Duration::from_secs(5); + let args = OnClosedArgs { + partition_key: &key, + open_duration: duration, + }; + assert_eq!(args.partition_key(), &key); + assert_eq!(args.open_duration(), duration); + assert!(format!("{args:?}").contains("OnClosedArgs")); + } + + #[test] + fn on_opened_args_accessors() { + let key = PartitionKey::from("opened"); + let args = OnOpenedArgs { partition_key: &key }; + assert_eq!(args.partition_key(), &key); + assert!(format!("{args:?}").contains("OnOpenedArgs")); + } +} diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 0932a5e0..52096a77 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -731,6 +731,30 @@ mod tests { } } + #[test] + fn infinite_retry_attempts_sets_correctly() { + let context = create_test_context(); + let layer = RetryLayer::new("test".into(), &context).infinite_retry_attempts(); + assert_eq!(layer.max_attempts, MaxAttempts::Infinite); + } + + #[test] + fn restore_input_from_error_sets_correctly() { + let context: Context> = Context::new(Clock::new_frozen()).pipeline_name("test"); + let layer = RetryLayer::new("test".into(), &context) + .restore_input_from_error(|e: &mut String, _| (e == "restore").then(|| std::mem::take(e))); + + let restore = layer.restore_input.as_ref().unwrap(); + let args = || RestoreInputArgs { + attempt: Attempt::new(1, false), + recovery: RecoveryInfo::retry(), + }; + + assert_eq!(restore.call(&mut Err("restore".into()), args()), Some("restore".to_string())); + assert_eq!(restore.call(&mut Err("other".into()), args()), None); + assert_eq!(restore.call(&mut Ok("success".into()), args()), None); + } + #[test] fn static_assertions() { static_assertions::assert_impl_all!(RetryLayer: Layer); From 850d8e6d08f7ed22a8102a09b76274be561530f4 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 15:48:43 +0100 Subject: [PATCH 38/60] cleanup --- crates/seatbelt/src/circuit/service.rs | 51 +++++++++++++++++++ crates/seatbelt/src/retry/service.rs | 27 ++++++++++ crates/seatbelt/src/testing.rs | 68 ++++++++++++++++++++++++++ crates/seatbelt/src/timeout/service.rs | 37 ++++++++++++++ 4 files changed, 183 insertions(+) diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index bfaf7a72..23d079aa 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -453,6 +453,57 @@ mod tests { assert_eq!(result, "B"); } + #[tokio::test] + async fn circuit_breaker_emits_logs() { + use tracing_subscriber::util::SubscriberInitExt; + + use crate::testing::LogCapture; + + let log_capture = LogCapture::new(); + let _guard = log_capture.subscriber().set_default(); + + let clock_control = ClockControl::new(); + let context = + Context::::new(clock_control.to_clock()).pipeline_name("log_test_pipeline").enable_logs(); + + let service = Circuit::layer("log_test_circuit", &context) + .min_throughput(3) + .half_open_mode(HalfOpenMode::quick()) + .recovery_with(|output, _| { + if output.contains("success") { + RecoveryInfo::never() + } else { + RecoveryInfo::retry() + } + }) + .rejected_input(|_, _| "rejected".to_string()) + .layer(Execute::new(|input: String| async move { input })); + + // Trip the circuit by generating failures + for _ in 0..3 { + let _ = service.execute("fail".to_string()).await; + } + + // Verify circuit opened log + log_capture.assert_contains("seatbelt::circuit"); + log_capture.assert_contains("log_test_pipeline"); + log_capture.assert_contains("log_test_circuit"); + log_capture.assert_contains("circuit_breaker.state=\"open\""); + log_capture.assert_contains("circuit_breaker.health.failure_rate"); + + // Request should be rejected (emits another open state log) + let _ = service.execute("test".to_string()).await; + + // Advance time past break duration to allow probing + clock_control.advance(DEFAULT_BREAK_DURATION); + + // Send a successful probe to close circuit + let _ = service.execute("success".to_string()).await; + log_capture.assert_contains("circuit_breaker.probe.result"); + log_capture.assert_contains("circuit_breaker.state=\"closed\""); + log_capture.assert_contains("circuit_breaker.open.duration"); + } + fn create_ready_circuit_breaker_layer(clock: &Clock) -> CircuitLayer { let context = Context::::new(clock.clone()).pipeline_name("test_pipeline"); Circuit::layer("test_breaker", &context) diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 6fc7a683..8e8868f8 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -606,6 +606,33 @@ mod tests { ); } + #[tokio::test] + async fn retry_emits_log() { + use tracing_subscriber::util::SubscriberInitExt; + + use crate::testing::LogCapture; + + let log_capture = LogCapture::new(); + let _guard = log_capture.subscriber().set_default(); + + let clock = ClockControl::default().auto_advance_timers(true).to_clock(); + let context = Context::::new(clock).pipeline_name("log_test_pipeline").enable_logs(); + + let service = Retry::layer("log_test_retry", &context) + .clone_input() + .recovery_with(|_, _| RecoveryInfo::retry()) + .max_retry_attempts(2) + .layer(Execute::new(|v: String| async move { v })); + + let _ = service.execute("test".to_string()).await; + + log_capture.assert_contains("seatbelt::retry"); + log_capture.assert_contains("log_test_pipeline"); + log_capture.assert_contains("log_test_retry"); + log_capture.assert_contains("resilience.attempt.index"); + log_capture.assert_contains("resilience.retry.delay"); + } + fn create_ready_retry_layer(clock: &Clock, recover: RecoveryInfo) -> RetryLayer { let context = Context::new(clock.clone()).pipeline_name("test_pipeline"); create_ready_retry_layer_core(recover, &context) diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs index 9929a5db..07ce4e07 100644 --- a/crates/seatbelt/src/testing.rs +++ b/crates/seatbelt/src/testing.rs @@ -1,9 +1,13 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use std::io::Write; +use std::sync::{Arc, Mutex}; + use opentelemetry::KeyValue; use opentelemetry_sdk::metrics::data::{AggregatedMetrics, Metric, MetricData}; use opentelemetry_sdk::metrics::{InMemoryMetricExporter, SdkMeterProvider}; +use tracing_subscriber::fmt::MakeWriter; use crate::{Recovery, RecoveryInfo}; @@ -113,3 +117,67 @@ impl From for RecoverableType { Self(recovery) } } + +/// Thread-local log capture buffer for testing. +/// +/// Uses `tracing_subscriber::fmt::MakeWriter` to capture formatted log output +/// into a thread-local buffer that can be inspected in tests. +#[derive(Debug, Clone, Default)] +pub(crate) struct LogCapture { + buffer: Arc>>, +} + +impl LogCapture { + #[must_use] + pub fn new() -> Self { + Self { + buffer: Arc::new(Mutex::new(Vec::new())), + } + } + + /// Returns the captured log output as a string. + #[must_use] + pub fn output(&self) -> String { + String::from_utf8_lossy(&self.buffer.lock().unwrap()).to_string() + } + + /// Asserts that the captured log output contains the given substring. + pub fn assert_contains(&self, expected: &str) { + let output = self.output(); + assert!(output.contains(expected), "log output does not contain '{expected}', got:\n{output}"); + } + + /// Creates a `tracing_subscriber` that writes to this capture buffer. + /// Use with `set_default()` for thread-local capture. + #[must_use] + pub fn subscriber(&self) -> impl tracing::Subscriber { + use tracing_subscriber::layer::SubscriberExt; + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer().with_writer(self.clone()).with_ansi(false)) + } +} + +impl<'a> MakeWriter<'a> for LogCapture { + type Writer = LogCaptureWriter; + + fn make_writer(&'a self) -> Self::Writer { + LogCaptureWriter { + buffer: Arc::clone(&self.buffer), + } + } +} + +/// Writer that appends to a shared buffer. +pub(crate) struct LogCaptureWriter { + buffer: Arc>>, +} + +impl Write for LogCaptureWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.buffer.lock().unwrap().extend_from_slice(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index fa024336..25df95e2 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -258,4 +258,41 @@ mod tests { assert_eq!(output, "test input"); } + + #[tokio::test] + async fn timeout_emits_log() { + use tracing_subscriber::util::SubscriberInitExt; + + use crate::testing::LogCapture; + + let log_capture = LogCapture::new(); + let _guard = log_capture.subscriber().set_default(); + + let clock = ClockControl::default() + .auto_advance(Duration::from_millis(200)) + .auto_advance_limit(Duration::from_millis(500)) + .to_clock(); + let context = Context::new(clock.clone()).enable_logs().pipeline_name("log_test_pipeline"); + + let stack = ( + Timeout::layer("log_test_timeout", &context) + .timeout_output(|_| "timed out".to_string()) + .timeout(Duration::from_millis(100)), + Execute::new(move |input| { + let clock = clock.clone(); + async move { + clock.delay(Duration::from_secs(1)).await; + input + } + }), + ); + + let service = stack.build(); + let _ = service.execute("test".to_string()).await; + + log_capture.assert_contains("seatbelt::timeout"); + log_capture.assert_contains("log_test_pipeline"); + log_capture.assert_contains("log_test_timeout"); + log_capture.assert_contains("timeout.ms=100"); + } } From 98f7bef1033171e83824023cc82cda2f822d31b8 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 15:57:25 +0100 Subject: [PATCH 39/60] fixes --- crates/seatbelt/src/circuit/service.rs | 5 +++-- crates/seatbelt/src/retry/service.rs | 4 +++- crates/seatbelt/src/testing.rs | 7 +++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index 23d079aa..1ebd4a3f 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -463,8 +463,9 @@ mod tests { let _guard = log_capture.subscriber().set_default(); let clock_control = ClockControl::new(); - let context = - Context::::new(clock_control.to_clock()).pipeline_name("log_test_pipeline").enable_logs(); + let context = Context::::new(clock_control.to_clock()) + .pipeline_name("log_test_pipeline") + .enable_logs(); let service = Circuit::layer("log_test_circuit", &context) .min_throughput(3) diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 8e8868f8..f584c940 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -616,7 +616,9 @@ mod tests { let _guard = log_capture.subscriber().set_default(); let clock = ClockControl::default().auto_advance_timers(true).to_clock(); - let context = Context::::new(clock).pipeline_name("log_test_pipeline").enable_logs(); + let context = Context::::new(clock) + .pipeline_name("log_test_pipeline") + .enable_logs(); let service = Retry::layer("log_test_retry", &context) .clone_input() diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs index 07ce4e07..5383ba56 100644 --- a/crates/seatbelt/src/testing.rs +++ b/crates/seatbelt/src/testing.rs @@ -141,10 +141,13 @@ impl LogCapture { String::from_utf8_lossy(&self.buffer.lock().unwrap()).to_string() } - /// Asserts that the captured log output contains the given substring. + /// Asserts that the captured log output contains the given string. pub fn assert_contains(&self, expected: &str) { let output = self.output(); - assert!(output.contains(expected), "log output does not contain '{expected}', got:\n{output}"); + assert!( + output.contains(expected), + "log output does not contain '{expected}', got:\n{output}" + ); } /// Creates a `tracing_subscriber` that writes to this capture buffer. From 6ac0b2347884e8d333dc5a57343c2a2c96cc362c Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 16:11:50 +0100 Subject: [PATCH 40/60] icons --- crates/seatbelt/README.md | 4 ++-- crates/seatbelt/favicon.ico | 4 ++-- crates/seatbelt/logo.png | 4 ++-- crates/seatbelt/src/lib.rs | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index e46e3d06..7e1c4a4a 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -110,10 +110,10 @@ This crate was developed as part of The Oxidizer Project. Br [__link11]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html [__link12]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html [__link13]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html - [__link2]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context + [__link2]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=shared::Context [__link3]: https://crates.io/crates/layered/0.1.0 [__link4]: https://docs.rs/layered/0.1.0/layered/?search=Stack - [__link5]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=options::Context + [__link5]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=shared::Context [__link6]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link7]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html diff --git a/crates/seatbelt/favicon.ico b/crates/seatbelt/favicon.ico index ccae8114..e7752941 100644 --- a/crates/seatbelt/favicon.ico +++ b/crates/seatbelt/favicon.ico @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e48225cb42c8ac02df5dd4a9bdba29c1e8d10436cc27055d6a021bcb951d4145 -size 480683 +oid sha256:3de762b64903267b6d416cfb2d420b956fa4dfdfcb47ee8b232845da495be8e5 +size 28527 diff --git a/crates/seatbelt/logo.png b/crates/seatbelt/logo.png index f2bd6691..1794a428 100644 --- a/crates/seatbelt/logo.png +++ b/crates/seatbelt/logo.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63ad64ebb0185d7cd153acc291989d72e3a0094603d8bfe781a220861de247f2 -size 17876 +oid sha256:7992716781c4ace4099acf8d4104ac6ebc0e1cdeb437f43a0a884ead21bfd29e +size 84743 diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 4c67a0c5..ea119468 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -6,7 +6,7 @@ #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/logo.png")] #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/favicon.ico")] #![cfg_attr( - not(all(feature = "retry", feature = "timeout", feature = "circuit")), + not(all(feature = "retry", feature = "timeout", feature = "circuit", feature = "metrics", feature = "logs")), expect( rustdoc::broken_intra_doc_links, reason = "too ugly to make 'live links' possible with the combination of features" From 2c63f37d79e38cbeb5dcae480a29d200d9f56557 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 16:19:18 +0100 Subject: [PATCH 41/60] new tests --- crates/seatbelt/src/circuit/layer.rs | 16 ++++++++++ crates/seatbelt/src/retry/args.rs | 1 + crates/seatbelt/src/rnd.rs | 15 +++++++++ crates/seatbelt/src/timeout/service.rs | 42 ++++++++++++++++++++++++++ 4 files changed, 74 insertions(+) diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index d7d2cf2f..539336f6 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -550,6 +550,22 @@ mod tests { assert_eq!(result, "rejected"); } + #[test] + fn rejected_input_error_wraps_in_err() { + let context: Context> = Context::new(Clock::new_frozen()); + let layer = CircuitLayer::new("test".into(), &context); + + let layer: CircuitLayer<_, _, NotSet, Set> = layer.rejected_input_error(|input, _| format!("rejected: {input}")); + + let result = layer.rejected_input.as_ref().unwrap().call( + "test_input".to_string(), + RejectedInputArgs { + partition_key: &PartitionKey::default(), + }, + ); + assert_eq!(result, Err("rejected: test_input".to_string())); + } + #[test] fn enable_disable_conditions_work() { let layer = create_ready_layer().enable_if(|input| input.contains("enable")); diff --git a/crates/seatbelt/src/retry/args.rs b/crates/seatbelt/src/retry/args.rs index d600950b..6f06d817 100644 --- a/crates/seatbelt/src/retry/args.rs +++ b/crates/seatbelt/src/retry/args.rs @@ -121,6 +121,7 @@ mod tests { }; assert_eq!(args.attempt(), Attempt::new(3, true)); + let _clock = args.clock(); } #[test] diff --git a/crates/seatbelt/src/rnd.rs b/crates/seatbelt/src/rnd.rs index 70b92484..794b4655 100644 --- a/crates/seatbelt/src/rnd.rs +++ b/crates/seatbelt/src/rnd.rs @@ -53,3 +53,18 @@ impl Rnd { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn debug_real() { + assert_eq!(format!("{:?}", Rnd::Real), "Real"); + } + + #[test] + fn debug_test() { + assert_eq!(format!("{:?}", Rnd::new_fixed(0.5)), "Test"); + } +} diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 25df95e2..39f7b722 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -295,4 +295,46 @@ mod tests { log_capture.assert_contains("log_test_timeout"); log_capture.assert_contains("timeout.ms=100"); } + + #[tokio::test] + async fn timeout_emits_metrics() { + use opentelemetry::KeyValue; + + use crate::testing::MetricTester; + use crate::utils::{EVENT_NAME, PIPELINE_NAME, STRATEGY_NAME}; + + let metrics = MetricTester::new(); + let clock = ClockControl::default() + .auto_advance(Duration::from_millis(200)) + .auto_advance_limit(Duration::from_millis(500)) + .to_clock(); + let context = Context::new(clock.clone()) + .enable_metrics(metrics.meter_provider()) + .pipeline_name("metrics_pipeline"); + + let stack = ( + Timeout::layer("metrics_timeout", &context) + .timeout_output(|_| "timed out".to_string()) + .timeout(Duration::from_millis(100)), + Execute::new(move |input| { + let clock = clock.clone(); + async move { + clock.delay(Duration::from_secs(1)).await; + input + } + }), + ); + + let service = stack.build(); + let _ = service.execute("test".to_string()).await; + + metrics.assert_attributes( + &[ + KeyValue::new(PIPELINE_NAME, "metrics_pipeline"), + KeyValue::new(STRATEGY_NAME, "metrics_timeout"), + KeyValue::new(EVENT_NAME, "timeout"), + ], + Some(3), + ); + } } From a51cb0b1c0f3f707a1be9d2c1601f39ac91f9eea Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 16:27:48 +0100 Subject: [PATCH 42/60] fixes --- .../src/circuit/engine/probing/mod.rs | 36 +++++++++++++++++++ .../src/circuit/engine/probing/probes.rs | 17 +++++++++ 2 files changed, 53 insertions(+) diff --git a/crates/seatbelt/src/circuit/engine/probing/mod.rs b/crates/seatbelt/src/circuit/engine/probing/mod.rs index 56564610..5bba251d 100644 --- a/crates/seatbelt/src/circuit/engine/probing/mod.rs +++ b/crates/seatbelt/src/circuit/engine/probing/mod.rs @@ -147,4 +147,40 @@ mod tests { EnterCircuitResult::Rejected )); } + + #[test] + fn probe_new_creates_health_probe() { + let options = HealthProbeOptions::new(Duration::from_secs(10), 0.2, 0.5); + let probe = Probe::new(ProbeOptions::HealthProbe(options)); + assert!(matches!(probe, Probe::Health(_))); + } + + #[test] + fn probe_health_allow_probe_delegates_to_inner() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.2, 1.0); + let mut probe = Probe::new(ProbeOptions::HealthProbe(options)); + let now = Instant::now(); + + // With probing_ratio=1.0, all probes should be accepted + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + } + + #[test] + fn probe_health_record_delegates_to_inner() { + let options = HealthProbeOptions::new(Duration::from_secs(5), 0.2, 1.0); + let mut probe = Probe::new(ProbeOptions::HealthProbe(options)); + let now = Instant::now(); + + // allow_probe initializes the sampling period + assert_eq!(probe.allow_probe(now), AllowProbeResult::Accepted); + + // Record before sampling period ends returns Pending + assert_eq!(probe.record(ExecutionResult::Success, now), ProbingResult::Pending); + + // Record after sampling period with success returns Success + assert_eq!( + probe.record(ExecutionResult::Success, now + Duration::from_secs(5)), + ProbingResult::Success + ); + } } diff --git a/crates/seatbelt/src/circuit/engine/probing/probes.rs b/crates/seatbelt/src/circuit/engine/probing/probes.rs index 561f216b..7cfbc50f 100644 --- a/crates/seatbelt/src/circuit/engine/probing/probes.rs +++ b/crates/seatbelt/src/circuit/engine/probing/probes.rs @@ -53,7 +53,10 @@ mod tests { use std::time::Duration; + use tick::Clock; + use super::*; + use crate::circuit::engine::probing::HealthProbeOptions; #[test] fn multiple_probes_ok() { @@ -77,4 +80,18 @@ mod tests { assert!(probes.probes.next().is_none()); } + + #[test] + fn record_returns_pending_when_probe_returns_pending() { + let now = Clock::new_frozen().instant(); + + let options = ProbesOptions::new([ProbeOptions::HealthProbe(HealthProbeOptions::new(Duration::from_secs(5), 0.2, 1.0))]); + let mut probes = Probes::new(&options); + + // Initialize sampling period + assert_eq!(probes.allow_probe(now), AllowProbeResult::Accepted); + + // Record during sampling period returns Pending + assert_eq!(probes.record(ExecutionResult::Success, now), ProbingResult::Pending); + } } From afc10121cac028918b6d4dd4e48f8489ed2c58a6 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 16:34:45 +0100 Subject: [PATCH 43/60] more tests --- crates/layered/src/lib.rs | 2 +- .../src/circuit/engine/engine_core.rs | 30 +++++++++++++++++ crates/seatbelt/src/circuit/partition_key.rs | 32 +++++++++++++++++++ crates/seatbelt/src/retry/backoff.rs | 12 +++++++ crates/seatbelt/src/utils/mod.rs | 11 +++++++ 5 files changed, 86 insertions(+), 1 deletion(-) diff --git a/crates/layered/src/lib.rs b/crates/layered/src/lib.rs index f25ea316..9ec702f4 100644 --- a/crates/layered/src/lib.rs +++ b/crates/layered/src/lib.rs @@ -5,7 +5,7 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr( not(all(feature = "intercept", feature = "tower-service", feature = "dynamic-service")), - expect(rustdoc::broken_intra_doc_links) + expect(rustdoc::broken_intra_doc_links, reason = "simpler docs") )] #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/layered/logo.png")] #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/layered/favicon.ico")] diff --git a/crates/seatbelt/src/circuit/engine/engine_core.rs b/crates/seatbelt/src/circuit/engine/engine_core.rs index 85bdef9a..de520b16 100644 --- a/crates/seatbelt/src/circuit/engine/engine_core.rs +++ b/crates/seatbelt/src/circuit/engine/engine_core.rs @@ -657,4 +657,34 @@ mod tests { assert_eq!(stats.opened_duration(later), Duration::from_secs(10)); } + + #[test] + fn exit_when_half_open_with_pending_probe_returns_unchanged() { + use crate::circuit::engine::probing::{HealthProbeOptions, ProbeOptions}; + + let settings = EngineOptions { + break_duration: Duration::from_secs(5), + health_metrics_builder: HealthMetricsBuilder::new(Duration::from_secs(30), 0.1, 10), + // Use a HealthProbe with long sampling duration so it returns Pending + probes: ProbesOptions::new([ProbeOptions::HealthProbe(HealthProbeOptions::new( + Duration::from_secs(60), + 0.2, + 1.0, + ))]), + }; + let control = ClockControl::new(); + let clock = control.to_clock(); + let engine = EngineCore::new(settings, clock); + + // Force to open state + open_engine(&engine); + + // Advance time to transition to half-open + control.advance(Duration::from_secs(6)); + engine.enter(); + + // Record success - should return Unchanged because HealthProbe is still sampling + let result = engine.exit(ExecutionResult::Success, ExecutionMode::Probe); + assert!(matches!(result, ExitCircuitResult::Unchanged)); + } } diff --git a/crates/seatbelt/src/circuit/partition_key.rs b/crates/seatbelt/src/circuit/partition_key.rs index a2134b8f..ce42eb15 100644 --- a/crates/seatbelt/src/circuit/partition_key.rs +++ b/crates/seatbelt/src/circuit/partition_key.rs @@ -171,4 +171,36 @@ mod tests { assert_eq!(h1.finish(), h2.finish()); } + + #[test] + fn into_cow_from_number() { + let key = PartitionKey::from(42u64); + let cow: Cow<'static, str> = key.into(); + assert!(matches!(cow, Cow::Owned(_))); + assert_eq!(cow, "42"); + } + + #[test] + fn into_cow_from_string_owned() { + let key = PartitionKey::from(String::from("owned_string")); + let cow: Cow<'static, str> = key.into(); + assert!(matches!(cow, Cow::Owned(_))); + assert_eq!(cow, "owned_string"); + } + + #[test] + fn into_cow_from_string_borrowed() { + let key = PartitionKey::from("static_str"); + let cow: Cow<'static, str> = key.into(); + assert!(matches!(cow, Cow::Borrowed(_))); + assert_eq!(cow, "static_str"); + } + + #[test] + fn into_cow_from_hashed() { + let key = PartitionKey::hashed(&"value", "label"); + let cow: Cow<'static, str> = key.into(); + assert!(matches!(cow, Cow::Borrowed(_))); + assert_eq!(cow, "label"); + } } diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index 68bdbf69..f6a533f7 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -658,4 +658,16 @@ mod tests { let computed: Vec<_> = backoff.delays().take(10).map(|v| v.as_millis()).collect(); assert_eq!(computed, delays_ms); } + + #[test] + fn secs_to_duration_saturating_zero() { + assert_eq!(secs_to_duration_saturating(0.0), Duration::ZERO); + } + + #[test] + fn secs_to_duration_saturating_negative() { + assert_eq!(secs_to_duration_saturating(-1.0), Duration::ZERO); + assert_eq!(secs_to_duration_saturating(-0.001), Duration::ZERO); + assert_eq!(secs_to_duration_saturating(f64::NEG_INFINITY), Duration::ZERO); + } } diff --git a/crates/seatbelt/src/utils/mod.rs b/crates/seatbelt/src/utils/mod.rs index 456492c4..8b8c1c58 100644 --- a/crates/seatbelt/src/utils/mod.rs +++ b/crates/seatbelt/src/utils/mod.rs @@ -25,3 +25,14 @@ impl EnableIf { Self::new(|_| false) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn enable_if_debug() { + let enable_if: EnableIf = EnableIf::always(); + assert_eq!(format!("{:?}", enable_if), "EnableIf"); + } +} From 8a98c9b11206f9102cc4619d082996f9f62d2249 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 16:50:40 +0100 Subject: [PATCH 44/60] cleanup --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/seatbelt/CHANGELOG.md | 13 +++++++++++++ crates/seatbelt/Cargo.toml | 2 +- crates/seatbelt/src/utils/mod.rs | 2 +- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 983a9b0f..254756b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1357,7 +1357,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "seatbelt" -version = "0.1.0" +version = "0.2.0" dependencies = [ "alloc_tracker", "criterion", diff --git a/Cargo.toml b/Cargo.toml index f2f07542..f85f919b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ layered = { path = "crates/layered", default-features = false, version = "0.1.0" ohno = { path = "crates/ohno", default-features = false, version = "0.2.1" } ohno_macros = { path = "crates/ohno_macros", default-features = false, version = "0.2.0" } recoverable = { path = "crates/recoverable", default-features = false, version = "0.1.0" } -seatbelt = { path = "crates/seatbelt", default-features = false, version = "0.1.0" } +seatbelt = { path = "crates/seatbelt", default-features = false, version = "0.2.0" } testing_aids = { path = "crates/testing_aids", default-features = false } thread_aware = { path = "crates/thread_aware", default-features = false, version = "0.6.1" } thread_aware_macros = { path = "crates/thread_aware_macros", default-features = false, version = "0.6.1" } diff --git a/crates/seatbelt/CHANGELOG.md b/crates/seatbelt/CHANGELOG.md index 825c32f0..cc03dcc8 100644 --- a/crates/seatbelt/CHANGELOG.md +++ b/crates/seatbelt/CHANGELOG.md @@ -1 +1,14 @@ # Changelog + +## [0.2.0] - 2026-01-20 + +Initial release. + +- ✨ Features + + - Timeout middleware for canceling long-running operations + - Retry middleware with constant, linear, and exponential backoff strategies + - Circuit breaker middleware with health-based failure detection and gradual recovery + - OpenTelemetry metrics integration (`metrics` feature) + - Structured logging via tracing (`logs` feature) + - Shared `Context` for clock and telemetry configuration diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index fb228566..850b7789 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -4,7 +4,7 @@ [package] name = "seatbelt" description = "Resilience and recovery mechanisms for fallible operations." -version = "0.1.0" +version = "0.2.0" readme = "README.md" keywords = ["oxidizer", "resilience", "tower", "recovery", "retry", "circuit-breaker"] categories = ["data-structures"] diff --git a/crates/seatbelt/src/utils/mod.rs b/crates/seatbelt/src/utils/mod.rs index 8b8c1c58..c9459081 100644 --- a/crates/seatbelt/src/utils/mod.rs +++ b/crates/seatbelt/src/utils/mod.rs @@ -33,6 +33,6 @@ mod tests { #[test] fn enable_if_debug() { let enable_if: EnableIf = EnableIf::always(); - assert_eq!(format!("{:?}", enable_if), "EnableIf"); + assert_eq!(format!("{enable_if:?}"), "EnableIf"); } } From 3fc1a0b4fa60c433bebae86ac3756c3db035a249 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 17:00:02 +0100 Subject: [PATCH 45/60] cleanup --- crates/seatbelt/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 7e1c4a4a..7dd208d0 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -103,18 +103,18 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG8-jxRLAdSUYGz8tFJ1hv_siG0AGq6ysYdCHGyn4hzr6iA3-YWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4xLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG8-jxRLAdSUYGz8tFJ1hv_siG0AGq6ysYdCHGyn4hzr6iA3-YWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI [__link0]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link1]: https://crates.io/crates/tick/0.1.2 - [__link10]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html - [__link11]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html - [__link12]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html - [__link13]: https://docs.rs/seatbelt/0.1.0/seatbelt/circuit/index.html - [__link2]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=shared::Context + [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit/index.html + [__link11]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html + [__link12]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html + [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit/index.html + [__link2]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::Context [__link3]: https://crates.io/crates/layered/0.1.0 [__link4]: https://docs.rs/layered/0.1.0/layered/?search=Stack - [__link5]: https://docs.rs/seatbelt/0.1.0/seatbelt/?search=shared::Context + [__link5]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::Context [__link6]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link7]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery - [__link8]: https://docs.rs/seatbelt/0.1.0/seatbelt/timeout/index.html - [__link9]: https://docs.rs/seatbelt/0.1.0/seatbelt/retry/index.html + [__link8]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html + [__link9]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html From ec8f6f8bac93db539d318668d20d2f653399d34f Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 17:46:41 +0100 Subject: [PATCH 46/60] fixes --- crates/seatbelt/src/circuit/engine/mod.rs | 4 ++-- crates/seatbelt/src/metrics.rs | 8 +++----- crates/seatbelt/src/shared/context.rs | 13 +++++++++---- crates/seatbelt/src/testing.rs | 2 ++ 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/crates/seatbelt/src/circuit/engine/mod.rs b/crates/seatbelt/src/circuit/engine/mod.rs index ce089311..9e186aaa 100644 --- a/crates/seatbelt/src/circuit/engine/mod.rs +++ b/crates/seatbelt/src/circuit/engine/mod.rs @@ -87,9 +87,9 @@ mod engine_core; pub(crate) use engine_core::*; #[cfg_attr(coverage_nightly, coverage(off))] -#[cfg(test)] +#[cfg(all(test, not(miri)))] mod engine_fake; -#[cfg(test)] +#[cfg(all(test, not(miri)))] pub(crate) use engine_fake::*; mod engine_telemetry; diff --git a/crates/seatbelt/src/metrics.rs b/crates/seatbelt/src/metrics.rs index 3339a0a0..d627a4d8 100644 --- a/crates/seatbelt/src/metrics.rs +++ b/crates/seatbelt/src/metrics.rs @@ -28,18 +28,16 @@ pub(crate) fn create_resilience_event_counter(meter: &Meter) -> opentelemetry::m #[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] +#[cfg(not(miri))] mod tests { - use opentelemetry_sdk::metrics::InMemoryMetricExporter; + use opentelemetry_sdk::metrics::{InMemoryMetricExporter, SdkMeterProvider}; use super::*; #[test] - #[cfg(not(miri))] fn assert_definitions() { let exporter = InMemoryMetricExporter::default(); - let meter_provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder() - .with_periodic_exporter(exporter.clone()) - .build(); + let meter_provider = SdkMeterProvider::builder().with_periodic_exporter(exporter.clone()).build(); let meter = create_meter(&meter_provider); let resilience_events = create_resilience_event_counter(&meter); diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index 84d65732..c108db34 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -190,7 +190,6 @@ impl Clone for Context { #[cfg_attr(coverage_nightly, coverage(off))] #[cfg(test)] mod tests { - use opentelemetry_sdk::metrics::{InMemoryMetricExporter, SdkMeterProvider}; use super::*; @@ -235,9 +234,15 @@ mod tests { assert!(dump.contains('3')); } - fn test_meter_provider() -> (SdkMeterProvider, InMemoryMetricExporter) { - let exporter = InMemoryMetricExporter::default(); - let provider = SdkMeterProvider::builder().with_periodic_exporter(exporter.clone()).build(); + #[cfg(not(miri))] + fn test_meter_provider() -> ( + opentelemetry_sdk::metrics::SdkMeterProvider, + opentelemetry_sdk::metrics::InMemoryMetricExporter, + ) { + let exporter = opentelemetry_sdk::metrics::InMemoryMetricExporter::default(); + let provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder() + .with_periodic_exporter(exporter.clone()) + .build(); (provider, exporter) } } diff --git a/crates/seatbelt/src/testing.rs b/crates/seatbelt/src/testing.rs index 5383ba56..42f0f306 100644 --- a/crates/seatbelt/src/testing.rs +++ b/crates/seatbelt/src/testing.rs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +#![cfg_attr(miri, expect(dead_code, reason = "too much noise to satisfy Miri's expectations"))] + use std::io::Write; use std::sync::{Arc, Mutex}; From 2550c45dadde5abebf8c2875b84c2a646a675664 Mon Sep 17 00:00:00 2001 From: martintomka Date: Tue, 20 Jan 2026 17:51:12 +0100 Subject: [PATCH 47/60] kill mutants --- crates/seatbelt/src/retry/backoff.rs | 1 + crates/seatbelt/src/retry/service.rs | 1 + crates/seatbelt/src/rnd.rs | 1 + crates/seatbelt/src/utils/telemetry_helper.rs | 16 ++++++++++++++++ 4 files changed, 19 insertions(+) diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index f6a533f7..44ab12a5 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -129,6 +129,7 @@ fn apply_jitter(delay: Duration, rnd: &Rnd) -> Duration { /// - [`Polly V7` implementation](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry/blob/7596d2dacf22d88bbd814bc49c28424fb6e921e9/src/Polly.Contrib.WaitAndRetry/Backoff.DecorrelatedJitterV2.cs#L22) /// - [`Polly.Contrib.WaitAndRetry` repo](https://github.com/Polly-Contrib/Polly.Contrib.WaitAndRetry) #[inline] +#[cfg_attr(test, mutants::skip)] // Mutating arithmetic causes infinite loops in retry tests fn decorrelated_jitter_backoff_v2(attempt: u32, base_delay: Duration, prev: &mut f64, rnd: &Rnd) -> Duration { // The original author/credit for this jitter formula is @george-polevoy . // Jitter formula used with permission as described at https://github.com/App-vNext/Polly/issues/530#issuecomment-526555979 diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index f584c940..85436728 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -65,6 +65,7 @@ where { type Out = Out; + #[cfg_attr(test, mutants::skip)] // Mutating enable_if check causes infinite loops async fn execute(&self, mut input: In) -> Self::Out { // Check if retry is enabled for this input if !self.enable_if.call(&input) { diff --git a/crates/seatbelt/src/rnd.rs b/crates/seatbelt/src/rnd.rs index 794b4655..326f22ad 100644 --- a/crates/seatbelt/src/rnd.rs +++ b/crates/seatbelt/src/rnd.rs @@ -45,6 +45,7 @@ impl Rnd { Self::Test(std::sync::Arc::new(f)) } + #[cfg_attr(test, mutants::skip)] // Mutating return value causes infinite loops in backoff calculations pub fn next_f64(&self) -> f64 { match self { Self::Real => fastrand::f64(), diff --git a/crates/seatbelt/src/utils/telemetry_helper.rs b/crates/seatbelt/src/utils/telemetry_helper.rs index 59e7baa0..ab56633f 100644 --- a/crates/seatbelt/src/utils/telemetry_helper.rs +++ b/crates/seatbelt/src/utils/telemetry_helper.rs @@ -26,3 +26,19 @@ impl TelemetryHelper { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn metrics_enabled_returns_false_when_no_reporter() { + let helper = TelemetryHelper { + pipeline_name: "test".into(), + strategy_name: "test".into(), + event_reporter: None, + logs_enabled: false, + }; + assert!(!helper.metrics_enabled()); + } +} From 74e85f08a67e480cffaa070b1addf6f1246200d6 Mon Sep 17 00:00:00 2001 From: martintomka Date: Wed, 21 Jan 2026 09:38:55 +0100 Subject: [PATCH 48/60] rename Context to PipelineContext --- crates/seatbelt/benches/circuit.rs | 4 +- crates/seatbelt/benches/observability.rs | 8 +- crates/seatbelt/benches/retry.rs | 6 +- crates/seatbelt/benches/timeout.rs | 4 +- crates/seatbelt/examples/circuit.rs | 4 +- .../seatbelt/examples/resilience_pipeline.rs | 4 +- crates/seatbelt/examples/retry.rs | 4 +- crates/seatbelt/examples/retry_advanced.rs | 4 +- crates/seatbelt/examples/retry_outage.rs | 4 +- crates/seatbelt/examples/timeout.rs | 4 +- crates/seatbelt/examples/timeout_advanced.rs | 5 +- crates/seatbelt/src/circuit/layer.rs | 14 +- crates/seatbelt/src/circuit/mod.rs | 18 +-- crates/seatbelt/src/circuit/service.rs | 12 +- crates/seatbelt/src/lib.rs | 10 +- crates/seatbelt/src/retry/layer.rs | 26 ++-- crates/seatbelt/src/retry/mod.rs | 18 +-- crates/seatbelt/src/retry/service.rs | 21 +-- crates/seatbelt/src/shared/context.rs | 121 +++--------------- crates/seatbelt/src/shared/mod.rs | 2 +- crates/seatbelt/src/timeout/layer.rs | 14 +- crates/seatbelt/src/timeout/mod.rs | 18 +-- crates/seatbelt/src/timeout/service.rs | 22 ++-- 23 files changed, 134 insertions(+), 213 deletions(-) diff --git a/crates/seatbelt/benches/circuit.rs b/crates/seatbelt/benches/circuit.rs index aacbe34a..de1bea75 100644 --- a/crates/seatbelt/benches/circuit.rs +++ b/crates/seatbelt/benches/circuit.rs @@ -6,7 +6,7 @@ use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; use seatbelt::circuit::Circuit; -use seatbelt::{Context, RecoveryInfo}; +use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; #[global_allocator] @@ -27,7 +27,7 @@ fn entry(c: &mut Criterion) { }); // With circuit breaker (closed state) - let context = Context::new(Clock::new_frozen()); + let context = PipelineContext::new(Clock::new_frozen()); let service = ( Circuit::layer("bench", &context) diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index 37f15435..91c3b824 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -12,7 +12,7 @@ use opentelemetry_sdk::metrics::data::ResourceMetrics; use opentelemetry_sdk::metrics::exporter::PushMetricExporter; use opentelemetry_sdk::metrics::{SdkMeterProvider, Temporality}; use seatbelt::retry::Retry; -use seatbelt::{Context, RecoveryInfo}; +use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; #[global_allocator] @@ -23,7 +23,7 @@ fn entry(c: &mut Criterion) { let session = Session::new(); // No telemetry - let context = Context::new(Clock::new_frozen()); + let context = PipelineContext::new(Clock::new_frozen()); let service = ( Retry::layer("bench", &context) .clone_input() @@ -42,7 +42,7 @@ fn entry(c: &mut Criterion) { // Metrics let meter_provider = SdkMeterProvider::builder().with_periodic_exporter(EmptyExporter).build(); - let context = Context::new(Clock::new_frozen()).enable_metrics(&meter_provider); + let context = PipelineContext::new(Clock::new_frozen()).enable_metrics(&meter_provider); let service = ( Retry::layer("bench", &context) .clone_input() @@ -60,7 +60,7 @@ fn entry(c: &mut Criterion) { }); // Logs - let context = Context::new(Clock::new_frozen()).enable_logs(); + let context = PipelineContext::new(Clock::new_frozen()).enable_logs(); let service = ( Retry::layer("bench", &context) .clone_input() diff --git a/crates/seatbelt/benches/retry.rs b/crates/seatbelt/benches/retry.rs index 4b15a87f..9198146a 100644 --- a/crates/seatbelt/benches/retry.rs +++ b/crates/seatbelt/benches/retry.rs @@ -9,7 +9,7 @@ use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; use seatbelt::retry::Retry; -use seatbelt::{Context, RecoveryInfo}; +use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; #[global_allocator] @@ -30,7 +30,7 @@ fn entry(c: &mut Criterion) { }); // With retry - let context = Context::new(Clock::new_frozen()); + let context = PipelineContext::new(Clock::new_frozen()); let service = ( Retry::layer("bench", &context) @@ -49,7 +49,7 @@ fn entry(c: &mut Criterion) { }); // With retry and recovery - let context = Context::new(Clock::new_frozen()); + let context = PipelineContext::new(Clock::new_frozen()); let service = ( Retry::layer("bench", &context) diff --git a/crates/seatbelt/benches/timeout.rs b/crates/seatbelt/benches/timeout.rs index 705adc29..e46c6206 100644 --- a/crates/seatbelt/benches/timeout.rs +++ b/crates/seatbelt/benches/timeout.rs @@ -8,7 +8,7 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use seatbelt::Context; +use seatbelt::PipelineContext; use seatbelt::timeout::Timeout; use tick::Clock; @@ -30,7 +30,7 @@ fn entry(c: &mut Criterion) { }); // With timeout - let context = Context::new(Clock::new_frozen()); + let context = PipelineContext::new(Clock::new_frozen()); let service = ( Timeout::layer("bench", &context) diff --git a/crates/seatbelt/examples/circuit.rs b/crates/seatbelt/examples/circuit.rs index 3365a270..b09f0122 100644 --- a/crates/seatbelt/examples/circuit.rs +++ b/crates/seatbelt/examples/circuit.rs @@ -16,7 +16,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::circuit::Circuit; -use seatbelt::{Context, RecoveryInfo}; +use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -26,7 +26,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = Context::new(&clock).enable_metrics(&meter_provider); + let context = PipelineContext::new(&clock).enable_metrics(&meter_provider); // Define stack with circuit breaker layer let stack = ( diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index 414b01f3..ee89a4d8 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -13,7 +13,7 @@ use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; use seatbelt::timeout::Timeout; -use seatbelt::{Context, RecoveryInfo}; +use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -25,7 +25,7 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Shared options for resilience middleware - let context = Context::new(&clock).enable_metrics(&meter_provider).pipeline_name("my_pipeline"); + let context = PipelineContext::new(&clock).enable_metrics(&meter_provider).name("my_pipeline"); // Define stack with retry and timeout middlewares let stack = ( diff --git a/crates/seatbelt/examples/retry.rs b/crates/seatbelt/examples/retry.rs index ea09764a..33e3c9c2 100644 --- a/crates/seatbelt/examples/retry.rs +++ b/crates/seatbelt/examples/retry.rs @@ -8,13 +8,13 @@ use std::io::Error; use layered::{Execute, Service, Stack}; use ohno::AppError; use seatbelt::retry::Retry; -use seatbelt::{Context, RecoveryInfo}; +use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; #[tokio::main] async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); - let context = Context::new(&clock); + let context = PipelineContext::new(&clock); // Define stack with retry layer let stack = ( diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index 4f6166bc..b0b960fc 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -15,7 +15,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; -use seatbelt::{Attempt, Context, RecoveryInfo}; +use seatbelt::{Attempt, PipelineContext, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -25,7 +25,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = Context::new(&clock).pipeline_name("retry_advanced").enable_metrics(&meter_provider); + let context = PipelineContext::new(&clock).name("retry_advanced").enable_metrics(&meter_provider); // Define stack with retry layer let stack = ( diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index a7d65c6c..ecad7fc7 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -19,7 +19,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; -use seatbelt::{Context, Recovery, RecoveryInfo}; +use seatbelt::{PipelineContext, Recovery, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -32,7 +32,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = Context::new(&clock).enable_metrics(&meter_provider); + let context = PipelineContext::new(&clock).enable_metrics(&meter_provider); // Configure retry layer for outage handling with input restoration let stack = ( diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index eba4a0ed..85919f89 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -13,7 +13,7 @@ use layered::{Execute, Service, Stack}; use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::Context; +use seatbelt::PipelineContext; use seatbelt::timeout::Timeout; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; @@ -29,7 +29,7 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create common options - let context = Context::new(&clock).enable_metrics(&meter_provider); + let context = PipelineContext::new(&clock).enable_metrics(&meter_provider); // Define stack with timeout layer let stack = ( diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index 41375087..f025b35e 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -12,7 +12,7 @@ use layered::{Execute, Service, Stack}; use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::Context; +use seatbelt::PipelineContext; use seatbelt::timeout::Timeout; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; @@ -29,7 +29,8 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create service options - let context: Context> = Context::new(&clock).pipeline_name("my_pipeline").enable_metrics(&meter_provider); + let context: PipelineContext> = + PipelineContext::new(&clock).name("my_pipeline").enable_metrics(&meter_provider); // Define stack with timeout layer let stack = ( diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit/layer.rs index 539336f6..9669a430 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit/layer.rs @@ -13,7 +13,7 @@ use super::{ use crate::Layer; use crate::circuit::engine::probing::ProbesOptions; use crate::utils::{EnableIf, TelemetryHelper}; -use crate::{Context, NotSet, Recovery, RecoveryInfo, Set}; +use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set}; /// Builder for configuring circuit breaker resilience middleware. /// @@ -27,7 +27,7 @@ use crate::{Context, NotSet, Recovery, RecoveryInfo, Set}; /// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit] documentation. #[derive(Debug)] pub struct CircuitLayer { - context: Context, + context: PipelineContext, recovery: Option>, rejected_input: Option>, on_opened: Option>, @@ -46,7 +46,7 @@ pub struct CircuitLayer impl CircuitLayer { #[must_use] - pub(crate) fn new(name: Cow<'static, str>, context: &Context) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &PipelineContext) -> Self { Self { context: context.clone(), recovery: None, @@ -510,7 +510,7 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { - let context = Context::::new(Clock::new_frozen()); + let context = PipelineContext::::new(Clock::new_frozen()); let layer = CircuitLayer::new("test".into(), &context); let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery(); @@ -552,7 +552,7 @@ mod tests { #[test] fn rejected_input_error_wraps_in_err() { - let context: Context> = Context::new(Clock::new_frozen()); + let context: PipelineContext> = PipelineContext::new(Clock::new_frozen()); let layer = CircuitLayer::new("test".into(), &context); let layer: CircuitLayer<_, _, NotSet, Set> = layer.rejected_input_error(|input, _| format!("rejected: {input}")); @@ -697,8 +697,8 @@ mod tests { static_assertions::assert_impl_all!(CircuitLayer: Debug); } - fn create_test_context() -> Context { - Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context() -> PipelineContext { + PipelineContext::new(Clock::new_frozen()).name("test_pipeline") } fn create_ready_layer() -> CircuitLayer { diff --git a/crates/seatbelt/src/circuit/mod.rs b/crates/seatbelt/src/circuit/mod.rs index 2e28bf97..31ae81b3 100644 --- a/crates/seatbelt/src/circuit/mod.rs +++ b/crates/seatbelt/src/circuit/mod.rs @@ -19,9 +19,9 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit::Circuit; -//! # use seatbelt::{RecoveryInfo, Context}; +//! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { -//! let context = Context::new(&clock); +//! let context = PipelineContext::new(&clock); //! //! let stack = ( //! Circuit::layer("circuit_breaker", &context) @@ -166,7 +166,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when circuit state transitions occur and when requests are rejected //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`][crate::Context::pipeline_name] +//! - `resilience.pipeline.name`: Pipeline identifier from [`PipelineContext::name`][crate::PipelineContext::name] //! - `resilience.strategy.name`: Circuit breaker identifier from [`Circuit::layer`] //! - `resilience.event.name`: One of: //! - `circuit_opened`: When the circuit transitions to open state due to failure threshold being exceeded @@ -188,11 +188,11 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit::Circuit; -//! # use seatbelt::{RecoveryInfo, Context}; +//! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let context = Context::new(&clock).pipeline_name("example"); +//! let context = PipelineContext::new(&clock).name("example"); //! //! let stack = ( //! Circuit::layer("my_breaker", &context) @@ -229,10 +229,10 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit::{Circuit, PartitionKey, HalfOpenMode}; -//! # use seatbelt::{RecoveryInfo, Context}; +//! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. -//! let context = Context::new(&clock).pipeline_name("advanced_example"); +//! let context = PipelineContext::new(&clock).name("advanced_example"); //! //! let stack = ( //! Circuit::layer("advanced_breaker", &context) @@ -290,9 +290,9 @@ //! //! ```compile_fail //! # use seatbelt::circuit::Circuit; -//! # use seatbelt::Context; +//! # use seatbelt::PipelineContext; //! # use layered::Execute; -//! # fn example(context: Context>) { +//! # fn example(context: PipelineContext>) { //! let stack = ( //! Circuit::layer("test", &service_options), // Missing required configuration! //! Execute::new(|input| async move { Ok(input) }) diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit/service.rs index 1ebd4a3f..ce9c7952 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit/service.rs @@ -47,7 +47,7 @@ impl Circuit { /// before it can be used to build a circuit breaker service. pub fn layer( name: impl Into>, - context: &crate::Context, + context: &crate::PipelineContext, ) -> CircuitLayer { CircuitLayer::new(name.into(), context) } @@ -173,11 +173,11 @@ mod tests { use crate::Layer; use crate::circuit::constants::DEFAULT_BREAK_DURATION; use crate::circuit::{EngineFake, HalfOpenMode, HealthInfo, Stats}; - use crate::{Context, RecoveryInfo, Set}; + use crate::{PipelineContext, RecoveryInfo, Set}; #[test] fn layer_ensure_defaults() { - let context = Context::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); + let context = PipelineContext::::new(Clock::new_frozen()).name("test_pipeline"); let layer: CircuitLayer = Circuit::layer("test_breaker", &context); let layer = layer .recovery_with(|_, _| RecoveryInfo::never()) @@ -463,8 +463,8 @@ mod tests { let _guard = log_capture.subscriber().set_default(); let clock_control = ClockControl::new(); - let context = Context::::new(clock_control.to_clock()) - .pipeline_name("log_test_pipeline") + let context = PipelineContext::::new(clock_control.to_clock()) + .name("log_test_pipeline") .enable_logs(); let service = Circuit::layer("log_test_circuit", &context) @@ -506,7 +506,7 @@ mod tests { } fn create_ready_circuit_breaker_layer(clock: &Clock) -> CircuitLayer { - let context = Context::::new(clock.clone()).pipeline_name("test_pipeline"); + let context = PipelineContext::::new(clock.clone()).name("test_pipeline"); Circuit::layer("test_breaker", &context) .recovery_with(|output, _| { if output.contains("error") { diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index ea119468..2d8970ef 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -28,10 +28,10 @@ //! # use layered::{Execute, Service, Stack}; //! use seatbelt::retry::Retry; //! use seatbelt::timeout::Timeout; -//! use seatbelt::{RecoveryInfo, Context}; +//! use seatbelt::{RecoveryInfo, PipelineContext}; //! //! # async fn main(clock: Clock) { -//! let context = Context::new(&clock); +//! let context = PipelineContext::new(&clock); //! let service = ( //! // Retry middleware: Automatically retries failed operations //! Retry::layer("retry", &context) @@ -61,7 +61,7 @@ //! //! > **Note**: Resilience middleware requires [`Clock`][tick::Clock] from the [`tick`] crate for timing //! > operations like delays, timeouts, and backoff calculations. The clock is passed through -//! > [`Context`] when creating middleware layers. +//! > [`PipelineContext`] when creating middleware layers. //! //! > **Note**: This crate uses the [`layered`] crate for composing middleware. The middleware layers //! > can be stacked together using tuples and built into a service using the [`Stack`] trait. @@ -83,7 +83,7 @@ //! //! ## Core Types //! -//! - [`Context`] - Holds shared state for resilience middleware, including the clock. +//! - [`PipelineContext`] - Holds shared state for resilience middleware, including the clock. //! - [`RecoveryInfo`] - Classifies errors as recoverable (transient) or non-recoverable (permanent). //! - [`Recovery`] - A trait for types that can determine their recoverability. //! @@ -111,7 +111,7 @@ pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; pub(crate) mod shared; -pub use crate::shared::{Attempt, Backoff, Context, NotSet, Set}; +pub use crate::shared::{Attempt, Backoff, NotSet, PipelineContext, Set}; #[cfg(any(feature = "timeout", test))] pub mod timeout; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 52096a77..87f6ab4c 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -12,7 +12,7 @@ use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, Re use crate::shared::MaxAttempts; use crate::utils::EnableIf; use crate::utils::TelemetryHelper; -use crate::{Backoff, Context, NotSet, Recovery, RecoveryInfo, Set}; +use crate::{Backoff, NotSet, PipelineContext, Recovery, RecoveryInfo, Set}; /// Builder for configuring retry resilience middleware. /// @@ -25,7 +25,7 @@ use crate::{Backoff, Context, NotSet, Recovery, RecoveryInfo, Set}; /// For comprehensive examples, see the [retry module][crate::retry] documentation. #[derive(Debug)] pub struct RetryLayer { - context: Context, + context: PipelineContext, max_attempts: MaxAttempts, backoff: BackoffOptions, clone_input: Option>, @@ -40,7 +40,7 @@ pub struct RetryLayer { impl RetryLayer { #[must_use] - pub(crate) fn new(name: Cow<'static, str>, context: &Context) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &PipelineContext) -> Self { Self { context: context.clone(), max_attempts: MaxAttempts::Finite(DEFAULT_RETRY_ATTEMPTS.saturating_add(1)), @@ -303,10 +303,10 @@ impl RetryLayer>::new(Clock::new_frozen()); + /// # let context = PipelineContext::>::new(Clock::new_frozen()); /// // Service with multiple endpoints that can route around unavailable services /// let layer = Retry::layer("multi_endpoint_retry", &context) /// .clone_input_with(|input: &mut String, args| { @@ -359,11 +359,11 @@ impl RetryLayer RetryLayer } /// # struct HttpResponse { status: u16 } @@ -467,7 +467,7 @@ impl RetryLayer>::new(&clock); + /// # let context = PipelineContext::>::new(&clock); /// type HttpResult = Result; /// /// let layer = Retry::layer("http_retry", &context).restore_input_from_error( @@ -587,7 +587,7 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { - let context = Context::::new(Clock::new_frozen()); + let context = PipelineContext::::new(Clock::new_frozen()); let layer = RetryLayer::new("test".into(), &context); let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery(); @@ -740,7 +740,7 @@ mod tests { #[test] fn restore_input_from_error_sets_correctly() { - let context: Context> = Context::new(Clock::new_frozen()).pipeline_name("test"); + let context: PipelineContext> = PipelineContext::new(Clock::new_frozen()).name("test"); let layer = RetryLayer::new("test".into(), &context) .restore_input_from_error(|e: &mut String, _| (e == "restore").then(|| std::mem::take(e))); @@ -763,8 +763,8 @@ mod tests { static_assertions::assert_impl_all!(RetryLayer: Debug); } - fn create_test_context() -> Context { - Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context() -> PipelineContext { + PipelineContext::new(Clock::new_frozen()).name("test_pipeline") } fn create_ready_layer() -> RetryLayer { diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index 21d8ae17..ec300051 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -16,9 +16,9 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::Retry; -//! # use seatbelt::{Backoff, RecoveryInfo, Context}; +//! # use seatbelt::{Backoff, RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { -//! let context = Context::new(&clock).pipeline_name("my_service"); +//! let context = PipelineContext::new(&clock).name("my_service"); //! //! let stack = ( //! Retry::layer("retry", &context) @@ -90,7 +90,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted for each attempt that should be retried (including the final retry attempt) //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`][crate::Context::pipeline_name] +//! - `resilience.pipeline.name`: Pipeline identifier from [`PipelineContext::name`][crate::PipelineContext::name] //! - `resilience.strategy.name`: Timeout identifier from [`Retry::layer`] //! - `resilience.event.name`: Always `retry` //! - `resilience.attempt.index`: Attempt index (0-based) @@ -107,11 +107,11 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::Retry; -//! # use seatbelt::{Backoff, RecoveryInfo, Context}; +//! # use seatbelt::{Backoff, RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let context = Context::new(&clock).pipeline_name("example"); +//! let context = PipelineContext::new(&clock).name("example"); //! //! let stack = ( //! Retry::layer("my_retry", &context) @@ -149,10 +149,10 @@ //! # use std::io; //! # use layered::{Execute, Stack, Service}; //! # use seatbelt::retry::Retry; -//! # use seatbelt::{RecoveryInfo, Context, Backoff}; +//! # use seatbelt::{RecoveryInfo, PipelineContext, Backoff}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. -//! let context = Context::new(&clock); +//! let context = PipelineContext::new(&clock); //! //! let stack = ( //! Retry::layer("advanced_retry", &context) @@ -194,9 +194,9 @@ //! //! ```compile_fail //! # use seatbelt::retry::Retry; -//! # use seatbelt::Context; +//! # use seatbelt::PipelineContext; //! # use tick::Clock; -//! # fn example(context: Context) { +//! # fn example(context: PipelineContext) { //! let stack = ( //! Retry::layer("test", &service_options), // Missing required configuration! //! Execute::new(|input| async move { input }) diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 85436728..4c691b94 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -52,7 +52,7 @@ impl Retry { /// before it can be used to build a retry service. pub fn layer( name: impl Into>, - context: &crate::Context, + context: &crate::PipelineContext, ) -> crate::retry::RetryLayer { crate::retry::RetryLayer::new(name.into(), context) } @@ -269,11 +269,11 @@ mod tests { use crate::retry::RetryLayer; use crate::shared::Backoff; use crate::testing::MetricTester; - use crate::{Context, Set}; + use crate::{PipelineContext, Set}; #[test] fn layer_ensure_defaults() { - let context = Context::::new(Clock::new_frozen()).pipeline_name("test_pipeline"); + let context = PipelineContext::::new(Clock::new_frozen()).name("test_pipeline"); let layer: RetryLayer = Retry::layer("test_retry", &context); let layer = layer.recovery_with(|_, _| RecoveryInfo::never()).clone_input(); @@ -581,8 +581,8 @@ mod tests { #[tokio::test] async fn retries_exhausted_ensure_telemetry_reported() { let tester = MetricTester::new(); - let context = Context::::new(ClockControl::default().auto_advance_timers(true).to_clock()) - .pipeline_name("test_pipeline") + let context = PipelineContext::::new(ClockControl::default().auto_advance_timers(true).to_clock()) + .name("test_pipeline") .enable_metrics(tester.meter_provider()); let service = create_ready_retry_layer_core(RecoveryInfo::retry(), &context) @@ -617,8 +617,8 @@ mod tests { let _guard = log_capture.subscriber().set_default(); let clock = ClockControl::default().auto_advance_timers(true).to_clock(); - let context = Context::::new(clock) - .pipeline_name("log_test_pipeline") + let context = PipelineContext::::new(clock) + .name("log_test_pipeline") .enable_logs(); let service = Retry::layer("log_test_retry", &context) @@ -637,11 +637,14 @@ mod tests { } fn create_ready_retry_layer(clock: &Clock, recover: RecoveryInfo) -> RetryLayer { - let context = Context::new(clock.clone()).pipeline_name("test_pipeline"); + let context = PipelineContext::new(clock.clone()).name("test_pipeline"); create_ready_retry_layer_core(recover, &context) } - fn create_ready_retry_layer_core(recover: RecoveryInfo, context: &Context) -> RetryLayer { + fn create_ready_retry_layer_core( + recover: RecoveryInfo, + context: &PipelineContext, + ) -> RetryLayer { Retry::layer("test_retry", context) .recovery_with(move |_, _| recover.clone()) .clone_input() diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index c108db34..cd0f966f 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -7,79 +7,15 @@ use tick::Clock; pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; -/// Shared options for resilience middleware pipelines. +/// Shared configuration and dependencies for a pipeline of resilience middleware. /// -/// `Context` bundles a clock and telemetry primitives (OpenTelemetry -/// meter) that resilience middleware uses to measure time, emit metrics, and -/// report events. Use a single instance to configure a whole resilience -/// pipeline (retries, timeouts, circuit breakers, hedging, rate limiting, -/// etc.). -/// -/// The [`pipeline_name`][`Context::pipeline_name`] groups resilience middleware under one logical -/// parent for telemetry correlation. When you reuse the same name across all -/// policies in a pipeline, exported metrics and events will carry the same -/// pipeline attribute, making dashboards and analysis easier. -/// -/// You can enable metrics via [`enable_metrics`](Self::enable_metrics) and logs via -/// [`enable_logs`](Self::enable_logs) if you need telemetry for observability. -/// -/// # Examples -/// -/// Basic usage: -/// -/// ```rust -/// # #[cfg(feature = "metrics")] -/// # fn example() { -/// # use opentelemetry::metrics::MeterProvider; -/// # use seatbelt::Context; -/// # use tick::Clock; -/// # fn inner(clock: Clock, meter_provider: &dyn MeterProvider) { -/// // Create a context for a resilience pipeline -/// let ctx = Context::::new(&clock) -/// .pipeline_name("auth_pipeline") -/// .enable_metrics(meter_provider); -/// -/// // Pass the context to resilience middleware layers -/// // (e.g., Retry::layer("my_retry", &ctx), Timeout::layer("my_timeout", &ctx)) -/// # } -/// # } -/// ``` -/// -/// # Authoring Resilience `Middlewares` -/// -/// This crate primitives are designed to make writing custom resilience middleware -/// straightforward and observable. Pass a shared [`Context`] to each -/// middleware that needs telemetry. You can keep a single instance (or derive child -/// scopes from it) to group related middleware under one logical parent so their -/// telemetry automatically shares correlation dimensions (e.g., the pipeline name). -/// -/// With `Context` you can: -/// -/// - Share a common `Clock` for consistent timing and timeouts -/// - Create and use telemetry instruments via the OpenTelemetry `Meter` -/// - Set a logical parent (`pipeline_name`) that groups middleware together -/// -/// ## Middleware pattern -/// -/// Resilience middleware typically follows this pattern: -/// -/// 1. Accept a [`Context`] in your layer/builder to configure observability -/// 2. Create instruments from the options (e.g., counters, histograms) -/// 3. Wrap the next service/layer and forward requests without hidden global state -/// 4. Report events at key decision points (retries, circuit breaker trips, timeouts, …) -/// 5. Use recovery metadata from error types to make informed retry/backoff decisions -/// -/// > Note: The canonical, in depth description of the Oxidizer middleware / service -/// > layering pattern lives in the [`layered`] crate documentation. Resilience -/// > middleware in this crate is expected to follow that same pattern (layer types -/// > implementing `Layer`, constructors taking the next service, no hidden global -/// > state, etc.). If you need to evolve the pattern itself, update it in -/// > `layered` and reference it here instead of diverging. +/// Pass a single `PipelineContext` to all middleware in a pipeline (retry, timeout, +/// circuit breaker, etc.) to share a clock and telemetry configuration. #[derive(Debug)] #[non_exhaustive] -pub struct Context { +pub struct PipelineContext { clock: Clock, - pipeline_name: Cow<'static, str>, + name: Cow<'static, str>, #[cfg(any(feature = "metrics", test))] meter: Option, logs_enabled: bool, @@ -87,16 +23,12 @@ pub struct Context { _out: std::marker::PhantomData Out>, } -impl Context { - /// Create options with a clock. - /// - /// Initializes with `pipeline_name = "default"`. Enable metrics via - /// [`enable_metrics`](Self::enable_metrics) and logs via - /// [`enable_logs`](Self::enable_logs) if needed. +impl PipelineContext { + /// Create a context with a clock. Initializes with `name = "default"`. pub fn new(clock: impl AsRef) -> Self { Self { clock: clock.as_ref().clone(), - pipeline_name: Cow::Borrowed(DEFAULT_PIPELINE_NAME), + name: Cow::Borrowed(DEFAULT_PIPELINE_NAME), #[cfg(any(feature = "metrics", test))] meter: None, logs_enabled: false, @@ -106,31 +38,20 @@ impl Context { } /// Get the configured clock for timing operations. - /// - /// Middleware use this to measure duration, track timeouts, and perform - /// other time-related operations from a consistent source. #[must_use] #[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] pub(crate) fn get_clock(&self) -> &Clock { &self.clock } - /// Set the logical pipeline name used to group resilience middleware. - /// - /// Use the same `pipeline_name` across the policies (retry, timeout, - /// circuit breaker, etc.) that form one execution pipeline. The name is - /// attached to emitted metrics/events so they can be correlated. Prefer - /// `snake_case`, e.g., `user_auth`, `data_ingest`. + /// Set the pipeline name for telemetry correlation. Prefer `snake_case`. #[must_use] - pub fn pipeline_name(mut self, name: impl Into>) -> Self { - self.pipeline_name = name.into(); + pub fn name(mut self, name: impl Into>) -> Self { + self.name = name.into(); self } - /// Enable metrics reporting with a custom meter provider. - /// - /// Metrics are disabled by default. Call this method to enable metrics - /// reporting using the provided OpenTelemetry meter provider. + /// Enable metrics reporting with the given OpenTelemetry meter provider. #[must_use] #[cfg(any(feature = "metrics", test))] pub fn enable_metrics(self, provider: &dyn opentelemetry::metrics::MeterProvider) -> Self { @@ -141,10 +62,6 @@ impl Context { } /// Enable structured logging for resilience events. - /// - /// Logs are disabled by default. Call this method to enable structured - /// logging for resilience events like retries, timeouts, and circuit breaker - /// state changes. #[must_use] #[cfg(any(feature = "logs", test))] pub fn enable_logs(self) -> Self { @@ -164,7 +81,7 @@ impl Context { #[cfg(any(feature = "metrics", test))] event_reporter: self.meter.as_ref().map(crate::metrics::create_resilience_event_counter), #[cfg(any(feature = "metrics", feature = "logs", test))] - pipeline_name: self.pipeline_name.clone(), + pipeline_name: self.name.clone(), #[cfg(any(feature = "metrics", feature = "logs", test))] strategy_name, #[cfg(any(feature = "logs", test))] @@ -173,11 +90,11 @@ impl Context { } } -impl Clone for Context { +impl Clone for PipelineContext { fn clone(&self) -> Self { Self { clock: self.clock.clone(), - pipeline_name: self.pipeline_name.clone(), + name: self.name.clone(), #[cfg(any(feature = "metrics", test))] meter: self.meter.clone(), _in: std::marker::PhantomData, @@ -196,7 +113,7 @@ mod tests { #[test] fn test_new_with_clock_sets_default_pipeline_name() { let clock = tick::Clock::new_frozen(); - let ctx = Context::<(), ()>::new(clock); + let ctx = PipelineContext::<(), ()>::new(clock); let telemetry = ctx.create_telemetry("test".into()); assert_eq!(telemetry.pipeline_name.as_ref(), DEFAULT_PIPELINE_NAME); // Ensure clock reference behaves (timestamp monotonic relative behaviour not required, just accessible) @@ -204,9 +121,9 @@ mod tests { } #[test] - fn test_pipeline_name_with_custom_value_sets_name_and_is_owned() { + fn test_name_with_custom_value_sets_name_and_is_owned() { let clock = tick::Clock::new_frozen(); - let ctx = Context::<(), ()>::new(clock).pipeline_name(String::from("custom_pipeline")); + let ctx = PipelineContext::<(), ()>::new(clock).name(String::from("custom_pipeline")); let telemetry = ctx.create_telemetry("test".into()); assert_eq!(telemetry.pipeline_name.as_ref(), "custom_pipeline"); assert!(matches!(telemetry.pipeline_name, Cow::Owned(_))); @@ -218,7 +135,7 @@ mod tests { let clock = tick::Clock::new_frozen(); let (provider, exporter) = test_meter_provider(); - let ctx = Context::<(), ()>::new(clock).enable_metrics(&provider); + let ctx = PipelineContext::<(), ()>::new(clock).enable_metrics(&provider); let telemetry1 = ctx.create_telemetry("test1".into()); let telemetry2 = ctx.create_telemetry("test2".into()); let c1 = telemetry1.event_reporter.unwrap(); diff --git a/crates/seatbelt/src/shared/mod.rs b/crates/seatbelt/src/shared/mod.rs index c966582a..45ea541e 100644 --- a/crates/seatbelt/src/shared/mod.rs +++ b/crates/seatbelt/src/shared/mod.rs @@ -12,7 +12,7 @@ pub struct Set; pub struct NotSet; mod context; -pub use context::Context; +pub use context::PipelineContext; mod attempt; mod backoff; diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 047f6e8e..51081ad8 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -11,7 +11,7 @@ use crate::timeout::{ }; use crate::utils::EnableIf; use crate::utils::TelemetryHelper; -use crate::{Context, NotSet, Set}; +use crate::{NotSet, PipelineContext, Set}; /// Builder for configuring timeout resilience middleware. /// @@ -24,7 +24,7 @@ use crate::{Context, NotSet, Set}; /// For comprehensive examples, see the [timeout module][crate::timeout] documentation. #[derive(Debug)] pub struct TimeoutLayer { - context: Context, + context: PipelineContext, timeout: Option, timeout_output: Option>, on_timeout: Option>, @@ -36,7 +36,7 @@ pub struct TimeoutLayer { impl TimeoutLayer { #[must_use] - pub(crate) fn new(name: Cow<'static, str>, context: &Context) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &PipelineContext) -> Self { Self { timeout: None, timeout_output: None, @@ -391,12 +391,12 @@ mod tests { static_assertions::assert_impl_all!(TimeoutLayer: Debug); } - fn create_test_context() -> Context { - Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context() -> PipelineContext { + PipelineContext::new(Clock::new_frozen()).name("test_pipeline") } - fn create_test_context_result() -> Context> { - Context::new(Clock::new_frozen()).pipeline_name("test_pipeline") + fn create_test_context_result() -> PipelineContext> { + PipelineContext::new(Clock::new_frozen()).name("test_pipeline") } fn create_ready_layer() -> TimeoutLayer { diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index 7126fd8f..2cf78f15 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -18,9 +18,9 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::timeout::Timeout; -//! # use seatbelt::Context; +//! # use seatbelt::PipelineContext; //! # async fn example(clock: Clock) -> Result<(), io::Error> { -//! let context = Context::new(&clock).pipeline_name("my_service"); +//! let context = PipelineContext::new(&clock).name("my_service"); //! //! let stack = ( //! Timeout::layer("timeout", &context) @@ -80,7 +80,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when a timeout occurs //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`Context::pipeline_name`][crate::Context::pipeline_name] +//! - `resilience.pipeline.name`: Pipeline identifier from [`PipelineContext::name`][crate::PipelineContext::name] //! - `resilience.strategy.name`: Timeout identifier from [`Timeout::layer`] //! - `resilience.event.name`: Always `timeout` //! @@ -94,12 +94,12 @@ //! # use std::time::Duration; //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::Context; +//! # use seatbelt::PipelineContext; //! # use seatbelt::timeout::Timeout; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let context = Context::new(&clock); +//! let context = PipelineContext::new(&clock); //! //! let stack = ( //! Timeout::layer("my_timeout", &context) @@ -134,11 +134,11 @@ //! # use std::io; //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::Context; +//! # use seatbelt::PipelineContext; //! # use seatbelt::timeout::Timeout; //! # async fn example(clock: Clock) -> Result<(), io::Error> { //! // Define common options for resilience middleware. -//! let context = Context::new(&clock); +//! let context = PipelineContext::new(&clock); //! //! let stack = ( //! Timeout::layer("my_timeout", &context) @@ -181,9 +181,9 @@ //! # use std::time::Duration; //! # use layered::{Execute, Stack}; //! # use tick::Clock; -//! # use seatbelt::Context; +//! # use seatbelt::PipelineContext; //! # use seatbelt::timeout::Timeout; -//! # fn example(context: Context) { +//! # fn example(context: PipelineContext) { //! let stack = ( //! Timeout::layer("my_timeout", &context), // Missing required configuration! //! Execute::new(|input| async move { input }) diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 39f7b722..8a4559fb 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -10,7 +10,7 @@ use tick::{Clock, FutureExt}; use crate::timeout::{OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs}; use crate::utils::EnableIf; -use crate::{Context, NotSet}; +use crate::{NotSet, PipelineContext}; /// Applies a timeout to service execution for canceling long-running operations. /// @@ -53,10 +53,10 @@ impl Timeout { /// # use std::time::Duration; /// # use layered::{Execute, Stack}; /// # use tick::Clock; - /// # use seatbelt::Context; + /// # use seatbelt::PipelineContext; /// use seatbelt::timeout::Timeout; /// - /// # fn example(context: Context) { + /// # fn example(context: PipelineContext) { /// let timeout_layer = Timeout::layer("my_timeout", &context) /// .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) /// .timeout(Duration::from_secs(30)); @@ -66,7 +66,7 @@ impl Timeout { /// For comprehensive examples, see the [timeout module] documentation. /// /// [timeout module]: crate::timeout - pub fn layer(name: impl Into>, context: &Context) -> TimeoutLayer { + pub fn layer(name: impl Into>, context: &PipelineContext) -> TimeoutLayer { TimeoutLayer::new(name.into(), context) } } @@ -148,7 +148,7 @@ mod tests { #[tokio::test] async fn no_timeout() { let clock = Clock::new_frozen(); - let context = Context::new(clock); + let context = PipelineContext::new(clock); let stack = ( Timeout::layer("test_timeout", &context) @@ -170,7 +170,7 @@ mod tests { .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) .to_clock(); - let context = Context::new(clock.clone()); + let context = PipelineContext::new(clock.clone()); let called = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); let called_clone = std::sync::Arc::clone(&called); @@ -208,7 +208,7 @@ mod tests { .to_clock(); let stack = ( - Timeout::layer("test_timeout", &Context::new(clock.clone())) + Timeout::layer("test_timeout", &PipelineContext::new(clock.clone())) .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) .timeout(Duration::from_millis(200)) .timeout_override(|input, _args| { @@ -237,7 +237,7 @@ mod tests { async fn no_timeout_if_disabled() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let stack = ( - Timeout::layer("test_timeout", &Context::new(&clock)) + Timeout::layer("test_timeout", &PipelineContext::new(&clock)) .timeout_output(|_args| "timed out".to_string()) .timeout(Duration::from_millis(200)) .disable(), @@ -272,7 +272,7 @@ mod tests { .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) .to_clock(); - let context = Context::new(clock.clone()).enable_logs().pipeline_name("log_test_pipeline"); + let context = PipelineContext::new(clock.clone()).enable_logs().name("log_test_pipeline"); let stack = ( Timeout::layer("log_test_timeout", &context) @@ -308,9 +308,9 @@ mod tests { .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) .to_clock(); - let context = Context::new(clock.clone()) + let context = PipelineContext::new(clock.clone()) .enable_metrics(metrics.meter_provider()) - .pipeline_name("metrics_pipeline"); + .name("metrics_pipeline"); let stack = ( Timeout::layer("metrics_timeout", &context) From fa2f124b09eb7aebf9f20ee3e4237d675dd3d29d Mon Sep 17 00:00:00 2001 From: martintomka Date: Wed, 21 Jan 2026 09:50:49 +0100 Subject: [PATCH 49/60] rename circuit to circuit_breaker --- crates/seatbelt/Cargo.toml | 10 ++++----- crates/seatbelt/README.md | 22 +++++++++---------- .../{circuit.rs => circuit_breaker.rs} | 2 +- .../{circuit.rs => circuit_breaker.rs} | 2 +- .../src/{circuit => circuit_breaker}/args.rs | 2 +- .../{circuit => circuit_breaker}/callbacks.rs | 0 .../{circuit => circuit_breaker}/constants.rs | 0 .../engine/engine_core.rs | 12 +++++----- .../engine/engine_fake.rs | 2 +- .../engine/engine_telemetry.rs | 8 +++---- .../engine/engines.rs | 8 +++---- .../engine/mod.rs | 2 +- .../engine/probing/health_probe.rs | 4 ++-- .../engine/probing/mod.rs | 2 +- .../engine/probing/options.rs | 2 +- .../engine/probing/probes.rs | 4 ++-- .../engine/probing/single_probe.rs | 2 +- .../execution_result.rs | 0 .../half_open_mode.rs | 8 +++---- .../{circuit => circuit_breaker}/health.rs | 2 +- .../src/{circuit => circuit_breaker}/layer.rs | 16 +++++++------- .../src/{circuit => circuit_breaker}/mod.rs | 8 +++---- .../partition_key.rs | 4 ++-- .../{circuit => circuit_breaker}/service.rs | 6 ++--- .../{circuit => circuit_breaker}/telemetry.rs | 0 crates/seatbelt/src/lib.rs | 20 +++++++++++------ crates/seatbelt/src/metrics.rs | 2 +- crates/seatbelt/src/shared/context.rs | 4 ++-- 28 files changed, 80 insertions(+), 74 deletions(-) rename crates/seatbelt/benches/{circuit.rs => circuit_breaker.rs} (97%) rename crates/seatbelt/examples/{circuit.rs => circuit_breaker.rs} (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/args.rs (99%) rename crates/seatbelt/src/{circuit => circuit_breaker}/callbacks.rs (100%) rename crates/seatbelt/src/{circuit => circuit_breaker}/constants.rs (100%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/engine_core.rs (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/engine_fake.rs (84%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/engine_telemetry.rs (97%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/engines.rs (90%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/mod.rs (97%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/probing/health_probe.rs (97%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/probing/mod.rs (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/probing/options.rs (99%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/probing/probes.rs (96%) rename crates/seatbelt/src/{circuit => circuit_breaker}/engine/probing/single_probe.rs (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/execution_result.rs (100%) rename crates/seatbelt/src/{circuit => circuit_breaker}/half_open_mode.rs (95%) rename crates/seatbelt/src/{circuit => circuit_breaker}/health.rs (99%) rename crates/seatbelt/src/{circuit => circuit_breaker}/layer.rs (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/mod.rs (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/partition_key.rs (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/service.rs (98%) rename crates/seatbelt/src/{circuit => circuit_breaker}/telemetry.rs (100%) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 850b7789..3f8951ca 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -35,7 +35,7 @@ all-features = true default = [] timeout = [] retry = ["dep:fastrand"] -circuit = ["dep:fastrand"] +circuit-breaker = ["dep:fastrand"] metrics = ["dep:opentelemetry", "opentelemetry/metrics"] logs = ["dep:tracing"] @@ -91,8 +91,8 @@ name = "resilience_pipeline" required-features = ["retry", "timeout"] [[example]] -name = "circuit" -required-features = ["circuit", "metrics"] +name = "circuit_breaker" +required-features = ["circuit-breaker", "metrics"] [[bench]] name = "observability" @@ -110,9 +110,9 @@ harness = false required-features = ["retry"] [[bench]] -name = "circuit" +name = "circuit_breaker" harness = false -required-features = ["circuit"] +required-features = ["circuit-breaker"] [lints] workspace = true diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 7dd208d0..a004c520 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -23,9 +23,9 @@ and **Timeout** prevents operations from hanging indefinitely: ```rust use seatbelt::retry::Retry; use seatbelt::timeout::Timeout; -use seatbelt::{RecoveryInfo, Context}; +use seatbelt::{RecoveryInfo, PipelineContext}; -let context = Context::new(&clock); +let context = PipelineContext::new(&clock); let service = ( // Retry middleware: Automatically retries failed operations Retry::layer("retry", &context) @@ -50,7 +50,7 @@ let result = service.execute("input data".to_string()).await; > > **Note**: Resilience middleware requires [`Clock`][__link0] from the [`tick`][__link1] crate for timing > operations like delays, timeouts, and backoff calculations. The clock is passed through - > [`Context`][__link2] when creating middleware layers. + > [`PipelineContext`][__link2] when creating middleware layers. > > **Note**: This crate uses the [`layered`][__link3] crate for composing middleware. The middleware layers @@ -73,7 +73,7 @@ scenarios. ### Core Types -* [`Context`][__link5] - Holds shared state for resilience middleware, including the clock. +* [`PipelineContext`][__link5] - Holds shared state for resilience middleware, including the clock. * [`RecoveryInfo`][__link6] - Classifies errors as recoverable (transient) or non-recoverable (permanent). * [`Recovery`][__link7] - A trait for types that can determine their recoverability. @@ -84,7 +84,7 @@ for each module for details on how to use them. * [`timeout`][__link8] - Middleware that cancels long-running operations. * [`retry`][__link9] - Middleware that automatically retries failed operations. -* [`circuit`][__link10] - Middleware that prevents cascading failures. +* [`circuit_breaker`][__link10] - Middleware that prevents cascading failures. ## Features @@ -93,7 +93,7 @@ This crate provides several optional features that can be enabled in your `Cargo * **`timeout`** - Enables the [`timeout`][__link11] middleware for canceling long-running operations. * **`retry`** - Enables the [`retry`][__link12] middleware for automatically retrying failed operations with configurable backoff strategies, jitter, and recovery classification. -* **`circuit`** - Enables the [`circuit`][__link13] middleware for preventing cascading failures. +* **`circuit-breaker`** - Enables the [`circuit_breaker`][__link13] middleware for preventing cascading failures. * **`metrics`** - Exposes the OpenTelemetry metrics API for collecting and reporting metrics. * **`logs`** - Enables structured logging for resilience middleware using the `tracing` crate. @@ -103,17 +103,17 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG8-jxRLAdSUYGz8tFJ1hv_siG0AGq6ysYdCHGyn4hzr6iA3-YWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGyn6pS-pNJ0BG8D_ULLoyLY-G_Jceg8SxKmRG_uew5JtfsJUYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI [__link0]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link1]: https://crates.io/crates/tick/0.1.2 - [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit/index.html + [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html [__link11]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html [__link12]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html - [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit/index.html - [__link2]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::Context + [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html + [__link2]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext [__link3]: https://crates.io/crates/layered/0.1.0 [__link4]: https://docs.rs/layered/0.1.0/layered/?search=Stack - [__link5]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::Context + [__link5]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext [__link6]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link7]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link8]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html diff --git a/crates/seatbelt/benches/circuit.rs b/crates/seatbelt/benches/circuit_breaker.rs similarity index 97% rename from crates/seatbelt/benches/circuit.rs rename to crates/seatbelt/benches/circuit_breaker.rs index de1bea75..c5bbd765 100644 --- a/crates/seatbelt/benches/circuit.rs +++ b/crates/seatbelt/benches/circuit_breaker.rs @@ -5,7 +5,7 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use seatbelt::circuit::Circuit; +use seatbelt::circuit_breaker::Circuit; use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; diff --git a/crates/seatbelt/examples/circuit.rs b/crates/seatbelt/examples/circuit_breaker.rs similarity index 98% rename from crates/seatbelt/examples/circuit.rs rename to crates/seatbelt/examples/circuit_breaker.rs index b09f0122..e86eeec9 100644 --- a/crates/seatbelt/examples/circuit.rs +++ b/crates/seatbelt/examples/circuit_breaker.rs @@ -15,7 +15,7 @@ use layered::{Execute, Service, Stack}; use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::circuit::Circuit; +use seatbelt::circuit_breaker::Circuit; use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; diff --git a/crates/seatbelt/src/circuit/args.rs b/crates/seatbelt/src/circuit_breaker/args.rs similarity index 99% rename from crates/seatbelt/src/circuit/args.rs rename to crates/seatbelt/src/circuit_breaker/args.rs index 7bb6c825..f16ced2e 100644 --- a/crates/seatbelt/src/circuit/args.rs +++ b/crates/seatbelt/src/circuit_breaker/args.rs @@ -5,7 +5,7 @@ use std::time::Duration; use tick::Clock; -use crate::circuit::PartitionKey; +use crate::circuit_breaker::PartitionKey; /// Arguments for the [`recovery_with`][super::CircuitLayer::recovery_with] callback function. /// diff --git a/crates/seatbelt/src/circuit/callbacks.rs b/crates/seatbelt/src/circuit_breaker/callbacks.rs similarity index 100% rename from crates/seatbelt/src/circuit/callbacks.rs rename to crates/seatbelt/src/circuit_breaker/callbacks.rs diff --git a/crates/seatbelt/src/circuit/constants.rs b/crates/seatbelt/src/circuit_breaker/constants.rs similarity index 100% rename from crates/seatbelt/src/circuit/constants.rs rename to crates/seatbelt/src/circuit_breaker/constants.rs diff --git a/crates/seatbelt/src/circuit/engine/engine_core.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs similarity index 98% rename from crates/seatbelt/src/circuit/engine/engine_core.rs rename to crates/seatbelt/src/circuit_breaker/engine/engine_core.rs index de520b16..4edc58c6 100644 --- a/crates/seatbelt/src/circuit/engine/engine_core.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_core.rs @@ -7,9 +7,9 @@ use std::time::{Duration, Instant}; use tick::Clock; use super::{EngineOptions, EnterCircuitResult, ExitCircuitResult}; -use crate::circuit::constants::ERR_POISONED_LOCK; -use crate::circuit::engine::probing::{AllowProbeResult, Probes, ProbingResult}; -use crate::circuit::{CircuitEngine, ExecutionMode, ExecutionResult, HealthMetrics, HealthStatus}; +use crate::circuit_breaker::constants::ERR_POISONED_LOCK; +use crate::circuit_breaker::engine::probing::{AllowProbeResult, Probes, ProbingResult}; +use crate::circuit_breaker::{CircuitEngine, ExecutionMode, ExecutionResult, HealthMetrics, HealthStatus}; /// Engine that manages the state of the circuit breaker. #[derive(Debug)] @@ -201,8 +201,8 @@ mod tests { use tick::ClockControl; use super::*; - use crate::circuit::HealthMetricsBuilder; - use crate::circuit::engine::probing::ProbesOptions; + use crate::circuit_breaker::HealthMetricsBuilder; + use crate::circuit_breaker::engine::probing::ProbesOptions; fn create_test_settings() -> EngineOptions { EngineOptions { @@ -660,7 +660,7 @@ mod tests { #[test] fn exit_when_half_open_with_pending_probe_returns_unchanged() { - use crate::circuit::engine::probing::{HealthProbeOptions, ProbeOptions}; + use crate::circuit_breaker::engine::probing::{HealthProbeOptions, ProbeOptions}; let settings = EngineOptions { break_duration: Duration::from_secs(5), diff --git a/crates/seatbelt/src/circuit/engine/engine_fake.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs similarity index 84% rename from crates/seatbelt/src/circuit/engine/engine_fake.rs rename to crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs index c59930ef..e35a9cfa 100644 --- a/crates/seatbelt/src/circuit/engine/engine_fake.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_fake.rs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -use crate::circuit::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; +use crate::circuit_breaker::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; /// Fake engine to be used in tests. #[derive(Debug)] diff --git a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs similarity index 97% rename from crates/seatbelt/src/circuit/engine/engine_telemetry.rs rename to crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs index f283f677..1cf92831 100644 --- a/crates/seatbelt/src/circuit/engine/engine_telemetry.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engine_telemetry.rs @@ -6,10 +6,10 @@ use std::borrow::Cow; use tick::Clock; #[cfg(any(feature = "metrics", feature = "logs", test))] -use crate::circuit::CircuitState; +use crate::circuit_breaker::CircuitState; #[cfg(any(feature = "metrics", test))] -use crate::circuit::telemetry::*; -use crate::circuit::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; +use crate::circuit_breaker::telemetry::*; +use crate::circuit_breaker::{CircuitEngine, EnterCircuitResult, ExecutionMode, ExecutionResult, ExitCircuitResult}; use crate::utils::TelemetryHelper; #[cfg(any(feature = "metrics", test))] @@ -189,7 +189,7 @@ mod tests { use opentelemetry::KeyValue; use super::*; - use crate::circuit::{EngineFake, HealthInfo, Stats}; + use crate::circuit_breaker::{EngineFake, HealthInfo, Stats}; use crate::metrics::{create_meter, create_resilience_event_counter}; use crate::testing::MetricTester; diff --git a/crates/seatbelt/src/circuit/engine/engines.rs b/crates/seatbelt/src/circuit_breaker/engine/engines.rs similarity index 90% rename from crates/seatbelt/src/circuit/engine/engines.rs rename to crates/seatbelt/src/circuit_breaker/engine/engines.rs index 7bfda31e..b4ee1729 100644 --- a/crates/seatbelt/src/circuit/engine/engines.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/engines.rs @@ -6,8 +6,8 @@ use std::sync::{Arc, Mutex}; use tick::Clock; -use crate::circuit::constants::ERR_POISONED_LOCK; -use crate::circuit::{Engine, EngineCore, EngineOptions, EngineTelemetry, PartitionKey}; +use crate::circuit_breaker::constants::ERR_POISONED_LOCK; +use crate::circuit_breaker::{Engine, EngineCore, EngineOptions, EngineTelemetry, PartitionKey}; use crate::utils::TelemetryHelper; /// Manages circuit breaker engines for different partition keys. @@ -63,8 +63,8 @@ mod tests { use std::time::Duration; use super::*; - use crate::circuit::HealthMetricsBuilder; - use crate::circuit::engine::probing::ProbesOptions; + use crate::circuit_breaker::HealthMetricsBuilder; + use crate::circuit_breaker::engine::probing::ProbesOptions; use crate::metrics::create_resilience_event_counter; #[test] diff --git a/crates/seatbelt/src/circuit/engine/mod.rs b/crates/seatbelt/src/circuit_breaker/engine/mod.rs similarity index 97% rename from crates/seatbelt/src/circuit/engine/mod.rs rename to crates/seatbelt/src/circuit_breaker/engine/mod.rs index 9e186aaa..940cfdb8 100644 --- a/crates/seatbelt/src/circuit/engine/mod.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/mod.rs @@ -4,7 +4,7 @@ use std::fmt::Debug; use std::time::Duration; -use crate::circuit::{ExecutionResult, HealthInfo, HealthMetricsBuilder}; +use crate::circuit_breaker::{ExecutionResult, HealthInfo, HealthMetricsBuilder}; pub(super) mod probing; diff --git a/crates/seatbelt/src/circuit/engine/probing/health_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs similarity index 97% rename from crates/seatbelt/src/circuit/engine/probing/health_probe.rs rename to crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs index 84adfd01..b1496ec6 100644 --- a/crates/seatbelt/src/circuit/engine/probing/health_probe.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/health_probe.rs @@ -3,8 +3,8 @@ use std::time::Instant; -use crate::circuit::engine::probing::{AllowProbeResult, HealthProbeOptions, ProbeOperation, ProbingResult}; -use crate::circuit::{ExecutionResult, HealthMetrics, HealthStatus}; +use crate::circuit_breaker::engine::probing::{AllowProbeResult, HealthProbeOptions, ProbeOperation, ProbingResult}; +use crate::circuit_breaker::{ExecutionResult, HealthMetrics, HealthStatus}; use crate::rnd::Rnd; #[derive(Debug)] diff --git a/crates/seatbelt/src/circuit/engine/probing/mod.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs similarity index 98% rename from crates/seatbelt/src/circuit/engine/probing/mod.rs rename to crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs index 5bba251d..58a90dc4 100644 --- a/crates/seatbelt/src/circuit/engine/probing/mod.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/mod.rs @@ -12,7 +12,7 @@ use std::fmt::Debug; use std::time::Instant; -use crate::circuit::{EnterCircuitResult, ExecutionMode, ExecutionResult}; +use crate::circuit_breaker::{EnterCircuitResult, ExecutionMode, ExecutionResult}; mod health_probe; mod options; diff --git a/crates/seatbelt/src/circuit/engine/probing/options.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs similarity index 99% rename from crates/seatbelt/src/circuit/engine/probing/options.rs rename to crates/seatbelt/src/circuit_breaker/engine/probing/options.rs index ec3f6aba..a06ba905 100644 --- a/crates/seatbelt/src/circuit/engine/probing/options.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/options.rs @@ -4,7 +4,7 @@ use std::time::Duration; use std::vec::IntoIter; -use crate::circuit::HealthMetricsBuilder; +use crate::circuit_breaker::HealthMetricsBuilder; /// The minimum throughput during probing stage is set to 1, so at least one request must come /// through in each probing stage to evaluate the health. diff --git a/crates/seatbelt/src/circuit/engine/probing/probes.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs similarity index 96% rename from crates/seatbelt/src/circuit/engine/probing/probes.rs rename to crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs index 7cfbc50f..2b9529fd 100644 --- a/crates/seatbelt/src/circuit/engine/probing/probes.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/probes.rs @@ -5,7 +5,7 @@ use std::time::Instant; use std::vec; use super::{AllowProbeResult, Probe, ProbeOperation, ProbeOptions, ProbesOptions, ProbingResult}; -use crate::circuit::ExecutionResult; +use crate::circuit_breaker::ExecutionResult; /// Manages a sequence of probes. #[derive(Debug)] @@ -56,7 +56,7 @@ mod tests { use tick::Clock; use super::*; - use crate::circuit::engine::probing::HealthProbeOptions; + use crate::circuit_breaker::engine::probing::HealthProbeOptions; #[test] fn multiple_probes_ok() { diff --git a/crates/seatbelt/src/circuit/engine/probing/single_probe.rs b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs similarity index 98% rename from crates/seatbelt/src/circuit/engine/probing/single_probe.rs rename to crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs index fc67d160..523b5bd0 100644 --- a/crates/seatbelt/src/circuit/engine/probing/single_probe.rs +++ b/crates/seatbelt/src/circuit_breaker/engine/probing/single_probe.rs @@ -4,7 +4,7 @@ use std::time::{Duration, Instant}; use super::{AllowProbeResult, ProbeOperation, ProbingResult}; -use crate::circuit::ExecutionResult; +use crate::circuit_breaker::ExecutionResult; /// Allows a single probe to get in and based on the result either closes the circuit /// or goes back to open state. diff --git a/crates/seatbelt/src/circuit/execution_result.rs b/crates/seatbelt/src/circuit_breaker/execution_result.rs similarity index 100% rename from crates/seatbelt/src/circuit/execution_result.rs rename to crates/seatbelt/src/circuit_breaker/execution_result.rs diff --git a/crates/seatbelt/src/circuit/half_open_mode.rs b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs similarity index 95% rename from crates/seatbelt/src/circuit/half_open_mode.rs rename to crates/seatbelt/src/circuit_breaker/half_open_mode.rs index 364cd4ce..13de619b 100644 --- a/crates/seatbelt/src/circuit/half_open_mode.rs +++ b/crates/seatbelt/src/circuit_breaker/half_open_mode.rs @@ -3,8 +3,8 @@ use std::time::Duration; -use crate::circuit::constants::MIN_SAMPLING_DURATION; -use crate::circuit::engine::probing::ProbesOptions; +use crate::circuit_breaker::constants::MIN_SAMPLING_DURATION; +use crate::circuit_breaker::engine::probing::ProbesOptions; /// Defines the behavior of the circuit breaker when transitioning from half-open to closed state. /// @@ -52,7 +52,7 @@ impl HalfOpenMode { /// # Arguments /// /// - `stage_duration` - Optional custom stage duration for each probing stage. If not provided, - /// the value of [`break_duration`][crate::circuit::CircuitLayer::break_duration] is used. The provided stage + /// the value of [`break_duration`][crate::circuit_breaker::CircuitLayer::break_duration] is used. The provided stage /// duration is clamped to a minimum of 1 second. #[must_use] pub fn reliable(stage_duration: impl Into>) -> Self { @@ -79,7 +79,7 @@ enum Mode { #[cfg(test)] mod tests { use super::*; - use crate::circuit::engine::probing::ProbeOptions; + use crate::circuit_breaker::engine::probing::ProbeOptions; #[test] fn quick_mode_creates_single_probe() { diff --git a/crates/seatbelt/src/circuit/health.rs b/crates/seatbelt/src/circuit_breaker/health.rs similarity index 99% rename from crates/seatbelt/src/circuit/health.rs rename to crates/seatbelt/src/circuit_breaker/health.rs index 2ec726b4..bd0d4984 100644 --- a/crates/seatbelt/src/circuit/health.rs +++ b/crates/seatbelt/src/circuit_breaker/health.rs @@ -5,7 +5,7 @@ use std::collections::VecDeque; use std::time::{Duration, Instant}; use super::ExecutionResult; -use crate::circuit::constants::MIN_SAMPLING_DURATION; +use crate::circuit_breaker::constants::MIN_SAMPLING_DURATION; const WINDOW_COUNT: u32 = 10; diff --git a/crates/seatbelt/src/circuit/layer.rs b/crates/seatbelt/src/circuit_breaker/layer.rs similarity index 98% rename from crates/seatbelt/src/circuit/layer.rs rename to crates/seatbelt/src/circuit_breaker/layer.rs index 9669a430..6183ddd5 100644 --- a/crates/seatbelt/src/circuit/layer.rs +++ b/crates/seatbelt/src/circuit_breaker/layer.rs @@ -11,7 +11,7 @@ use super::{ PartionKeyProvider, PartitionKey, RejectedInput, RejectedInputArgs, ShouldRecover, }; use crate::Layer; -use crate::circuit::engine::probing::ProbesOptions; +use crate::circuit_breaker::engine::probing::ProbesOptions; use crate::utils::{EnableIf, TelemetryHelper}; use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set}; @@ -24,7 +24,7 @@ use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set}; /// - [`recovery`][CircuitLayer::recovery]: Required to determine if an output represents a failure /// - [`rejected_input`][CircuitLayer::rejected_input]: Required to specify the output when the circuit is open and inputs are rejected /// -/// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit] documentation. +/// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. #[derive(Debug)] pub struct CircuitLayer { context: PipelineContext, @@ -98,18 +98,18 @@ impl CircuitLayer RecoveryInfo + Send + Sync + 'static, + recover_fn: impl Fn(&Out, crate::circuit_breaker::RecoveryArgs) -> RecoveryInfo + Send + Sync + 'static, ) -> CircuitLayer { self.recovery = Some(ShouldRecover::new(recover_fn)); self.into_state::() @@ -301,7 +301,7 @@ impl CircuitLayer Result<(), Box> { //! let context = PipelineContext::new(&clock); @@ -187,7 +187,7 @@ //! ```rust //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; -//! # use seatbelt::circuit::Circuit; +//! # use seatbelt::circuit_breaker::Circuit; //! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and @@ -228,7 +228,7 @@ //! # use std::time::Duration; //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; -//! # use seatbelt::circuit::{Circuit, PartitionKey, HalfOpenMode}; +//! # use seatbelt::circuit_breaker::{Circuit, PartitionKey, HalfOpenMode}; //! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. @@ -289,7 +289,7 @@ //! The code below will not compile because the circuit breaker layer is missing required configuration. //! //! ```compile_fail -//! # use seatbelt::circuit::Circuit; +//! # use seatbelt::circuit_breaker::Circuit; //! # use seatbelt::PipelineContext; //! # use layered::Execute; //! # fn example(context: PipelineContext>) { diff --git a/crates/seatbelt/src/circuit/partition_key.rs b/crates/seatbelt/src/circuit_breaker/partition_key.rs similarity index 98% rename from crates/seatbelt/src/circuit/partition_key.rs rename to crates/seatbelt/src/circuit_breaker/partition_key.rs index ce42eb15..3199d161 100644 --- a/crates/seatbelt/src/circuit/partition_key.rs +++ b/crates/seatbelt/src/circuit_breaker/partition_key.rs @@ -16,7 +16,7 @@ use std::hash::Hasher; /// ## Creation from a number /// /// ```rust -/// use seatbelt::circuit::PartitionKey; +/// use seatbelt::circuit_breaker::PartitionKey; /// /// let key = PartitionKey::from(42_u64); /// assert_eq!(key.to_string(), "42"); @@ -25,7 +25,7 @@ use std::hash::Hasher; /// ## Creation from HTTP request authority and scheme /// /// ```rust -/// use seatbelt::circuit::PartitionKey; +/// use seatbelt::circuit_breaker::PartitionKey; /// /// // Simulate extracting authority and scheme from an HTTP request /// let scheme = "https"; diff --git a/crates/seatbelt/src/circuit/service.rs b/crates/seatbelt/src/circuit_breaker/service.rs similarity index 98% rename from crates/seatbelt/src/circuit/service.rs rename to crates/seatbelt/src/circuit_breaker/service.rs index ce9c7952..3fa106f9 100644 --- a/crates/seatbelt/src/circuit/service.rs +++ b/crates/seatbelt/src/circuit_breaker/service.rs @@ -25,7 +25,7 @@ use crate::{NotSet, utils::EnableIf}; /// `Circuit` is configured by calling [`Circuit::layer`] and using the /// builder methods on the returned [`CircuitLayer`] instance. /// -/// For comprehensive examples and usage patterns, see the [`circuit_breaker` module][crate::circuit] documentation. +/// For comprehensive examples and usage patterns, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. #[derive(Debug)] pub struct Circuit { pub(super) inner: S, @@ -171,8 +171,8 @@ mod tests { use super::*; use crate::Layer; - use crate::circuit::constants::DEFAULT_BREAK_DURATION; - use crate::circuit::{EngineFake, HalfOpenMode, HealthInfo, Stats}; + use crate::circuit_breaker::constants::DEFAULT_BREAK_DURATION; + use crate::circuit_breaker::{EngineFake, HalfOpenMode, HealthInfo, Stats}; use crate::{PipelineContext, RecoveryInfo, Set}; #[test] diff --git a/crates/seatbelt/src/circuit/telemetry.rs b/crates/seatbelt/src/circuit_breaker/telemetry.rs similarity index 100% rename from crates/seatbelt/src/circuit/telemetry.rs rename to crates/seatbelt/src/circuit_breaker/telemetry.rs diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 2d8970ef..ff5eb018 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -6,7 +6,13 @@ #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/logo.png")] #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/seatbelt/favicon.ico")] #![cfg_attr( - not(all(feature = "retry", feature = "timeout", feature = "circuit", feature = "metrics", feature = "logs")), + not(all( + feature = "retry", + feature = "timeout", + feature = "circuit-breaker", + feature = "metrics", + feature = "logs" + )), expect( rustdoc::broken_intra_doc_links, reason = "too ugly to make 'live links' possible with the combination of features" @@ -94,7 +100,7 @@ //! //! - [`timeout`] - Middleware that cancels long-running operations. //! - [`retry`] - Middleware that automatically retries failed operations. -//! - [`circuit`] - Middleware that prevents cascading failures. +//! - [`circuit_breaker`] - Middleware that prevents cascading failures. //! //! # Features //! @@ -103,7 +109,7 @@ //! - **`timeout`** - Enables the [`timeout`] middleware for canceling long-running operations. //! - **`retry`** - Enables the [`retry`] middleware for automatically retrying failed operations with //! configurable backoff strategies, jitter, and recovery classification. -//! - **`circuit`** - Enables the [`circuit`] middleware for preventing cascading failures. +//! - **`circuit-breaker`** - Enables the [`circuit_breaker`] middleware for preventing cascading failures. //! - **`metrics`** - Exposes the OpenTelemetry metrics API for collecting and reporting metrics. //! - **`logs`** - Enables structured logging for resilience middleware using the `tracing` crate. @@ -119,16 +125,16 @@ pub mod timeout; #[cfg(any(feature = "retry", test))] pub mod retry; -#[cfg(any(feature = "circuit", test))] -pub mod circuit; +#[cfg(any(feature = "circuit-breaker", test))] +pub mod circuit_breaker; #[doc(inline)] pub use layered::{Layer, Service, Stack}; -#[cfg(any(feature = "retry", feature = "circuit", test))] +#[cfg(any(feature = "retry", feature = "circuit-breaker", test))] mod rnd; -#[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] +#[cfg(any(feature = "retry", feature = "circuit-breaker", feature = "timeout", test))] pub(crate) mod utils; #[cfg(any(feature = "metrics", test))] diff --git a/crates/seatbelt/src/metrics.rs b/crates/seatbelt/src/metrics.rs index d627a4d8..347c7a20 100644 --- a/crates/seatbelt/src/metrics.rs +++ b/crates/seatbelt/src/metrics.rs @@ -17,7 +17,7 @@ pub(crate) fn create_meter(meter_provider: &dyn MeterProvider) -> Meter { ) } -#[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] +#[cfg(any(feature = "retry", feature = "circuit-breaker", feature = "timeout", test))] pub(crate) fn create_resilience_event_counter(meter: &Meter) -> opentelemetry::metrics::Counter { meter .u64_counter("resilience.event") diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index cd0f966f..97a4a46c 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -39,7 +39,7 @@ impl PipelineContext { /// Get the configured clock for timing operations. #[must_use] - #[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] + #[cfg(any(feature = "retry", feature = "circuit-breaker", feature = "timeout", test))] pub(crate) fn get_clock(&self) -> &Clock { &self.clock } @@ -75,7 +75,7 @@ impl PipelineContext { not(any(feature = "metrics", feature = "logs", test)), expect(unused_variables, reason = "unused when logs nor metrics are used") )] - #[cfg(any(feature = "retry", feature = "circuit", feature = "timeout", test))] + #[cfg(any(feature = "retry", feature = "circuit-breaker", feature = "timeout", test))] pub(crate) fn create_telemetry(&self, strategy_name: Cow<'static, str>) -> crate::utils::TelemetryHelper { crate::utils::TelemetryHelper { #[cfg(any(feature = "metrics", test))] From ebf0672389ea867284e43b6c081b13c3cdb08dee Mon Sep 17 00:00:00 2001 From: martintomka Date: Wed, 21 Jan 2026 09:56:21 +0100 Subject: [PATCH 50/60] cleanup --- crates/seatbelt/Cargo.toml | 2 +- crates/seatbelt/README.md | 58 +++++++++++++++++++++++--------------- crates/seatbelt/src/lib.rs | 15 +++++++--- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/crates/seatbelt/Cargo.toml b/crates/seatbelt/Cargo.toml index 3f8951ca..60add28c 100644 --- a/crates/seatbelt/Cargo.toml +++ b/crates/seatbelt/Cargo.toml @@ -6,7 +6,7 @@ name = "seatbelt" description = "Resilience and recovery mechanisms for fallible operations." version = "0.2.0" readme = "README.md" -keywords = ["oxidizer", "resilience", "tower", "recovery", "retry", "circuit-breaker"] +keywords = ["oxidizer", "resilience", "layered", "recovery", "retry", "circuit-breaker"] categories = ["data-structures"] edition.workspace = true diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index a004c520..bbccb51a 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -17,8 +17,8 @@ Resilience and recovery mechanisms for fallible operations. ## Quick Start -Add resilience to your services with just a few lines of code. **Retry** handles transient failures -and **Timeout** prevents operations from hanging indefinitely: +Add resilience to fallible operations, such as RPC calls over the network, with just a few lines of code. +**Retry** handles transient failures and **Timeout** prevents operations from hanging indefinitely: ```rust use seatbelt::retry::Retry; @@ -62,38 +62,45 @@ This crate provides production-ready resilience middleware with excellent teleme robust distributed systems that can automatically handle timeouts, retries, and other failure scenarios. -* **Runtime agnostic** - Works seamlessly across any async runtime. Use the same resilience - patterns across different projects and migrate between runtimes without changes. * **Production-ready** - Battle-tested middleware with sensible defaults and comprehensive configuration options. * **Excellent telemetry** - Built-in support for metrics and structured logging to monitor resilience behavior in production. +* **Runtime agnostic** - Works seamlessly across any async runtime. Use the same resilience + patterns across different projects and migrate between runtimes without changes. ## Overview +This crate uses the [`layered`][__link5] crate for composing middleware. The middleware layers +can be stacked together using tuples and built into a service using the [`Stack`][__link6] trait. + +Resilience middleware also requires [`Clock`][__link7] from the [`tick`][__link8] crate for timing +operations like delays, timeouts, and backoff calculations. The clock is passed through +[`PipelineContext`][__link9] when creating middleware layers. + ### Core Types -* [`PipelineContext`][__link5] - Holds shared state for resilience middleware, including the clock. -* [`RecoveryInfo`][__link6] - Classifies errors as recoverable (transient) or non-recoverable (permanent). -* [`Recovery`][__link7] - A trait for types that can determine their recoverability. +* [`PipelineContext`][__link10] - Holds shared state for resilience middleware, including the clock. +* [`RecoveryInfo`][__link11] - Classifies errors as recoverable (transient) or non-recoverable (permanent). +* [`Recovery`][__link12] - A trait for types that can determine their recoverability. ### Built-in Middleware This crate provides built-in resilience middleware that you can use out of the box. See the documentation for each module for details on how to use them. -* [`timeout`][__link8] - Middleware that cancels long-running operations. -* [`retry`][__link9] - Middleware that automatically retries failed operations. -* [`circuit_breaker`][__link10] - Middleware that prevents cascading failures. +* [`timeout`][__link13] - Middleware that cancels long-running operations. +* [`retry`][__link14] - Middleware that automatically retries failed operations. +* [`circuit_breaker`][__link15] - Middleware that prevents cascading failures. ## Features This crate provides several optional features that can be enabled in your `Cargo.toml`: -* **`timeout`** - Enables the [`timeout`][__link11] middleware for canceling long-running operations. -* **`retry`** - Enables the [`retry`][__link12] middleware for automatically retrying failed operations with +* **`timeout`** - Enables the [`timeout`][__link16] middleware for canceling long-running operations. +* **`retry`** - Enables the [`retry`][__link17] middleware for automatically retrying failed operations with configurable backoff strategies, jitter, and recovery classification. -* **`circuit-breaker`** - Enables the [`circuit_breaker`][__link13] middleware for preventing cascading failures. +* **`circuit-breaker`** - Enables the [`circuit_breaker`][__link18] middleware for preventing cascading failures. * **`metrics`** - Exposes the OpenTelemetry metrics API for collecting and reporting metrics. * **`logs`** - Enables structured logging for resilience middleware using the `tracing` crate. @@ -103,18 +110,23 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGyn6pS-pNJ0BG8D_ULLoyLY-G_Jceg8SxKmRG_uew5JtfsJUYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG_dm0xcJQgkeG6H_kYotDLiFG0Ega4q0xkG_G7Au75a7iimWYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI [__link0]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link1]: https://crates.io/crates/tick/0.1.2 - [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html - [__link11]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html - [__link12]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html - [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html + [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext + [__link11]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo + [__link12]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery + [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html + [__link14]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html + [__link15]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html + [__link16]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html + [__link17]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html + [__link18]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html [__link2]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext [__link3]: https://crates.io/crates/layered/0.1.0 [__link4]: https://docs.rs/layered/0.1.0/layered/?search=Stack - [__link5]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext - [__link6]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo - [__link7]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery - [__link8]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html - [__link9]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html + [__link5]: https://crates.io/crates/layered/0.1.0 + [__link6]: https://docs.rs/layered/0.1.0/layered/?search=Stack + [__link7]: https://docs.rs/tick/0.1.2/tick/?search=Clock + [__link8]: https://crates.io/crates/tick/0.1.2 + [__link9]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index ff5eb018..cf53a657 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -23,8 +23,8 @@ //! //! # Quick Start //! -//! Add resilience to your services with just a few lines of code. **Retry** handles transient failures -//! and **Timeout** prevents operations from hanging indefinitely: +//! Add resilience to fallible operations, such as RPC calls over the network, with just a few lines of code. +//! **Retry** handles transient failures and **Timeout** prevents operations from hanging indefinitely: //! //! ```rust //! # #[cfg(all(feature = "retry", feature = "timeout"))] @@ -78,15 +78,22 @@ //! robust distributed systems that can automatically handle timeouts, retries, and other failure //! scenarios. //! -//! - **Runtime agnostic** - Works seamlessly across any async runtime. Use the same resilience -//! patterns across different projects and migrate between runtimes without changes. //! - **Production-ready** - Battle-tested middleware with sensible defaults and comprehensive //! configuration options. //! - **Excellent telemetry** - Built-in support for metrics and structured logging to monitor //! resilience behavior in production. +//! - **Runtime agnostic** - Works seamlessly across any async runtime. Use the same resilience +//! patterns across different projects and migrate between runtimes without changes. //! //! # Overview //! +//! This crate uses the [`layered`] crate for composing middleware. The middleware layers +//! can be stacked together using tuples and built into a service using the [`Stack`] trait. +//! +//! Resilience middleware also requires [`Clock`][tick::Clock] from the [`tick`] crate for timing +//! operations like delays, timeouts, and backoff calculations. The clock is passed through +//! [`PipelineContext`] when creating middleware layers. +//! //! ## Core Types //! //! - [`PipelineContext`] - Holds shared state for resilience middleware, including the clock. From 4eda7fd6648c817c22fb8af7eee372c42b557b51 Mon Sep 17 00:00:00 2001 From: martintomka Date: Wed, 21 Jan 2026 09:58:59 +0100 Subject: [PATCH 51/60] spelling --- .spelling | 1 + 1 file changed, 1 insertion(+) diff --git a/.spelling b/.spelling index 7b7e827b..0c96de03 100644 --- a/.spelling +++ b/.spelling @@ -214,6 +214,7 @@ repo repos Reqwest Reusability +RPC runtime runtimes rustc From 989784e2560e8d8de652cbcd4faf0b81214f3c63 Mon Sep 17 00:00:00 2001 From: martintomka Date: Wed, 21 Jan 2026 10:05:28 +0100 Subject: [PATCH 52/60] doc updates --- crates/seatbelt/README.md | 70 +++++++++++++++----------------------- crates/seatbelt/src/lib.rs | 7 ---- 2 files changed, 28 insertions(+), 49 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index bbccb51a..b8f899fb 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -47,15 +47,6 @@ let service = ( let result = service.execute("input data".to_string()).await; ``` - > - > **Note**: Resilience middleware requires [`Clock`][__link0] from the [`tick`][__link1] crate for timing - > operations like delays, timeouts, and backoff calculations. The clock is passed through - > [`PipelineContext`][__link2] when creating middleware layers. - - > - > **Note**: This crate uses the [`layered`][__link3] crate for composing middleware. The middleware layers - > can be stacked together using tuples and built into a service using the [`Stack`][__link4] trait. - ## Why? This crate provides production-ready resilience middleware with excellent telemetry for building @@ -71,36 +62,36 @@ scenarios. ## Overview -This crate uses the [`layered`][__link5] crate for composing middleware. The middleware layers -can be stacked together using tuples and built into a service using the [`Stack`][__link6] trait. +This crate uses the [`layered`][__link0] crate for composing middleware. The middleware layers +can be stacked together using tuples and built into a service using the [`Stack`][__link1] trait. -Resilience middleware also requires [`Clock`][__link7] from the [`tick`][__link8] crate for timing +Resilience middleware also requires [`Clock`][__link2] from the [`tick`][__link3] crate for timing operations like delays, timeouts, and backoff calculations. The clock is passed through -[`PipelineContext`][__link9] when creating middleware layers. +[`PipelineContext`][__link4] when creating middleware layers. ### Core Types -* [`PipelineContext`][__link10] - Holds shared state for resilience middleware, including the clock. -* [`RecoveryInfo`][__link11] - Classifies errors as recoverable (transient) or non-recoverable (permanent). -* [`Recovery`][__link12] - A trait for types that can determine their recoverability. +* [`PipelineContext`][__link5] - Holds shared state for resilience middleware, including the clock. +* [`RecoveryInfo`][__link6] - Classifies errors as recoverable (transient) or non-recoverable (permanent). +* [`Recovery`][__link7] - A trait for types that can determine their recoverability. ### Built-in Middleware This crate provides built-in resilience middleware that you can use out of the box. See the documentation for each module for details on how to use them. -* [`timeout`][__link13] - Middleware that cancels long-running operations. -* [`retry`][__link14] - Middleware that automatically retries failed operations. -* [`circuit_breaker`][__link15] - Middleware that prevents cascading failures. +* [`timeout`][__link8] - Middleware that cancels long-running operations. +* [`retry`][__link9] - Middleware that automatically retries failed operations. +* [`circuit_breaker`][__link10] - Middleware that prevents cascading failures. ## Features This crate provides several optional features that can be enabled in your `Cargo.toml`: -* **`timeout`** - Enables the [`timeout`][__link16] middleware for canceling long-running operations. -* **`retry`** - Enables the [`retry`][__link17] middleware for automatically retrying failed operations with +* **`timeout`** - Enables the [`timeout`][__link11] middleware for canceling long-running operations. +* **`retry`** - Enables the [`retry`][__link12] middleware for automatically retrying failed operations with configurable backoff strategies, jitter, and recovery classification. -* **`circuit-breaker`** - Enables the [`circuit_breaker`][__link18] middleware for preventing cascading failures. +* **`circuit-breaker`** - Enables the [`circuit_breaker`][__link13] middleware for preventing cascading failures. * **`metrics`** - Exposes the OpenTelemetry metrics API for collecting and reporting metrics. * **`logs`** - Enables structured logging for resilience middleware using the `tracing` crate. @@ -110,23 +101,18 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG_dm0xcJQgkeG6H_kYotDLiFG0Ega4q0xkG_G7Au75a7iimWYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI - [__link0]: https://docs.rs/tick/0.1.2/tick/?search=Clock - [__link1]: https://crates.io/crates/tick/0.1.2 - [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext - [__link11]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo - [__link12]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery - [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html - [__link14]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html - [__link15]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html - [__link16]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html - [__link17]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html - [__link18]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html - [__link2]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext - [__link3]: https://crates.io/crates/layered/0.1.0 - [__link4]: https://docs.rs/layered/0.1.0/layered/?search=Stack - [__link5]: https://crates.io/crates/layered/0.1.0 - [__link6]: https://docs.rs/layered/0.1.0/layered/?search=Stack - [__link7]: https://docs.rs/tick/0.1.2/tick/?search=Clock - [__link8]: https://crates.io/crates/tick/0.1.2 - [__link9]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG0sUSiYcHuHeG_lowFs20mWBG06Nt4zQNqe4G-GgJn9zdLYpYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI + [__link0]: https://crates.io/crates/layered/0.1.0 + [__link1]: https://docs.rs/layered/0.1.0/layered/?search=Stack + [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html + [__link11]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html + [__link12]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html + [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html + [__link2]: https://docs.rs/tick/0.1.2/tick/?search=Clock + [__link3]: https://crates.io/crates/tick/0.1.2 + [__link4]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext + [__link5]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext + [__link6]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo + [__link7]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery + [__link8]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html + [__link9]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index cf53a657..18654239 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -65,13 +65,6 @@ //! # } //! ``` //! -//! > **Note**: Resilience middleware requires [`Clock`][tick::Clock] from the [`tick`] crate for timing -//! > operations like delays, timeouts, and backoff calculations. The clock is passed through -//! > [`PipelineContext`] when creating middleware layers. -//! -//! > **Note**: This crate uses the [`layered`] crate for composing middleware. The middleware layers -//! > can be stacked together using tuples and built into a service using the [`Stack`] trait. -//! //! # Why? //! //! This crate provides production-ready resilience middleware with excellent telemetry for building From b061a787d51afc7accf08ba3894fec1ffab20250 Mon Sep 17 00:00:00 2001 From: martintomka Date: Wed, 21 Jan 2026 15:08:19 +0100 Subject: [PATCH 53/60] improve docs --- crates/seatbelt/README.md | 8 +++++++- crates/seatbelt/src/lib.rs | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index b8f899fb..259c6eb9 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -49,6 +49,12 @@ let result = service.execute("input data".to_string()).await; ## Why? +Communicating over a network is inherently fraught with problems. The network can go down at any time, +sometimes for a millisecond or two. The endpoint you’re connecting to may crash or be rebooted, +network configuration may change from under you, etc. To deliver a robust experience to users, and to +achieve `5` or more `9s` of availability, it is imperative to implement robust resilience patterns to +mask these transient failures. + This crate provides production-ready resilience middleware with excellent telemetry for building robust distributed systems that can automatically handle timeouts, retries, and other failure scenarios. @@ -101,7 +107,7 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG0sUSiYcHuHeG_lowFs20mWBG06Nt4zQNqe4G-GgJn9zdLYpYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGxCJHvMDzlVbG4wWnwONx-VDG8_fIAgvujggG8jr12bAoHJVYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI [__link0]: https://crates.io/crates/layered/0.1.0 [__link1]: https://docs.rs/layered/0.1.0/layered/?search=Stack [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 18654239..d3d72e89 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -67,6 +67,12 @@ //! //! # Why? //! +//! Communicating over a network is inherently fraught with problems. The network can go down at any time, +//! sometimes for a millisecond or two. The endpoint you're connecting to may crash or be rebooted, +//! network configuration may change from under you, etc. To deliver a robust experience to users, and to +//! achieve `5` or more `9s` of availability, it is imperative to implement robust resilience patterns to +//! mask these transient failures. +//! //! This crate provides production-ready resilience middleware with excellent telemetry for building //! robust distributed systems that can automatically handle timeouts, retries, and other failure //! scenarios. From d2bafe3890c69fdcb54b6cb70a8bdaf59db4976c Mon Sep 17 00:00:00 2001 From: martintomka Date: Wed, 21 Jan 2026 15:32:58 +0100 Subject: [PATCH 54/60] fixes --- crates/seatbelt/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 259c6eb9..3582167a 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -107,9 +107,9 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGxCJHvMDzlVbG4wWnwONx-VDG8_fIAgvujggG8jr12bAoHJVYWSEgmdsYXllcmVkZTAuMS4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI - [__link0]: https://crates.io/crates/layered/0.1.0 - [__link1]: https://docs.rs/layered/0.1.0/layered/?search=Stack + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGxCJHvMDzlVbG4wWnwONx-VDG8_fIAgvujggG8jr12bAoHJVYWSEgmdsYXllcmVkZTAuMi4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI + [__link0]: https://crates.io/crates/layered/0.2.0 + [__link1]: https://docs.rs/layered/0.2.0/layered/?search=Stack [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html [__link11]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html [__link12]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html From 64cfbb921fe3fdb54dd7b5c70d14db1394aac428 Mon Sep 17 00:00:00 2001 From: martintomka Date: Thu, 22 Jan 2026 08:56:06 +0100 Subject: [PATCH 55/60] address PR feedeback --- crates/seatbelt/examples/retry_advanced.rs | 4 +-- crates/seatbelt/src/circuit_breaker/mod.rs | 22 -------------- crates/seatbelt/src/lib.rs | 2 +- crates/seatbelt/src/retry/args.rs | 2 +- crates/seatbelt/src/retry/backoff.rs | 2 +- crates/seatbelt/src/retry/constants.rs | 2 +- crates/seatbelt/src/retry/layer.rs | 8 ++--- crates/seatbelt/src/retry/mod.rs | 35 +++++----------------- crates/seatbelt/src/retry/service.rs | 2 +- crates/seatbelt/src/shared/attempt.rs | 10 +++---- crates/seatbelt/src/timeout/mod.rs | 22 -------------- 11 files changed, 23 insertions(+), 88 deletions(-) diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index b0b960fc..326b57cd 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -14,8 +14,8 @@ use layered::{Execute, Service, Stack}; use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::retry::Retry; -use seatbelt::{Attempt, PipelineContext, RecoveryInfo}; +use seatbelt::retry::{Attempt, Retry}; +use seatbelt::{PipelineContext, RecoveryInfo}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; diff --git a/crates/seatbelt/src/circuit_breaker/mod.rs b/crates/seatbelt/src/circuit_breaker/mod.rs index 05ec4a78..2c74d765 100644 --- a/crates/seatbelt/src/circuit_breaker/mod.rs +++ b/crates/seatbelt/src/circuit_breaker/mod.rs @@ -282,28 +282,6 @@ //! # fn detect_partition(input: &String) -> String { input.to_string() } //! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } //! ``` -//! -//! ## Incomplete Configuration -//! -//! This example demonstrates what happens when the `CircuitLayer` is not fully configured. -//! The code below will not compile because the circuit breaker layer is missing required configuration. -//! -//! ```compile_fail -//! # use seatbelt::circuit_breaker::Circuit; -//! # use seatbelt::PipelineContext; -//! # use layered::Execute; -//! # fn example(context: PipelineContext>) { -//! let stack = ( -//! Circuit::layer("test", &service_options), // Missing required configuration! -//! Execute::new(|input| async move { Ok(input) }) -//! ); -//! -//! // This will fail to compile -//! let service = stack.build(); -//! # } -//! ``` -//! -//! For more comprehensive examples, see the [examples directory](https://github.com/microsoft/oxidizer/tree/main/crates/seatbelt/examples). mod args; mod callbacks; diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index d3d72e89..efe360bc 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -123,7 +123,7 @@ pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; pub(crate) mod shared; -pub use crate::shared::{Attempt, Backoff, NotSet, PipelineContext, Set}; +pub use crate::shared::{NotSet, PipelineContext, Set}; #[cfg(any(feature = "timeout", test))] pub mod timeout; diff --git a/crates/seatbelt/src/retry/args.rs b/crates/seatbelt/src/retry/args.rs index 6f06d817..bcd52878 100644 --- a/crates/seatbelt/src/retry/args.rs +++ b/crates/seatbelt/src/retry/args.rs @@ -5,7 +5,7 @@ use std::time::Duration; use tick::Clock; -use crate::{Attempt, RecoveryInfo}; +use crate::{RecoveryInfo, retry::Attempt}; /// Arguments for the [`clone_input_with`][super::RetryLayer::clone_input_with] callback function. /// diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index 44ab12a5..43dad77b 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -4,7 +4,7 @@ use std::cmp::min; use std::time::Duration; -use crate::Backoff; +use crate::retry::Backoff; use crate::retry::constants::{DEFAULT_BACKOFF, DEFAULT_BASE_DELAY, DEFAULT_USE_JITTER}; use crate::rnd::Rnd; diff --git a/crates/seatbelt/src/retry/constants.rs b/crates/seatbelt/src/retry/constants.rs index 2aa8815f..c0777883 100644 --- a/crates/seatbelt/src/retry/constants.rs +++ b/crates/seatbelt/src/retry/constants.rs @@ -3,7 +3,7 @@ use std::time::Duration; -use crate::Backoff; +use crate::retry::Backoff; /// Default backoff strategy: exponential backoff. /// diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 87f6ab4c..31e22dda 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -12,7 +12,7 @@ use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, Re use crate::shared::MaxAttempts; use crate::utils::EnableIf; use crate::utils::TelemetryHelper; -use crate::{Backoff, NotSet, PipelineContext, Recovery, RecoveryInfo, Set}; +use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set, retry::Backoff}; /// Builder for configuring retry resilience middleware. /// @@ -302,8 +302,8 @@ impl RetryLayer>::new(Clock::new_frozen()); @@ -516,7 +516,7 @@ mod tests { use tick::Clock; use super::*; - use crate::Attempt; + use crate::retry::Attempt; use crate::testing::RecoverableType; #[test] diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index ec300051..6bbfc3e7 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -15,8 +15,8 @@ //! ```rust //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::retry::Retry; -//! # use seatbelt::{Backoff, RecoveryInfo, PipelineContext}; +//! # use seatbelt::retry::{Retry, Backoff}; +//! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! let context = PipelineContext::new(&clock).name("my_service"); //! @@ -106,8 +106,8 @@ //! # use std::time::Duration; //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::retry::Retry; -//! # use seatbelt::{Backoff, RecoveryInfo, PipelineContext}; +//! # use seatbelt::retry::{Retry, Backoff}; +//! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. @@ -148,8 +148,8 @@ //! # use tick::Clock; //! # use std::io; //! # use layered::{Execute, Stack, Service}; -//! # use seatbelt::retry::Retry; -//! # use seatbelt::{RecoveryInfo, PipelineContext, Backoff}; +//! # use seatbelt::retry::{Retry, Backoff}; +//! # use seatbelt::{RecoveryInfo, PipelineContext}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. //! let context = PipelineContext::new(&clock); @@ -186,28 +186,6 @@ //! # } //! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } //! ``` -//! -//! ## Incomplete Configuration -//! -//! This example demonstrates what happens when the `RetryLayer` is not fully configured. -//! The code below will not compile because the retry layer is missing required configuration. -//! -//! ```compile_fail -//! # use seatbelt::retry::Retry; -//! # use seatbelt::PipelineContext; -//! # use tick::Clock; -//! # fn example(context: PipelineContext) { -//! let stack = ( -//! Retry::layer("test", &service_options), // Missing required configuration! -//! Execute::new(|input| async move { input }) -//! ); -//! -//! // This will fail to compile -//! let service = stack.build(); -//! # } -//! ``` -//! -//! For more comprehensive examples, see the [examples directory](https://github.com/microsoft/oxidizer/tree/main/crates/seatbelt/examples). mod args; mod backoff; @@ -219,6 +197,7 @@ mod service; #[cfg(any(feature = "metrics", test))] mod telemetry; +pub use crate::shared::{Attempt, Backoff}; pub use args::{CloneArgs, OnRetryArgs, RecoveryArgs, RestoreInputArgs}; pub(crate) use backoff::DelayBackoff; pub(crate) use callbacks::{CloneInput, OnRetry, RestoreInput, ShouldRecover}; diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index 4c691b94..ef5e847d 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -12,7 +12,7 @@ use crate::retry::{ }; use crate::shared::MaxAttempts; use crate::utils::EnableIf; -use crate::{Attempt, NotSet, RecoveryInfo, RecoveryKind}; +use crate::{NotSet, RecoveryInfo, RecoveryKind, retry::Attempt}; /// Applies retry logic to service execution for transient error handling. /// diff --git a/crates/seatbelt/src/shared/attempt.rs b/crates/seatbelt/src/shared/attempt.rs index 6e753fcd..a91e7959 100644 --- a/crates/seatbelt/src/shared/attempt.rs +++ b/crates/seatbelt/src/shared/attempt.rs @@ -18,7 +18,7 @@ use std::fmt::Display; /// # Examples /// /// ``` -/// use seatbelt::Attempt; +/// use seatbelt::retry::Attempt; /// /// // Create the first attempt (attempt 0) /// let attempt = Attempt::new(0, false); @@ -56,7 +56,7 @@ impl Attempt { /// # Examples /// /// ``` - /// use seatbelt::Attempt; + /// use seatbelt::retry::Attempt; /// /// let attempt = Attempt::new(0, false); /// assert_eq!(attempt.index(), 0); @@ -71,7 +71,7 @@ impl Attempt { /// # Examples /// /// ``` - /// use seatbelt::Attempt; + /// use seatbelt::retry::Attempt; /// /// let first_attempt = Attempt::new(0, false); /// assert!(first_attempt.is_first()); @@ -89,7 +89,7 @@ impl Attempt { /// # Examples /// /// ``` - /// use seatbelt::Attempt; + /// use seatbelt::retry::Attempt; /// /// let not_last = Attempt::new(1, false); /// assert!(!not_last.is_last()); @@ -107,7 +107,7 @@ impl Attempt { /// # Examples /// /// ``` - /// use seatbelt::Attempt; + /// use seatbelt::retry::Attempt; /// /// let attempt = Attempt::new(3, false); /// assert_eq!(attempt.index(), 3); diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index 2cf78f15..93a5fca4 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -171,28 +171,6 @@ //! # } //! # async fn execute_unreliable_operation(input: String) -> Result { Ok(input) } //! ``` -//! -//! ## Incomplete Configuration -//! -//! This example demonstrates what happens when the `TimeoutLayer` is not fully configured. -//! The code below will not compile because the timeout layer is missing required configuration. -//! -//! ```compile_fail -//! # use std::time::Duration; -//! # use layered::{Execute, Stack}; -//! # use tick::Clock; -//! # use seatbelt::PipelineContext; -//! # use seatbelt::timeout::Timeout; -//! # fn example(context: PipelineContext) { -//! let stack = ( -//! Timeout::layer("my_timeout", &context), // Missing required configuration! -//! Execute::new(|input| async move { input }) -//! ); -//! -//! // This will fail to compile -//! let service = stack.build(); -//! # } -//! ``` mod args; mod callbacks; mod layer; From ba3579445b1232628d64d3af148a5393b7719a36 Mon Sep 17 00:00:00 2001 From: martintomka Date: Thu, 22 Jan 2026 16:04:38 +0100 Subject: [PATCH 56/60] PR comments --- crates/seatbelt/README.md | 16 +++++------ crates/seatbelt/benches/circuit_breaker.rs | 4 +-- crates/seatbelt/benches/observability.rs | 8 +++--- crates/seatbelt/benches/retry.rs | 6 ++-- crates/seatbelt/benches/timeout.rs | 4 +-- crates/seatbelt/examples/circuit_breaker.rs | 4 +-- .../seatbelt/examples/resilience_pipeline.rs | 4 +-- crates/seatbelt/examples/retry.rs | 4 +-- crates/seatbelt/examples/retry_advanced.rs | 6 ++-- crates/seatbelt/examples/retry_outage.rs | 4 +-- crates/seatbelt/examples/timeout.rs | 4 +-- crates/seatbelt/examples/timeout_advanced.rs | 6 ++-- crates/seatbelt/src/circuit_breaker/layer.rs | 16 +++++------ crates/seatbelt/src/circuit_breaker/mod.rs | 14 +++++----- .../seatbelt/src/circuit_breaker/service.rs | 12 ++++---- crates/seatbelt/src/lib.rs | 13 ++++----- crates/seatbelt/src/retry/layer.rs | 28 +++++++++---------- crates/seatbelt/src/retry/mod.rs | 14 +++++----- crates/seatbelt/src/retry/service.rs | 16 +++++------ crates/seatbelt/src/shared/context.rs | 14 +++++----- crates/seatbelt/src/shared/mod.rs | 2 +- crates/seatbelt/src/timeout/layer.rs | 16 +++++------ crates/seatbelt/src/timeout/mod.rs | 14 +++++----- crates/seatbelt/src/timeout/service.rs | 20 ++++++------- 24 files changed, 124 insertions(+), 125 deletions(-) diff --git a/crates/seatbelt/README.md b/crates/seatbelt/README.md index 3582167a..6c8fcb7d 100644 --- a/crates/seatbelt/README.md +++ b/crates/seatbelt/README.md @@ -23,9 +23,9 @@ Add resilience to fallible operations, such as RPC calls over the network, with ```rust use seatbelt::retry::Retry; use seatbelt::timeout::Timeout; -use seatbelt::{RecoveryInfo, PipelineContext}; +use seatbelt::{RecoveryInfo, ResilienceContext}; -let context = PipelineContext::new(&clock); +let context = ResilienceContext::new(&clock); let service = ( // Retry middleware: Automatically retries failed operations Retry::layer("retry", &context) @@ -73,11 +73,11 @@ can be stacked together using tuples and built into a service using the [`Stack` Resilience middleware also requires [`Clock`][__link2] from the [`tick`][__link3] crate for timing operations like delays, timeouts, and backoff calculations. The clock is passed through -[`PipelineContext`][__link4] when creating middleware layers. +[`ResilienceContext`][__link4] when creating middleware layers. ### Core Types -* [`PipelineContext`][__link5] - Holds shared state for resilience middleware, including the clock. +* [`ResilienceContext`][__link5] - Holds shared state for resilience middleware, including the clock. * [`RecoveryInfo`][__link6] - Classifies errors as recoverable (transient) or non-recoverable (permanent). * [`Recovery`][__link7] - A trait for types that can determine their recoverability. @@ -107,17 +107,17 @@ This crate provides several optional features that can be enabled in your `Cargo This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGxCJHvMDzlVbG4wWnwONx-VDG8_fIAgvujggG8jr12bAoHJVYWSEgmdsYXllcmVkZTAuMi4wgmtyZWNvdmVyYWJsZWUwLjEuMIJoc2VhdGJlbHRlMC4yLjCCZHRpY2tlMC4xLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG-K4p0z0fzk6G-4Djif9GAJ_GxL0iP3D0ueKG8TczT6iIBrHYWSFgmVTdGFja_aCZ2xheWVyZWRlMC4yLjCCa3JlY292ZXJhYmxlZTAuMS4wgmhzZWF0YmVsdGUwLjIuMIJkdGlja2UwLjEuMg [__link0]: https://crates.io/crates/layered/0.2.0 - [__link1]: https://docs.rs/layered/0.2.0/layered/?search=Stack + [__link1]: https://crates.io/crates/Stack [__link10]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html [__link11]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html [__link12]: https://docs.rs/seatbelt/0.2.0/seatbelt/retry/index.html [__link13]: https://docs.rs/seatbelt/0.2.0/seatbelt/circuit_breaker/index.html [__link2]: https://docs.rs/tick/0.1.2/tick/?search=Clock [__link3]: https://crates.io/crates/tick/0.1.2 - [__link4]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext - [__link5]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::PipelineContext + [__link4]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::ResilienceContext + [__link5]: https://docs.rs/seatbelt/0.2.0/seatbelt/?search=shared::ResilienceContext [__link6]: https://docs.rs/recoverable/0.1.0/recoverable/?search=RecoveryInfo [__link7]: https://docs.rs/recoverable/0.1.0/recoverable/?search=Recovery [__link8]: https://docs.rs/seatbelt/0.2.0/seatbelt/timeout/index.html diff --git a/crates/seatbelt/benches/circuit_breaker.rs b/crates/seatbelt/benches/circuit_breaker.rs index c5bbd765..ea2d6d75 100644 --- a/crates/seatbelt/benches/circuit_breaker.rs +++ b/crates/seatbelt/benches/circuit_breaker.rs @@ -6,7 +6,7 @@ use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; use seatbelt::circuit_breaker::Circuit; -use seatbelt::{PipelineContext, RecoveryInfo}; +use seatbelt::{RecoveryInfo, ResilienceContext}; use tick::Clock; #[global_allocator] @@ -27,7 +27,7 @@ fn entry(c: &mut Criterion) { }); // With circuit breaker (closed state) - let context = PipelineContext::new(Clock::new_frozen()); + let context = ResilienceContext::new(Clock::new_frozen()); let service = ( Circuit::layer("bench", &context) diff --git a/crates/seatbelt/benches/observability.rs b/crates/seatbelt/benches/observability.rs index 91c3b824..d6f529fd 100644 --- a/crates/seatbelt/benches/observability.rs +++ b/crates/seatbelt/benches/observability.rs @@ -12,7 +12,7 @@ use opentelemetry_sdk::metrics::data::ResourceMetrics; use opentelemetry_sdk::metrics::exporter::PushMetricExporter; use opentelemetry_sdk::metrics::{SdkMeterProvider, Temporality}; use seatbelt::retry::Retry; -use seatbelt::{PipelineContext, RecoveryInfo}; +use seatbelt::{RecoveryInfo, ResilienceContext}; use tick::Clock; #[global_allocator] @@ -23,7 +23,7 @@ fn entry(c: &mut Criterion) { let session = Session::new(); // No telemetry - let context = PipelineContext::new(Clock::new_frozen()); + let context = ResilienceContext::new(Clock::new_frozen()); let service = ( Retry::layer("bench", &context) .clone_input() @@ -42,7 +42,7 @@ fn entry(c: &mut Criterion) { // Metrics let meter_provider = SdkMeterProvider::builder().with_periodic_exporter(EmptyExporter).build(); - let context = PipelineContext::new(Clock::new_frozen()).enable_metrics(&meter_provider); + let context = ResilienceContext::new(Clock::new_frozen()).enable_metrics(&meter_provider); let service = ( Retry::layer("bench", &context) .clone_input() @@ -60,7 +60,7 @@ fn entry(c: &mut Criterion) { }); // Logs - let context = PipelineContext::new(Clock::new_frozen()).enable_logs(); + let context = ResilienceContext::new(Clock::new_frozen()).enable_logs(); let service = ( Retry::layer("bench", &context) .clone_input() diff --git a/crates/seatbelt/benches/retry.rs b/crates/seatbelt/benches/retry.rs index 9198146a..d1c54c42 100644 --- a/crates/seatbelt/benches/retry.rs +++ b/crates/seatbelt/benches/retry.rs @@ -9,7 +9,7 @@ use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; use seatbelt::retry::Retry; -use seatbelt::{PipelineContext, RecoveryInfo}; +use seatbelt::{RecoveryInfo, ResilienceContext}; use tick::Clock; #[global_allocator] @@ -30,7 +30,7 @@ fn entry(c: &mut Criterion) { }); // With retry - let context = PipelineContext::new(Clock::new_frozen()); + let context = ResilienceContext::new(Clock::new_frozen()); let service = ( Retry::layer("bench", &context) @@ -49,7 +49,7 @@ fn entry(c: &mut Criterion) { }); // With retry and recovery - let context = PipelineContext::new(Clock::new_frozen()); + let context = ResilienceContext::new(Clock::new_frozen()); let service = ( Retry::layer("bench", &context) diff --git a/crates/seatbelt/benches/timeout.rs b/crates/seatbelt/benches/timeout.rs index e46c6206..fd69e15a 100644 --- a/crates/seatbelt/benches/timeout.rs +++ b/crates/seatbelt/benches/timeout.rs @@ -8,7 +8,7 @@ use alloc_tracker::{Allocator, Session}; use criterion::{Criterion, criterion_group, criterion_main}; use futures::executor::block_on; use layered::{Execute, Service, Stack}; -use seatbelt::PipelineContext; +use seatbelt::ResilienceContext; use seatbelt::timeout::Timeout; use tick::Clock; @@ -30,7 +30,7 @@ fn entry(c: &mut Criterion) { }); // With timeout - let context = PipelineContext::new(Clock::new_frozen()); + let context = ResilienceContext::new(Clock::new_frozen()); let service = ( Timeout::layer("bench", &context) diff --git a/crates/seatbelt/examples/circuit_breaker.rs b/crates/seatbelt/examples/circuit_breaker.rs index e86eeec9..d178baac 100644 --- a/crates/seatbelt/examples/circuit_breaker.rs +++ b/crates/seatbelt/examples/circuit_breaker.rs @@ -16,7 +16,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::circuit_breaker::Circuit; -use seatbelt::{PipelineContext, RecoveryInfo}; +use seatbelt::{RecoveryInfo, ResilienceContext}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -26,7 +26,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = PipelineContext::new(&clock).enable_metrics(&meter_provider); + let context = ResilienceContext::new(&clock).enable_metrics(&meter_provider); // Define stack with circuit breaker layer let stack = ( diff --git a/crates/seatbelt/examples/resilience_pipeline.rs b/crates/seatbelt/examples/resilience_pipeline.rs index ee89a4d8..55c36db6 100644 --- a/crates/seatbelt/examples/resilience_pipeline.rs +++ b/crates/seatbelt/examples/resilience_pipeline.rs @@ -13,7 +13,7 @@ use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; use seatbelt::timeout::Timeout; -use seatbelt::{PipelineContext, RecoveryInfo}; +use seatbelt::{RecoveryInfo, ResilienceContext}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -25,7 +25,7 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Shared options for resilience middleware - let context = PipelineContext::new(&clock).enable_metrics(&meter_provider).name("my_pipeline"); + let context = ResilienceContext::new(&clock).enable_metrics(&meter_provider).name("my_pipeline"); // Define stack with retry and timeout middlewares let stack = ( diff --git a/crates/seatbelt/examples/retry.rs b/crates/seatbelt/examples/retry.rs index 33e3c9c2..fda22019 100644 --- a/crates/seatbelt/examples/retry.rs +++ b/crates/seatbelt/examples/retry.rs @@ -8,13 +8,13 @@ use std::io::Error; use layered::{Execute, Service, Stack}; use ohno::AppError; use seatbelt::retry::Retry; -use seatbelt::{PipelineContext, RecoveryInfo}; +use seatbelt::{RecoveryInfo, ResilienceContext}; use tick::Clock; #[tokio::main] async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); - let context = PipelineContext::new(&clock); + let context = ResilienceContext::new(&clock); // Define stack with retry layer let stack = ( diff --git a/crates/seatbelt/examples/retry_advanced.rs b/crates/seatbelt/examples/retry_advanced.rs index 326b57cd..b11cf637 100644 --- a/crates/seatbelt/examples/retry_advanced.rs +++ b/crates/seatbelt/examples/retry_advanced.rs @@ -15,7 +15,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::{Attempt, Retry}; -use seatbelt::{PipelineContext, RecoveryInfo}; +use seatbelt::{RecoveryInfo, ResilienceContext}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -25,7 +25,9 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = PipelineContext::new(&clock).name("retry_advanced").enable_metrics(&meter_provider); + let context = ResilienceContext::new(&clock) + .name("retry_advanced") + .enable_metrics(&meter_provider); // Define stack with retry layer let stack = ( diff --git a/crates/seatbelt/examples/retry_outage.rs b/crates/seatbelt/examples/retry_outage.rs index ecad7fc7..bb8ac070 100644 --- a/crates/seatbelt/examples/retry_outage.rs +++ b/crates/seatbelt/examples/retry_outage.rs @@ -19,7 +19,7 @@ use ohno::AppError; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; use seatbelt::retry::Retry; -use seatbelt::{PipelineContext, Recovery, RecoveryInfo}; +use seatbelt::{Recovery, RecoveryInfo, ResilienceContext}; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -32,7 +32,7 @@ async fn main() -> Result<(), AppError> { let meter_provider = configure_telemetry(); let clock = Clock::new_tokio(); - let context = PipelineContext::new(&clock).enable_metrics(&meter_provider); + let context = ResilienceContext::new(&clock).enable_metrics(&meter_provider); // Configure retry layer for outage handling with input restoration let stack = ( diff --git a/crates/seatbelt/examples/timeout.rs b/crates/seatbelt/examples/timeout.rs index 85919f89..37d986e2 100644 --- a/crates/seatbelt/examples/timeout.rs +++ b/crates/seatbelt/examples/timeout.rs @@ -13,7 +13,7 @@ use layered::{Execute, Service, Stack}; use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::PipelineContext; +use seatbelt::ResilienceContext; use seatbelt::timeout::Timeout; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; @@ -29,7 +29,7 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create common options - let context = PipelineContext::new(&clock).enable_metrics(&meter_provider); + let context = ResilienceContext::new(&clock).enable_metrics(&meter_provider); // Define stack with timeout layer let stack = ( diff --git a/crates/seatbelt/examples/timeout_advanced.rs b/crates/seatbelt/examples/timeout_advanced.rs index f025b35e..cb1e02fb 100644 --- a/crates/seatbelt/examples/timeout_advanced.rs +++ b/crates/seatbelt/examples/timeout_advanced.rs @@ -12,7 +12,7 @@ use layered::{Execute, Service, Stack}; use ohno::{AppError, app_err}; use opentelemetry_sdk::metrics::SdkMeterProvider; use opentelemetry_stdout::MetricExporter; -use seatbelt::PipelineContext; +use seatbelt::ResilienceContext; use seatbelt::timeout::Timeout; use tick::Clock; use tracing_subscriber::layer::SubscriberExt; @@ -29,8 +29,8 @@ async fn main() -> Result<(), AppError> { let clock = Clock::new_tokio(); // Create service options - let context: PipelineContext> = - PipelineContext::new(&clock).name("my_pipeline").enable_metrics(&meter_provider); + let context: ResilienceContext> = + ResilienceContext::new(&clock).name("my_pipeline").enable_metrics(&meter_provider); // Define stack with timeout layer let stack = ( diff --git a/crates/seatbelt/src/circuit_breaker/layer.rs b/crates/seatbelt/src/circuit_breaker/layer.rs index 6183ddd5..d518d149 100644 --- a/crates/seatbelt/src/circuit_breaker/layer.rs +++ b/crates/seatbelt/src/circuit_breaker/layer.rs @@ -10,10 +10,10 @@ use super::{ Circuit, Engines, HalfOpenMode, HealthMetricsBuilder, OnClosed, OnClosedArgs, OnOpened, OnOpenedArgs, OnProbing, OnProbingArgs, PartionKeyProvider, PartitionKey, RejectedInput, RejectedInputArgs, ShouldRecover, }; -use crate::Layer; use crate::circuit_breaker::engine::probing::ProbesOptions; use crate::utils::{EnableIf, TelemetryHelper}; -use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set}; +use crate::{NotSet, Recovery, RecoveryInfo, ResilienceContext, Set}; +use layered::Layer; /// Builder for configuring circuit breaker resilience middleware. /// @@ -27,7 +27,7 @@ use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set}; /// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. #[derive(Debug)] pub struct CircuitLayer { - context: PipelineContext, + context: ResilienceContext, recovery: Option>, rejected_input: Option>, on_opened: Option>, @@ -46,7 +46,7 @@ pub struct CircuitLayer impl CircuitLayer { #[must_use] - pub(crate) fn new(name: Cow<'static, str>, context: &PipelineContext) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &ResilienceContext) -> Self { Self { context: context.clone(), recovery: None, @@ -510,7 +510,7 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { - let context = PipelineContext::::new(Clock::new_frozen()); + let context = ResilienceContext::::new(Clock::new_frozen()); let layer = CircuitLayer::new("test".into(), &context); let layer: CircuitLayer<_, _, Set, NotSet> = layer.recovery(); @@ -552,7 +552,7 @@ mod tests { #[test] fn rejected_input_error_wraps_in_err() { - let context: PipelineContext> = PipelineContext::new(Clock::new_frozen()); + let context: ResilienceContext> = ResilienceContext::new(Clock::new_frozen()); let layer = CircuitLayer::new("test".into(), &context); let layer: CircuitLayer<_, _, NotSet, Set> = layer.rejected_input_error(|input, _| format!("rejected: {input}")); @@ -697,8 +697,8 @@ mod tests { static_assertions::assert_impl_all!(CircuitLayer: Debug); } - fn create_test_context() -> PipelineContext { - PipelineContext::new(Clock::new_frozen()).name("test_pipeline") + fn create_test_context() -> ResilienceContext { + ResilienceContext::new(Clock::new_frozen()).name("test_pipeline") } fn create_ready_layer() -> CircuitLayer { diff --git a/crates/seatbelt/src/circuit_breaker/mod.rs b/crates/seatbelt/src/circuit_breaker/mod.rs index 2c74d765..7b223d82 100644 --- a/crates/seatbelt/src/circuit_breaker/mod.rs +++ b/crates/seatbelt/src/circuit_breaker/mod.rs @@ -19,9 +19,9 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit_breaker::Circuit; -//! # use seatbelt::{RecoveryInfo, PipelineContext}; +//! # use seatbelt::{RecoveryInfo, ResilienceContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { -//! let context = PipelineContext::new(&clock); +//! let context = ResilienceContext::new(&clock); //! //! let stack = ( //! Circuit::layer("circuit_breaker", &context) @@ -166,7 +166,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when circuit state transitions occur and when requests are rejected //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`PipelineContext::name`][crate::PipelineContext::name] +//! - `resilience.pipeline.name`: Pipeline identifier from [`ResilienceContext::name`][crate::ResilienceContext::name] //! - `resilience.strategy.name`: Circuit breaker identifier from [`Circuit::layer`] //! - `resilience.event.name`: One of: //! - `circuit_opened`: When the circuit transitions to open state due to failure threshold being exceeded @@ -188,11 +188,11 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit_breaker::Circuit; -//! # use seatbelt::{RecoveryInfo, PipelineContext}; +//! # use seatbelt::{RecoveryInfo, ResilienceContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let context = PipelineContext::new(&clock).name("example"); +//! let context = ResilienceContext::new(&clock).name("example"); //! //! let stack = ( //! Circuit::layer("my_breaker", &context) @@ -229,10 +229,10 @@ //! # use layered::{Execute, Service, Stack}; //! # use tick::Clock; //! # use seatbelt::circuit_breaker::{Circuit, PartitionKey, HalfOpenMode}; -//! # use seatbelt::{RecoveryInfo, PipelineContext}; +//! # use seatbelt::{RecoveryInfo, ResilienceContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. -//! let context = PipelineContext::new(&clock).name("advanced_example"); +//! let context = ResilienceContext::new(&clock).name("advanced_example"); //! //! let stack = ( //! Circuit::layer("advanced_breaker", &context) diff --git a/crates/seatbelt/src/circuit_breaker/service.rs b/crates/seatbelt/src/circuit_breaker/service.rs index 3fa106f9..dd3c4eb8 100644 --- a/crates/seatbelt/src/circuit_breaker/service.rs +++ b/crates/seatbelt/src/circuit_breaker/service.rs @@ -47,7 +47,7 @@ impl Circuit { /// before it can be used to build a circuit breaker service. pub fn layer( name: impl Into>, - context: &crate::PipelineContext, + context: &crate::ResilienceContext, ) -> CircuitLayer { CircuitLayer::new(name.into(), context) } @@ -170,14 +170,14 @@ mod tests { use tick::ClockControl; use super::*; - use crate::Layer; use crate::circuit_breaker::constants::DEFAULT_BREAK_DURATION; use crate::circuit_breaker::{EngineFake, HalfOpenMode, HealthInfo, Stats}; - use crate::{PipelineContext, RecoveryInfo, Set}; + use crate::{RecoveryInfo, ResilienceContext, Set}; + use layered::Layer; #[test] fn layer_ensure_defaults() { - let context = PipelineContext::::new(Clock::new_frozen()).name("test_pipeline"); + let context = ResilienceContext::::new(Clock::new_frozen()).name("test_pipeline"); let layer: CircuitLayer = Circuit::layer("test_breaker", &context); let layer = layer .recovery_with(|_, _| RecoveryInfo::never()) @@ -463,7 +463,7 @@ mod tests { let _guard = log_capture.subscriber().set_default(); let clock_control = ClockControl::new(); - let context = PipelineContext::::new(clock_control.to_clock()) + let context = ResilienceContext::::new(clock_control.to_clock()) .name("log_test_pipeline") .enable_logs(); @@ -506,7 +506,7 @@ mod tests { } fn create_ready_circuit_breaker_layer(clock: &Clock) -> CircuitLayer { - let context = PipelineContext::::new(clock.clone()).name("test_pipeline"); + let context = ResilienceContext::::new(clock.clone()).name("test_pipeline"); Circuit::layer("test_breaker", &context) .recovery_with(|output, _| { if output.contains("error") { diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index efe360bc..20afb672 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -34,10 +34,10 @@ //! # use layered::{Execute, Service, Stack}; //! use seatbelt::retry::Retry; //! use seatbelt::timeout::Timeout; -//! use seatbelt::{RecoveryInfo, PipelineContext}; +//! use seatbelt::{RecoveryInfo, ResilienceContext}; //! //! # async fn main(clock: Clock) { -//! let context = PipelineContext::new(&clock); +//! let context = ResilienceContext::new(&clock); //! let service = ( //! // Retry middleware: Automatically retries failed operations //! Retry::layer("retry", &context) @@ -91,11 +91,11 @@ //! //! Resilience middleware also requires [`Clock`][tick::Clock] from the [`tick`] crate for timing //! operations like delays, timeouts, and backoff calculations. The clock is passed through -//! [`PipelineContext`] when creating middleware layers. +//! [`ResilienceContext`] when creating middleware layers. //! //! ## Core Types //! -//! - [`PipelineContext`] - Holds shared state for resilience middleware, including the clock. +//! - [`ResilienceContext`] - Holds shared state for resilience middleware, including the clock. //! - [`RecoveryInfo`] - Classifies errors as recoverable (transient) or non-recoverable (permanent). //! - [`Recovery`] - A trait for types that can determine their recoverability. //! @@ -123,7 +123,7 @@ pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; pub(crate) mod shared; -pub use crate::shared::{NotSet, PipelineContext, Set}; +pub use crate::shared::{NotSet, ResilienceContext, Set}; #[cfg(any(feature = "timeout", test))] pub mod timeout; @@ -134,9 +134,6 @@ pub mod retry; #[cfg(any(feature = "circuit-breaker", test))] pub mod circuit_breaker; -#[doc(inline)] -pub use layered::{Layer, Service, Stack}; - #[cfg(any(feature = "retry", feature = "circuit-breaker", test))] mod rnd; diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index 31e22dda..de535bfb 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -5,14 +5,14 @@ use std::borrow::Cow; use std::marker::PhantomData; use std::time::Duration; -use crate::Layer; use crate::retry::backoff::BackoffOptions; use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; use crate::shared::MaxAttempts; use crate::utils::EnableIf; use crate::utils::TelemetryHelper; -use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set, retry::Backoff}; +use crate::{NotSet, Recovery, RecoveryInfo, ResilienceContext, Set, retry::Backoff}; +use layered::Layer; /// Builder for configuring retry resilience middleware. /// @@ -25,7 +25,7 @@ use crate::{NotSet, PipelineContext, Recovery, RecoveryInfo, Set, retry::Backoff /// For comprehensive examples, see the [retry module][crate::retry] documentation. #[derive(Debug)] pub struct RetryLayer { - context: PipelineContext, + context: ResilienceContext, max_attempts: MaxAttempts, backoff: BackoffOptions, clone_input: Option>, @@ -40,7 +40,7 @@ pub struct RetryLayer { impl RetryLayer { #[must_use] - pub(crate) fn new(name: Cow<'static, str>, context: &PipelineContext) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &ResilienceContext) -> Self { Self { context: context.clone(), max_attempts: MaxAttempts::Finite(DEFAULT_RETRY_ATTEMPTS.saturating_add(1)), @@ -303,10 +303,10 @@ impl RetryLayer>::new(Clock::new_frozen()); + /// # let context = ResilienceContext::>::new(Clock::new_frozen()); /// // Service with multiple endpoints that can route around unavailable services /// let layer = Retry::layer("multi_endpoint_retry", &context) /// .clone_input_with(|input: &mut String, args| { @@ -359,11 +359,11 @@ impl RetryLayer RetryLayer } /// # struct HttpResponse { status: u16 } @@ -467,7 +467,7 @@ impl RetryLayer>::new(&clock); + /// # let context = ResilienceContext::>::new(&clock); /// type HttpResult = Result; /// /// let layer = Retry::layer("http_retry", &context).restore_input_from_error( @@ -587,7 +587,7 @@ mod tests { #[test] fn recovery_auto_sets_correctly() { - let context = PipelineContext::::new(Clock::new_frozen()); + let context = ResilienceContext::::new(Clock::new_frozen()); let layer = RetryLayer::new("test".into(), &context); let layer: RetryLayer<_, _, NotSet, Set> = layer.recovery(); @@ -740,7 +740,7 @@ mod tests { #[test] fn restore_input_from_error_sets_correctly() { - let context: PipelineContext> = PipelineContext::new(Clock::new_frozen()).name("test"); + let context: ResilienceContext> = ResilienceContext::new(Clock::new_frozen()).name("test"); let layer = RetryLayer::new("test".into(), &context) .restore_input_from_error(|e: &mut String, _| (e == "restore").then(|| std::mem::take(e))); @@ -763,8 +763,8 @@ mod tests { static_assertions::assert_impl_all!(RetryLayer: Debug); } - fn create_test_context() -> PipelineContext { - PipelineContext::new(Clock::new_frozen()).name("test_pipeline") + fn create_test_context() -> ResilienceContext { + ResilienceContext::new(Clock::new_frozen()).name("test_pipeline") } fn create_ready_layer() -> RetryLayer { diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index 6bbfc3e7..87fb3fb9 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -16,9 +16,9 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::{Retry, Backoff}; -//! # use seatbelt::{RecoveryInfo, PipelineContext}; +//! # use seatbelt::{RecoveryInfo, ResilienceContext}; //! # async fn example(clock: Clock) -> Result<(), Box> { -//! let context = PipelineContext::new(&clock).name("my_service"); +//! let context = ResilienceContext::new(&clock).name("my_service"); //! //! let stack = ( //! Retry::layer("retry", &context) @@ -90,7 +90,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted for each attempt that should be retried (including the final retry attempt) //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`PipelineContext::name`][crate::PipelineContext::name] +//! - `resilience.pipeline.name`: Pipeline identifier from [`ResilienceContext::name`][crate::ResilienceContext::name] //! - `resilience.strategy.name`: Timeout identifier from [`Retry::layer`] //! - `resilience.event.name`: Always `retry` //! - `resilience.attempt.index`: Attempt index (0-based) @@ -107,11 +107,11 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::retry::{Retry, Backoff}; -//! # use seatbelt::{RecoveryInfo, PipelineContext}; +//! # use seatbelt::{RecoveryInfo, ResilienceContext}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let context = PipelineContext::new(&clock).name("example"); +//! let context = ResilienceContext::new(&clock).name("example"); //! //! let stack = ( //! Retry::layer("my_retry", &context) @@ -149,10 +149,10 @@ //! # use std::io; //! # use layered::{Execute, Stack, Service}; //! # use seatbelt::retry::{Retry, Backoff}; -//! # use seatbelt::{RecoveryInfo, PipelineContext}; +//! # use seatbelt::{RecoveryInfo, ResilienceContext}; //! # async fn example(clock: Clock) -> Result<(), String> { //! // Define common options for resilience middleware. -//! let context = PipelineContext::new(&clock); +//! let context = ResilienceContext::new(&clock); //! //! let stack = ( //! Retry::layer("advanced_retry", &context) diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index ef5e847d..fe5e8dfb 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -52,7 +52,7 @@ impl Retry { /// before it can be used to build a retry service. pub fn layer( name: impl Into>, - context: &crate::PipelineContext, + context: &crate::ResilienceContext, ) -> crate::retry::RetryLayer { crate::retry::RetryLayer::new(name.into(), context) } @@ -265,15 +265,15 @@ mod tests { use tick::ClockControl; use super::*; - use crate::Layer; use crate::retry::RetryLayer; use crate::shared::Backoff; use crate::testing::MetricTester; - use crate::{PipelineContext, Set}; + use crate::{ResilienceContext, Set}; + use layered::Layer; #[test] fn layer_ensure_defaults() { - let context = PipelineContext::::new(Clock::new_frozen()).name("test_pipeline"); + let context = ResilienceContext::::new(Clock::new_frozen()).name("test_pipeline"); let layer: RetryLayer = Retry::layer("test_retry", &context); let layer = layer.recovery_with(|_, _| RecoveryInfo::never()).clone_input(); @@ -581,7 +581,7 @@ mod tests { #[tokio::test] async fn retries_exhausted_ensure_telemetry_reported() { let tester = MetricTester::new(); - let context = PipelineContext::::new(ClockControl::default().auto_advance_timers(true).to_clock()) + let context = ResilienceContext::::new(ClockControl::default().auto_advance_timers(true).to_clock()) .name("test_pipeline") .enable_metrics(tester.meter_provider()); @@ -617,7 +617,7 @@ mod tests { let _guard = log_capture.subscriber().set_default(); let clock = ClockControl::default().auto_advance_timers(true).to_clock(); - let context = PipelineContext::::new(clock) + let context = ResilienceContext::::new(clock) .name("log_test_pipeline") .enable_logs(); @@ -637,13 +637,13 @@ mod tests { } fn create_ready_retry_layer(clock: &Clock, recover: RecoveryInfo) -> RetryLayer { - let context = PipelineContext::new(clock.clone()).name("test_pipeline"); + let context = ResilienceContext::new(clock.clone()).name("test_pipeline"); create_ready_retry_layer_core(recover, &context) } fn create_ready_retry_layer_core( recover: RecoveryInfo, - context: &PipelineContext, + context: &ResilienceContext, ) -> RetryLayer { Retry::layer("test_retry", context) .recovery_with(move |_, _| recover.clone()) diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/shared/context.rs index 97a4a46c..746916fb 100644 --- a/crates/seatbelt/src/shared/context.rs +++ b/crates/seatbelt/src/shared/context.rs @@ -9,11 +9,11 @@ pub(crate) const DEFAULT_PIPELINE_NAME: &str = "default"; /// Shared configuration and dependencies for a pipeline of resilience middleware. /// -/// Pass a single `PipelineContext` to all middleware in a pipeline (retry, timeout, +/// Pass a single `ResilienceContext` to all middleware in a pipeline (retry, timeout, /// circuit breaker, etc.) to share a clock and telemetry configuration. #[derive(Debug)] #[non_exhaustive] -pub struct PipelineContext { +pub struct ResilienceContext { clock: Clock, name: Cow<'static, str>, #[cfg(any(feature = "metrics", test))] @@ -23,7 +23,7 @@ pub struct PipelineContext { _out: std::marker::PhantomData Out>, } -impl PipelineContext { +impl ResilienceContext { /// Create a context with a clock. Initializes with `name = "default"`. pub fn new(clock: impl AsRef) -> Self { Self { @@ -90,7 +90,7 @@ impl PipelineContext { } } -impl Clone for PipelineContext { +impl Clone for ResilienceContext { fn clone(&self) -> Self { Self { clock: self.clock.clone(), @@ -113,7 +113,7 @@ mod tests { #[test] fn test_new_with_clock_sets_default_pipeline_name() { let clock = tick::Clock::new_frozen(); - let ctx = PipelineContext::<(), ()>::new(clock); + let ctx = ResilienceContext::<(), ()>::new(clock); let telemetry = ctx.create_telemetry("test".into()); assert_eq!(telemetry.pipeline_name.as_ref(), DEFAULT_PIPELINE_NAME); // Ensure clock reference behaves (timestamp monotonic relative behaviour not required, just accessible) @@ -123,7 +123,7 @@ mod tests { #[test] fn test_name_with_custom_value_sets_name_and_is_owned() { let clock = tick::Clock::new_frozen(); - let ctx = PipelineContext::<(), ()>::new(clock).name(String::from("custom_pipeline")); + let ctx = ResilienceContext::<(), ()>::new(clock).name(String::from("custom_pipeline")); let telemetry = ctx.create_telemetry("test".into()); assert_eq!(telemetry.pipeline_name.as_ref(), "custom_pipeline"); assert!(matches!(telemetry.pipeline_name, Cow::Owned(_))); @@ -135,7 +135,7 @@ mod tests { let clock = tick::Clock::new_frozen(); let (provider, exporter) = test_meter_provider(); - let ctx = PipelineContext::<(), ()>::new(clock).enable_metrics(&provider); + let ctx = ResilienceContext::<(), ()>::new(clock).enable_metrics(&provider); let telemetry1 = ctx.create_telemetry("test1".into()); let telemetry2 = ctx.create_telemetry("test2".into()); let c1 = telemetry1.event_reporter.unwrap(); diff --git a/crates/seatbelt/src/shared/mod.rs b/crates/seatbelt/src/shared/mod.rs index 45ea541e..221dc0e7 100644 --- a/crates/seatbelt/src/shared/mod.rs +++ b/crates/seatbelt/src/shared/mod.rs @@ -12,7 +12,7 @@ pub struct Set; pub struct NotSet; mod context; -pub use context::PipelineContext; +pub use context::ResilienceContext; mod attempt; mod backoff; diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 51081ad8..34e7e478 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -5,13 +5,13 @@ use std::borrow::Cow; use std::marker::PhantomData; use std::time::Duration; -use crate::Layer; use crate::timeout::{ OnTimeout, OnTimeoutArgs, Timeout, TimeoutOutput as TimeoutOutputCallback, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs, }; use crate::utils::EnableIf; use crate::utils::TelemetryHelper; -use crate::{NotSet, PipelineContext, Set}; +use crate::{NotSet, ResilienceContext, Set}; +use layered::Layer; /// Builder for configuring timeout resilience middleware. /// @@ -24,7 +24,7 @@ use crate::{NotSet, PipelineContext, Set}; /// For comprehensive examples, see the [timeout module][crate::timeout] documentation. #[derive(Debug)] pub struct TimeoutLayer { - context: PipelineContext, + context: ResilienceContext, timeout: Option, timeout_output: Option>, on_timeout: Option>, @@ -36,7 +36,7 @@ pub struct TimeoutLayer { impl TimeoutLayer { #[must_use] - pub(crate) fn new(name: Cow<'static, str>, context: &PipelineContext) -> Self { + pub(crate) fn new(name: Cow<'static, str>, context: &ResilienceContext) -> Self { Self { timeout: None, timeout_output: None, @@ -391,12 +391,12 @@ mod tests { static_assertions::assert_impl_all!(TimeoutLayer: Debug); } - fn create_test_context() -> PipelineContext { - PipelineContext::new(Clock::new_frozen()).name("test_pipeline") + fn create_test_context() -> ResilienceContext { + ResilienceContext::new(Clock::new_frozen()).name("test_pipeline") } - fn create_test_context_result() -> PipelineContext> { - PipelineContext::new(Clock::new_frozen()).name("test_pipeline") + fn create_test_context_result() -> ResilienceContext> { + ResilienceContext::new(Clock::new_frozen()).name("test_pipeline") } fn create_ready_layer() -> TimeoutLayer { diff --git a/crates/seatbelt/src/timeout/mod.rs b/crates/seatbelt/src/timeout/mod.rs index 93a5fca4..4853f335 100644 --- a/crates/seatbelt/src/timeout/mod.rs +++ b/crates/seatbelt/src/timeout/mod.rs @@ -18,9 +18,9 @@ //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; //! # use seatbelt::timeout::Timeout; -//! # use seatbelt::PipelineContext; +//! # use seatbelt::ResilienceContext; //! # async fn example(clock: Clock) -> Result<(), io::Error> { -//! let context = PipelineContext::new(&clock).name("my_service"); +//! let context = ResilienceContext::new(&clock).name("my_service"); //! //! let stack = ( //! Timeout::layer("timeout", &context) @@ -80,7 +80,7 @@ //! - **Metric**: `resilience.event` (counter) //! - **When**: Emitted when a timeout occurs //! - **Attributes**: -//! - `resilience.pipeline.name`: Pipeline identifier from [`PipelineContext::name`][crate::PipelineContext::name] +//! - `resilience.pipeline.name`: Pipeline identifier from [`ResilienceContext::name`][crate::ResilienceContext::name] //! - `resilience.strategy.name`: Timeout identifier from [`Timeout::layer`] //! - `resilience.event.name`: Always `timeout` //! @@ -94,12 +94,12 @@ //! # use std::time::Duration; //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::PipelineContext; +//! # use seatbelt::ResilienceContext; //! # use seatbelt::timeout::Timeout; //! # async fn example(clock: Clock) -> Result<(), Box> { //! // Define common options for resilience middleware. The clock is runtime-specific and //! // must be provided. See its documentation for details. -//! let context = PipelineContext::new(&clock); +//! let context = ResilienceContext::new(&clock); //! //! let stack = ( //! Timeout::layer("my_timeout", &context) @@ -134,11 +134,11 @@ //! # use std::io; //! # use tick::Clock; //! # use layered::{Execute, Service, Stack}; -//! # use seatbelt::PipelineContext; +//! # use seatbelt::ResilienceContext; //! # use seatbelt::timeout::Timeout; //! # async fn example(clock: Clock) -> Result<(), io::Error> { //! // Define common options for resilience middleware. -//! let context = PipelineContext::new(&clock); +//! let context = ResilienceContext::new(&clock); //! //! let stack = ( //! Timeout::layer("my_timeout", &context) diff --git a/crates/seatbelt/src/timeout/service.rs b/crates/seatbelt/src/timeout/service.rs index 8a4559fb..9b2bbd41 100644 --- a/crates/seatbelt/src/timeout/service.rs +++ b/crates/seatbelt/src/timeout/service.rs @@ -10,7 +10,7 @@ use tick::{Clock, FutureExt}; use crate::timeout::{OnTimeout, OnTimeoutArgs, TimeoutLayer, TimeoutOutput, TimeoutOutputArgs, TimeoutOverride, TimeoutOverrideArgs}; use crate::utils::EnableIf; -use crate::{NotSet, PipelineContext}; +use crate::{NotSet, ResilienceContext}; /// Applies a timeout to service execution for canceling long-running operations. /// @@ -53,10 +53,10 @@ impl Timeout { /// # use std::time::Duration; /// # use layered::{Execute, Stack}; /// # use tick::Clock; - /// # use seatbelt::PipelineContext; + /// # use seatbelt::ResilienceContext; /// use seatbelt::timeout::Timeout; /// - /// # fn example(context: PipelineContext) { + /// # fn example(context: ResilienceContext) { /// let timeout_layer = Timeout::layer("my_timeout", &context) /// .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) /// .timeout(Duration::from_secs(30)); @@ -66,7 +66,7 @@ impl Timeout { /// For comprehensive examples, see the [timeout module] documentation. /// /// [timeout module]: crate::timeout - pub fn layer(name: impl Into>, context: &PipelineContext) -> TimeoutLayer { + pub fn layer(name: impl Into>, context: &ResilienceContext) -> TimeoutLayer { TimeoutLayer::new(name.into(), context) } } @@ -148,7 +148,7 @@ mod tests { #[tokio::test] async fn no_timeout() { let clock = Clock::new_frozen(); - let context = PipelineContext::new(clock); + let context = ResilienceContext::new(clock); let stack = ( Timeout::layer("test_timeout", &context) @@ -170,7 +170,7 @@ mod tests { .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) .to_clock(); - let context = PipelineContext::new(clock.clone()); + let context = ResilienceContext::new(clock.clone()); let called = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); let called_clone = std::sync::Arc::clone(&called); @@ -208,7 +208,7 @@ mod tests { .to_clock(); let stack = ( - Timeout::layer("test_timeout", &PipelineContext::new(clock.clone())) + Timeout::layer("test_timeout", &ResilienceContext::new(clock.clone())) .timeout_output(|args| format!("timed out after {}ms", args.timeout().as_millis())) .timeout(Duration::from_millis(200)) .timeout_override(|input, _args| { @@ -237,7 +237,7 @@ mod tests { async fn no_timeout_if_disabled() { let clock = ClockControl::default().auto_advance_timers(true).to_clock(); let stack = ( - Timeout::layer("test_timeout", &PipelineContext::new(&clock)) + Timeout::layer("test_timeout", &ResilienceContext::new(&clock)) .timeout_output(|_args| "timed out".to_string()) .timeout(Duration::from_millis(200)) .disable(), @@ -272,7 +272,7 @@ mod tests { .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) .to_clock(); - let context = PipelineContext::new(clock.clone()).enable_logs().name("log_test_pipeline"); + let context = ResilienceContext::new(clock.clone()).enable_logs().name("log_test_pipeline"); let stack = ( Timeout::layer("log_test_timeout", &context) @@ -308,7 +308,7 @@ mod tests { .auto_advance(Duration::from_millis(200)) .auto_advance_limit(Duration::from_millis(500)) .to_clock(); - let context = PipelineContext::new(clock.clone()) + let context = ResilienceContext::new(clock.clone()) .enable_metrics(metrics.meter_provider()) .name("metrics_pipeline"); From 3587a9e6149e716bf2f4c670fa714aa4988deb17 Mon Sep 17 00:00:00 2001 From: martintomka Date: Thu, 22 Jan 2026 16:07:06 +0100 Subject: [PATCH 57/60] cleanup --- crates/seatbelt/src/lib.rs | 2 +- crates/seatbelt/src/shared/mod.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 20afb672..8b3b2620 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -87,7 +87,7 @@ //! # Overview //! //! This crate uses the [`layered`] crate for composing middleware. The middleware layers -//! can be stacked together using tuples and built into a service using the [`Stack`] trait. +//! can be stacked together using tuples and built into a service using the [`Stack`][layered::Stack] trait. //! //! Resilience middleware also requires [`Clock`][tick::Clock] from the [`tick`] crate for timing //! operations like delays, timeouts, and backoff calculations. The clock is passed through diff --git a/crates/seatbelt/src/shared/mod.rs b/crates/seatbelt/src/shared/mod.rs index 221dc0e7..f079dba8 100644 --- a/crates/seatbelt/src/shared/mod.rs +++ b/crates/seatbelt/src/shared/mod.rs @@ -4,11 +4,13 @@ /// A flag indicating that the required property is set. #[non_exhaustive] #[derive(Debug)] +#[doc(hidden)] pub struct Set; /// A flag indicating that the required property has not been set. #[non_exhaustive] #[derive(Debug)] +#[doc(hidden)] pub struct NotSet; mod context; From 52a6cbd0bb87db04c223766a5dbf91d05819b5dc Mon Sep 17 00:00:00 2001 From: martintomka Date: Thu, 22 Jan 2026 16:24:01 +0100 Subject: [PATCH 58/60] type state concisenes --- crates/seatbelt/src/circuit_breaker/layer.rs | 31 ++++++++++++-------- crates/seatbelt/src/retry/layer.rs | 29 ++++++++++-------- crates/seatbelt/src/timeout/layer.rs | 30 ++++++++++--------- 3 files changed, 51 insertions(+), 39 deletions(-) diff --git a/crates/seatbelt/src/circuit_breaker/layer.rs b/crates/seatbelt/src/circuit_breaker/layer.rs index d518d149..e82a96b0 100644 --- a/crates/seatbelt/src/circuit_breaker/layer.rs +++ b/crates/seatbelt/src/circuit_breaker/layer.rs @@ -25,8 +25,13 @@ use layered::Layer; /// - [`rejected_input`][CircuitLayer::rejected_input]: Required to specify the output when the circuit is open and inputs are rejected /// /// For comprehensive documentation and examples, see the [`circuit_breaker` module][crate::circuit_breaker] documentation. +/// +/// # Type State +/// +/// - `S1`: Tracks whether [`recovery`][CircuitLayer::recovery] has been set +/// - `S2`: Tracks whether [`rejected_input`][CircuitLayer::rejected_input] has been set #[derive(Debug)] -pub struct CircuitLayer { +pub struct CircuitLayer { context: ResilienceContext, recovery: Option>, rejected_input: Option>, @@ -41,7 +46,7 @@ pub struct CircuitLayer sampling_duration: Duration, break_duration: Duration, half_open_mode: HalfOpenMode, - _state: PhantomData Out>, + _state: PhantomData Out>, } impl CircuitLayer { @@ -67,7 +72,7 @@ impl CircuitLayer { } } -impl CircuitLayer, RecoveryState, RejectedInputState> { +impl CircuitLayer, S1, S2> { /// Sets the error to return when the circuit breaker is open for Result-returning services. /// /// When the circuit is open, requests are immediately rejected and this function @@ -85,14 +90,14 @@ impl CircuitLayer E + Send + Sync + 'static, - ) -> CircuitLayer, RecoveryState, Set> { - self.into_state::() + ) -> CircuitLayer, S1, Set> { + self.into_state::() .rejected_input(move |input, args| Err(error_producer(input, args))) .into_state() } } -impl CircuitLayer { +impl CircuitLayer { /// Sets the recovery classification function. /// /// This function determines whether a specific output represents a failure @@ -110,9 +115,9 @@ impl CircuitLayer RecoveryInfo + Send + Sync + 'static, - ) -> CircuitLayer { + ) -> CircuitLayer { self.recovery = Some(ShouldRecover::new(recover_fn)); - self.into_state::() + self.into_state::() } /// Automatically sets the recovery classification function for types that implement [`Recovery`]. @@ -129,7 +134,7 @@ impl CircuitLayer CircuitLayer + pub fn recovery(self) -> CircuitLayer where Out: Recovery, { @@ -151,9 +156,9 @@ impl CircuitLayer Out + Send + Sync + 'static, - ) -> CircuitLayer { + ) -> CircuitLayer { self.rejected_input = Some(RejectedInput::new(rejected_fn)); - self.into_state::() + self.into_state::() } /// Sets the failure threshold for the circuit breaker. @@ -408,7 +413,7 @@ impl Layer for CircuitLayer { } } -impl CircuitLayer { +impl CircuitLayer { fn probes_options(&self) -> ProbesOptions { self.half_open_mode // we will use break duration as the sampling duration for probes @@ -427,7 +432,7 @@ impl CircuitLayer(self) -> CircuitLayer { + fn into_state(self) -> CircuitLayer { CircuitLayer { context: self.context, recovery: self.recovery, diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index de535bfb..b4b07ebd 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -23,8 +23,13 @@ use layered::Layer; /// - [`recovery`][RetryLayer::recovery]: Required to determine if an output should trigger a retry /// /// For comprehensive examples, see the [retry module][crate::retry] documentation. +/// +/// # Type State +/// +/// - `S1`: Tracks whether [`clone_input`][RetryLayer::clone_input] has been set +/// - `S2`: Tracks whether [`recovery`][RetryLayer::recovery] has been set #[derive(Debug)] -pub struct RetryLayer { +pub struct RetryLayer { context: ResilienceContext, max_attempts: MaxAttempts, backoff: BackoffOptions, @@ -35,7 +40,7 @@ pub struct RetryLayer { telemetry: TelemetryHelper, restore_input: Option>, handle_unavailable: bool, - _state: PhantomData Out>, + _state: PhantomData Out>, } impl RetryLayer { @@ -57,7 +62,7 @@ impl RetryLayer { } } -impl RetryLayer { +impl RetryLayer { /// Sets the maximum number of retry attempts. /// /// This specifies the maximum number of retry attempts in addition to the original call. @@ -153,9 +158,9 @@ impl RetryLayer Option + Send + Sync + 'static, - ) -> RetryLayer { + ) -> RetryLayer { self.clone_input = Some(CloneInput::new(clone_fn)); - self.into_state::() + self.into_state::() } /// Automatically sets the input cloning function for types that implement [`Clone`]. @@ -171,7 +176,7 @@ impl RetryLayer RetryLayer + pub fn clone_input(self) -> RetryLayer where In: Clone, { @@ -195,9 +200,9 @@ impl RetryLayer RecoveryInfo + Send + Sync + 'static, - ) -> RetryLayer { + ) -> RetryLayer { self.should_recover = Some(ShouldRecover::new(recover_fn)); - self.into_state::() + self.into_state::() } /// Automatically sets the recovery classification function for types that implement [`Recovery`]. @@ -214,7 +219,7 @@ impl RetryLayer RetryLayer + pub fn recovery(self) -> RetryLayer where Out: Recovery, { @@ -423,7 +428,7 @@ impl Layer for RetryLayer { } } -impl RetryLayer, CloneInputState, RecoveryState> { +impl RetryLayer, S1, S2> { /// Sets a specialized input restoration callback that operates only on error cases. /// /// This is a convenience method for working with `Result` outputs, where you @@ -487,8 +492,8 @@ impl RetryLayer RetryLayer { - fn into_state(self) -> RetryLayer { +impl RetryLayer { + fn into_state(self) -> RetryLayer { RetryLayer { context: self.context, max_attempts: self.max_attempts, diff --git a/crates/seatbelt/src/timeout/layer.rs b/crates/seatbelt/src/timeout/layer.rs index 34e7e478..2aecc1a1 100644 --- a/crates/seatbelt/src/timeout/layer.rs +++ b/crates/seatbelt/src/timeout/layer.rs @@ -22,8 +22,13 @@ use layered::Layer; /// - [`timeout`][TimeoutLayer::timeout]: Required to set the timeout duration for operations /// /// For comprehensive examples, see the [timeout module][crate::timeout] documentation. +/// +/// # Type State +/// +/// - `S1`: Tracks whether [`timeout`][TimeoutLayer::timeout] has been set +/// - `S2`: Tracks whether [`timeout_output`][TimeoutLayer::timeout_output] has been set #[derive(Debug)] -pub struct TimeoutLayer { +pub struct TimeoutLayer { context: ResilienceContext, timeout: Option, timeout_output: Option>, @@ -31,7 +36,7 @@ pub struct TimeoutLayer { enable_if: EnableIf, telemetry: TelemetryHelper, timeout_override: Option>, - _state: PhantomData Out>, + _state: PhantomData Out>, } impl TimeoutLayer { @@ -50,7 +55,7 @@ impl TimeoutLayer { } } -impl TimeoutLayer, Timeout, TimeoutOutput> { +impl TimeoutLayer, S1, S2> { /// Configures the error value to return when a timeout occurs for Result types. /// /// This is a convenience method for Result types that creates an error value @@ -64,14 +69,14 @@ impl TimeoutLayer, Timeou pub fn timeout_error( self, timeout_error: impl Fn(TimeoutOutputArgs) -> E + Send + Sync + 'static, - ) -> TimeoutLayer, Timeout, Set> { - self.into_state::() + ) -> TimeoutLayer, S1, Set> { + self.into_state::() .timeout_output(move |args| Err(timeout_error(args))) .into_state() } } -impl TimeoutLayer { +impl TimeoutLayer { /// Sets the timeout duration. /// /// This specifies how long to wait before timing out an operation. @@ -81,9 +86,9 @@ impl TimeoutLayer TimeoutLayer { + pub fn timeout(mut self, timeout: Duration) -> TimeoutLayer { self.timeout = Some(timeout); - self.into_state::() + self.into_state::() } /// Sets the timeout result factory function. @@ -97,12 +102,9 @@ impl TimeoutLayer Out + Send + Sync + 'static, - ) -> TimeoutLayer { + pub fn timeout_output(mut self, output: impl Fn(TimeoutOutputArgs) -> Out + Send + Sync + 'static) -> TimeoutLayer { self.timeout_output = Some(TimeoutOutputCallback::new(output)); - self.into_state::() + self.into_state::() } /// Configures a callback invoked when a timeout occurs. @@ -211,7 +213,7 @@ impl Layer for TimeoutLayer { } } -impl TimeoutLayer { +impl TimeoutLayer { fn into_state(self) -> TimeoutLayer { TimeoutLayer { timeout: self.timeout, From f73a666d50199ceb691ee5ff331292faaabd7af7 Mon Sep 17 00:00:00 2001 From: martintomka Date: Thu, 22 Jan 2026 17:36:35 +0100 Subject: [PATCH 59/60] cleanup --- crates/seatbelt/src/{shared => }/context.rs | 0 crates/seatbelt/src/lib.rs | 7 ++++-- .../seatbelt/src/{shared => retry}/attempt.rs | 0 crates/seatbelt/src/retry/backoff copy.rs | 4 +++ crates/seatbelt/src/retry/backoff.rs | 24 +++++++++++++++++- crates/seatbelt/src/retry/layer.rs | 2 +- crates/seatbelt/src/retry/mod.rs | 6 +++-- crates/seatbelt/src/retry/service.rs | 5 ++-- .../seatbelt/src/{shared/mod.rs => shared.rs} | 11 -------- crates/seatbelt/src/shared/backoff.rs | 25 ------------------- 10 files changed, 39 insertions(+), 45 deletions(-) rename crates/seatbelt/src/{shared => }/context.rs (100%) rename crates/seatbelt/src/{shared => retry}/attempt.rs (100%) create mode 100644 crates/seatbelt/src/retry/backoff copy.rs rename crates/seatbelt/src/{shared/mod.rs => shared.rs} (62%) delete mode 100644 crates/seatbelt/src/shared/backoff.rs diff --git a/crates/seatbelt/src/shared/context.rs b/crates/seatbelt/src/context.rs similarity index 100% rename from crates/seatbelt/src/shared/context.rs rename to crates/seatbelt/src/context.rs diff --git a/crates/seatbelt/src/lib.rs b/crates/seatbelt/src/lib.rs index 8b3b2620..1f407858 100644 --- a/crates/seatbelt/src/lib.rs +++ b/crates/seatbelt/src/lib.rs @@ -122,8 +122,11 @@ #[doc(inline)] pub use recoverable::{Recovery, RecoveryInfo, RecoveryKind}; -pub(crate) mod shared; -pub use crate::shared::{NotSet, ResilienceContext, Set}; +mod context; +pub use context::ResilienceContext; + +mod shared; +pub use crate::shared::{NotSet, Set}; #[cfg(any(feature = "timeout", test))] pub mod timeout; diff --git a/crates/seatbelt/src/shared/attempt.rs b/crates/seatbelt/src/retry/attempt.rs similarity index 100% rename from crates/seatbelt/src/shared/attempt.rs rename to crates/seatbelt/src/retry/attempt.rs diff --git a/crates/seatbelt/src/retry/backoff copy.rs b/crates/seatbelt/src/retry/backoff copy.rs new file mode 100644 index 00000000..bd4947e5 --- /dev/null +++ b/crates/seatbelt/src/retry/backoff copy.rs @@ -0,0 +1,4 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + + diff --git a/crates/seatbelt/src/retry/backoff.rs b/crates/seatbelt/src/retry/backoff.rs index 43dad77b..3d963a39 100644 --- a/crates/seatbelt/src/retry/backoff.rs +++ b/crates/seatbelt/src/retry/backoff.rs @@ -4,7 +4,6 @@ use std::cmp::min; use std::time::Duration; -use crate::retry::Backoff; use crate::retry::constants::{DEFAULT_BACKOFF, DEFAULT_BASE_DELAY, DEFAULT_USE_JITTER}; use crate::rnd::Rnd; @@ -14,6 +13,29 @@ const JITTER_FACTOR: f64 = 0.5; /// The default factor used for exponential backoff calculations for cases where jitter is not applied. const EXPONENTIAL_FACTOR: f64 = 2.0; +/// Defines the backoff strategy used by resilience middleware for retry operations. +/// +/// Backoff strategies control how delays between retry attempts are calculated, providing +/// different approaches to spacing out retries to avoid overwhelming failing systems while +/// balancing responsiveness and resource utilization. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Backoff { + /// Constant backoff strategy that maintains consistent delays between attempts. + /// + /// **Example with `2s` base delay:** `2s, 2s, 2s, 2s, ...` + Constant, + + /// Linear backoff strategy that increases delays proportionally with attempt count. + /// + /// **Example with `2s` base delay:** `2s, 4s, 6s, 8s, 10s, ...` + Linear, + + /// Exponential backoff strategy that doubles delays with each attempt. + /// + /// **Example with `2s` base delay:** `2s, 4s, 8s, 16s, 32s, ...` + Exponential, +} + // The delay generation follows the Polly V8 implementation: // // https://github.com/App-vNext/Polly/blob/452b34ee1e3a45ccce156a6980f60901a623ee67/src/Polly.Core/Retry/RetryHelper.cs#L3 diff --git a/crates/seatbelt/src/retry/layer.rs b/crates/seatbelt/src/retry/layer.rs index b4b07ebd..51f62939 100644 --- a/crates/seatbelt/src/retry/layer.rs +++ b/crates/seatbelt/src/retry/layer.rs @@ -5,10 +5,10 @@ use std::borrow::Cow; use std::marker::PhantomData; use std::time::Duration; +use super::MaxAttempts; use crate::retry::backoff::BackoffOptions; use crate::retry::constants::DEFAULT_RETRY_ATTEMPTS; use crate::retry::{CloneArgs, CloneInput, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, Retry, ShouldRecover}; -use crate::shared::MaxAttempts; use crate::utils::EnableIf; use crate::utils::TelemetryHelper; use crate::{NotSet, Recovery, RecoveryInfo, ResilienceContext, Set, retry::Backoff}; diff --git a/crates/seatbelt/src/retry/mod.rs b/crates/seatbelt/src/retry/mod.rs index 87fb3fb9..07cbd290 100644 --- a/crates/seatbelt/src/retry/mod.rs +++ b/crates/seatbelt/src/retry/mod.rs @@ -188,17 +188,19 @@ //! ``` mod args; +mod attempt; mod backoff; mod callbacks; mod constants; mod layer; mod service; - #[cfg(any(feature = "metrics", test))] mod telemetry; -pub use crate::shared::{Attempt, Backoff}; pub use args::{CloneArgs, OnRetryArgs, RecoveryArgs, RestoreInputArgs}; +pub use attempt::Attempt; +pub(crate) use attempt::MaxAttempts; +pub use backoff::Backoff; pub(crate) use backoff::DelayBackoff; pub(crate) use callbacks::{CloneInput, OnRetry, RestoreInput, ShouldRecover}; pub use layer::RetryLayer; diff --git a/crates/seatbelt/src/retry/service.rs b/crates/seatbelt/src/retry/service.rs index fe5e8dfb..2cdf6b6a 100644 --- a/crates/seatbelt/src/retry/service.rs +++ b/crates/seatbelt/src/retry/service.rs @@ -7,10 +7,10 @@ use std::time::Duration; use layered::Service; use tick::Clock; +use super::MaxAttempts; use crate::retry::{ CloneArgs, CloneInput, DelayBackoff, OnRetry, OnRetryArgs, RecoveryArgs, RestoreInput, RestoreInputArgs, ShouldRecover, }; -use crate::shared::MaxAttempts; use crate::utils::EnableIf; use crate::{NotSet, RecoveryInfo, RecoveryKind, retry::Attempt}; @@ -265,8 +265,7 @@ mod tests { use tick::ClockControl; use super::*; - use crate::retry::RetryLayer; - use crate::shared::Backoff; + use crate::retry::{Backoff, RetryLayer}; use crate::testing::MetricTester; use crate::{ResilienceContext, Set}; use layered::Layer; diff --git a/crates/seatbelt/src/shared/mod.rs b/crates/seatbelt/src/shared.rs similarity index 62% rename from crates/seatbelt/src/shared/mod.rs rename to crates/seatbelt/src/shared.rs index f079dba8..839465a3 100644 --- a/crates/seatbelt/src/shared/mod.rs +++ b/crates/seatbelt/src/shared.rs @@ -12,14 +12,3 @@ pub struct Set; #[derive(Debug)] #[doc(hidden)] pub struct NotSet; - -mod context; -pub use context::ResilienceContext; - -mod attempt; -mod backoff; - -pub use attempt::Attempt; -#[cfg(any(feature = "retry", test))] -pub(crate) use attempt::MaxAttempts; -pub use backoff::Backoff; diff --git a/crates/seatbelt/src/shared/backoff.rs b/crates/seatbelt/src/shared/backoff.rs deleted file mode 100644 index 1d149ff7..00000000 --- a/crates/seatbelt/src/shared/backoff.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -/// Defines the backoff strategy used by resilience middleware for retry operations. -/// -/// Backoff strategies control how delays between retry attempts are calculated, providing -/// different approaches to spacing out retries to avoid overwhelming failing systems while -/// balancing responsiveness and resource utilization. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum Backoff { - /// Constant backoff strategy that maintains consistent delays between attempts. - /// - /// **Example with `2s` base delay:** `2s, 2s, 2s, 2s, ...` - Constant, - - /// Linear backoff strategy that increases delays proportionally with attempt count. - /// - /// **Example with `2s` base delay:** `2s, 4s, 6s, 8s, 10s, ...` - Linear, - - /// Exponential backoff strategy that doubles delays with each attempt. - /// - /// **Example with `2s` base delay:** `2s, 4s, 8s, 16s, 32s, ...` - Exponential, -} From b0a20e7dbf24d74c813d381168fe6fafc0779adb Mon Sep 17 00:00:00 2001 From: martintomka Date: Thu, 22 Jan 2026 17:38:13 +0100 Subject: [PATCH 60/60] PR comments --- crates/seatbelt/src/retry/attempt.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/seatbelt/src/retry/attempt.rs b/crates/seatbelt/src/retry/attempt.rs index a91e7959..8c6eb3c7 100644 --- a/crates/seatbelt/src/retry/attempt.rs +++ b/crates/seatbelt/src/retry/attempt.rs @@ -9,8 +9,10 @@ use std::fmt::Display; /// first or last attempt. /// /// The default attempt has: +/// /// - `attempt`: 0 (first attempt, 0-based indexing) -/// - `is_last`: true (indicating this is both the first and last attempt) +/// - `is_first`: true +/// - `is_last`: true /// /// This represents a single-shot operation with no retries, where the first /// attempt is also the final attempt.