From d8806020e44c52286bbaaa328c05a81e53651e99 Mon Sep 17 00:00:00 2001 From: Julian Pryde Date: Sat, 30 Aug 2025 15:56:55 -0400 Subject: [PATCH 1/6] add jitter to dns call timeoutes when made in the `stagger_call` function. --- iroh-relay/src/dns.rs | 59 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs index d70559385d..05d8663f86 100644 --- a/iroh-relay/src/dns.rs +++ b/iroh-relay/src/dns.rs @@ -28,6 +28,10 @@ pub const N0_DNS_NODE_ORIGIN_PROD: &str = "dns.iroh.link"; /// The n0 testing DNS node origin, for testing. pub const N0_DNS_NODE_ORIGIN_STAGING: &str = "staging-dns.iroh.link"; +/// Maximum jitter factor for staggered calls. The actual jitter is calculated as +/// a random number in the range: delay +/- (delay * (((max_jitter_factor) / 10) / 2)) +const MAX_JITTER_FACTOR: u64 = 4; + /// Potential errors related to dns. #[common_fields({ backtrace: Option, @@ -80,6 +84,14 @@ impl StaggeredError { } } +/// Error returned in the remote chance that the jitter calculation produces an int overflow. +#[allow(missing_docs)] +#[derive(Debug, Snafu)] +enum JitterCalcError { + #[snafu(display("Either delay or MAX_JITTER_FACTOR is too large"))] + Overflow, +} + /// The DNS resolver used throughout `iroh`. #[derive(Debug, Clone)] pub struct DnsResolver { @@ -503,7 +515,8 @@ async fn stagger_call< // NOTE: we add the 0 delay here to have a uniform set of futures. This is more performant than // using alternatives that allow futures of different types. for delay in std::iter::once(&0u64).chain(delays_ms) { - let delay = Duration::from_millis(*delay); + let jittered_delay = add_jitter(delay).unwrap_or(*delay); + let delay = Duration::from_millis(jittered_delay); let fut = f(); let staggered_fut = async move { time::sleep(delay).await; @@ -523,6 +536,19 @@ async fn stagger_call< Err(StaggeredError::new(errors)) } +fn add_jitter(delay: &u64) -> Result { + let max_jitter = delay + .checked_mul(MAX_JITTER_FACTOR).ok_or(JitterCalcError::Overflow)? + / 10; + let jitter = if max_jitter == 0 { + 0 + } else { + rand::random::() % max_jitter + }; + Ok((delay.checked_mul(8).ok_or(JitterCalcError::Overflow)? + / 10).checked_add(jitter).ok_or(JitterCalcError::Overflow)?) +} + #[cfg(test)] pub(crate) mod tests { use std::sync::atomic::AtomicUsize; @@ -548,4 +574,35 @@ pub(crate) mod tests { let result = stagger_call(f, &delays).await.unwrap(); assert_eq!(result, 5) } + + #[tokio::test] + #[traced_test] + async fn jitter_test_zero() { + let jittered_delay = jitter_test_backend(0).await.unwrap(); + assert_eq!(jittered_delay, 0); + } + + //Sanity checks that I did the math right + #[tokio::test] + #[traced_test] + async fn jitter_test_nonzero_positive() { + let delay: u64 = 300; + for _ in 0..1000000{ + assert!(jitter_test_backend(delay).await.unwrap() >= delay * 8 / 10); + } + } + + #[tokio::test] + #[traced_test] + async fn jitter_test_nonzero_negative() { + let delay: u64 = 300; + for _ in 0..1000000{ + assert!(jitter_test_backend(delay).await.unwrap() < delay * 12 / 10); + } + } + + async fn jitter_test_backend(delay: u64) -> Result { + add_jitter(&delay) + } + } From 603358094b2c7525c24ae00da5037ac9bc422440 Mon Sep 17 00:00:00 2001 From: Julian Pryde Date: Sun, 7 Sep 2025 03:19:20 +0900 Subject: [PATCH 2/6] Fix jitter calculation logic and tests --- iroh-relay/src/dns.rs | 60 +++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs index 05d8663f86..ea2d63cdcc 100644 --- a/iroh-relay/src/dns.rs +++ b/iroh-relay/src/dns.rs @@ -28,9 +28,8 @@ pub const N0_DNS_NODE_ORIGIN_PROD: &str = "dns.iroh.link"; /// The n0 testing DNS node origin, for testing. pub const N0_DNS_NODE_ORIGIN_STAGING: &str = "staging-dns.iroh.link"; -/// Maximum jitter factor for staggered calls. The actual jitter is calculated as -/// a random number in the range: delay +/- (delay * (((max_jitter_factor) / 10) / 2)) -const MAX_JITTER_FACTOR: u64 = 4; +/// Percent of total delay to jitter. 0.4 means +/- 20% of delay. +const MAX_JITTER_PERCENT: u64 = 20; /// Potential errors related to dns. #[common_fields({ @@ -84,14 +83,6 @@ impl StaggeredError { } } -/// Error returned in the remote chance that the jitter calculation produces an int overflow. -#[allow(missing_docs)] -#[derive(Debug, Snafu)] -enum JitterCalcError { - #[snafu(display("Either delay or MAX_JITTER_FACTOR is too large"))] - Overflow, -} - /// The DNS resolver used throughout `iroh`. #[derive(Debug, Clone)] pub struct DnsResolver { @@ -515,8 +506,7 @@ async fn stagger_call< // NOTE: we add the 0 delay here to have a uniform set of futures. This is more performant than // using alternatives that allow futures of different types. for delay in std::iter::once(&0u64).chain(delays_ms) { - let jittered_delay = add_jitter(delay).unwrap_or(*delay); - let delay = Duration::from_millis(jittered_delay); + let delay = add_jitter(delay); let fut = f(); let staggered_fut = async move { time::sleep(delay).await; @@ -536,17 +526,19 @@ async fn stagger_call< Err(StaggeredError::new(errors)) } -fn add_jitter(delay: &u64) -> Result { - let max_jitter = delay - .checked_mul(MAX_JITTER_FACTOR).ok_or(JitterCalcError::Overflow)? - / 10; - let jitter = if max_jitter == 0 { - 0 - } else { - rand::random::() % max_jitter - }; - Ok((delay.checked_mul(8).ok_or(JitterCalcError::Overflow)? - / 10).checked_add(jitter).ok_or(JitterCalcError::Overflow)?) +fn add_jitter(delay: &u64) -> Duration { + // If delay is 0, return 0 immediately. + if *delay == 0 { + return Duration::ZERO + } + + // Calculate jitter as a random value in the range of +/- MAX_JITTER_PERCENT of the delay. + let max_jitter = delay.saturating_mul(MAX_JITTER_PERCENT * 2) / 100; + let jitter = rand::random::() % max_jitter; + + Duration::from_millis( + delay.saturating_sub(max_jitter / 2).saturating_add(jitter) + ) } #[cfg(test)] @@ -578,31 +570,27 @@ pub(crate) mod tests { #[tokio::test] #[traced_test] async fn jitter_test_zero() { - let jittered_delay = jitter_test_backend(0).await.unwrap(); - assert_eq!(jittered_delay, 0); + let jittered_delay = add_jitter(&0); + assert_eq!(jittered_delay, Duration::from_secs(0)); } //Sanity checks that I did the math right - #[tokio::test] + #[test] #[traced_test] - async fn jitter_test_nonzero_positive() { + fn jitter_test_nonzero_positive() { let delay: u64 = 300; for _ in 0..1000000{ - assert!(jitter_test_backend(delay).await.unwrap() >= delay * 8 / 10); + assert!(add_jitter(&delay) >= Duration::from_millis(delay * 8 / 10)); } } - #[tokio::test] + #[test] #[traced_test] - async fn jitter_test_nonzero_negative() { + fn jitter_test_nonzero_negative() { let delay: u64 = 300; for _ in 0..1000000{ - assert!(jitter_test_backend(delay).await.unwrap() < delay * 12 / 10); + assert!(add_jitter(&delay) < Duration::from_millis(delay * 12 / 10)); } } - - async fn jitter_test_backend(delay: u64) -> Result { - add_jitter(&delay) - } } From ef7a8ee1634d6339b8a6a592b48634aad7ee667a Mon Sep 17 00:00:00 2001 From: Julian Pryde Date: Wed, 10 Sep 2025 16:31:09 +0900 Subject: [PATCH 3/6] Fix formatting and nomenclature issues --- iroh-relay/src/dns.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs index ea2d63cdcc..22c2658320 100644 --- a/iroh-relay/src/dns.rs +++ b/iroh-relay/src/dns.rs @@ -28,7 +28,7 @@ pub const N0_DNS_NODE_ORIGIN_PROD: &str = "dns.iroh.link"; /// The n0 testing DNS node origin, for testing. pub const N0_DNS_NODE_ORIGIN_STAGING: &str = "staging-dns.iroh.link"; -/// Percent of total delay to jitter. 0.4 means +/- 20% of delay. +/// Percent of total delay to jitter. 20 means +/- 20% of delay. const MAX_JITTER_PERCENT: u64 = 20; /// Potential errors related to dns. @@ -529,16 +529,14 @@ async fn stagger_call< fn add_jitter(delay: &u64) -> Duration { // If delay is 0, return 0 immediately. if *delay == 0 { - return Duration::ZERO + return Duration::ZERO; } // Calculate jitter as a random value in the range of +/- MAX_JITTER_PERCENT of the delay. let max_jitter = delay.saturating_mul(MAX_JITTER_PERCENT * 2) / 100; let jitter = rand::random::() % max_jitter; - Duration::from_millis( - delay.saturating_sub(max_jitter / 2).saturating_add(jitter) - ) + Duration::from_millis(delay.saturating_sub(max_jitter / 2).saturating_add(jitter)) } #[cfg(test)] @@ -577,20 +575,19 @@ pub(crate) mod tests { //Sanity checks that I did the math right #[test] #[traced_test] - fn jitter_test_nonzero_positive() { + fn jitter_test_nonzero_lower_bound() { let delay: u64 = 300; - for _ in 0..1000000{ + for _ in 0..1000000 { assert!(add_jitter(&delay) >= Duration::from_millis(delay * 8 / 10)); } } #[test] #[traced_test] - fn jitter_test_nonzero_negative() { + fn jitter_test_nonzero_upper_bound() { let delay: u64 = 300; - for _ in 0..1000000{ + for _ in 0..1000000 { assert!(add_jitter(&delay) < Duration::from_millis(delay * 12 / 10)); } } - } From ae2bf21f283d59ce6c5c988bd15e8e41f49df0d9 Mon Sep 17 00:00:00 2001 From: Julian Pryde Date: Wed, 10 Sep 2025 16:35:23 +0900 Subject: [PATCH 4/6] refactor: make `jitter_test_zero` not async --- iroh-relay/src/dns.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs index 22c2658320..cd0f36402d 100644 --- a/iroh-relay/src/dns.rs +++ b/iroh-relay/src/dns.rs @@ -565,7 +565,7 @@ pub(crate) mod tests { assert_eq!(result, 5) } - #[tokio::test] + #[test] #[traced_test] async fn jitter_test_zero() { let jittered_delay = add_jitter(&0); From f710dd01dc5d32e92d4a06766528483faabe2815 Mon Sep 17 00:00:00 2001 From: Julian Pryde Date: Wed, 10 Sep 2025 16:36:09 +0900 Subject: [PATCH 5/6] fix: reduce iterations in jitter tests --- iroh-relay/src/dns.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs index cd0f36402d..3cf5fb4936 100644 --- a/iroh-relay/src/dns.rs +++ b/iroh-relay/src/dns.rs @@ -577,7 +577,7 @@ pub(crate) mod tests { #[traced_test] fn jitter_test_nonzero_lower_bound() { let delay: u64 = 300; - for _ in 0..1000000 { + for _ in 0..100 { assert!(add_jitter(&delay) >= Duration::from_millis(delay * 8 / 10)); } } @@ -586,7 +586,7 @@ pub(crate) mod tests { #[traced_test] fn jitter_test_nonzero_upper_bound() { let delay: u64 = 300; - for _ in 0..1000000 { + for _ in 0..100 { assert!(add_jitter(&delay) < Duration::from_millis(delay * 12 / 10)); } } From ae87df84fa5805f74ea120c4dacd1c98a1067691 Mon Sep 17 00:00:00 2001 From: Julian Pryde Date: Wed, 10 Sep 2025 16:38:54 +0900 Subject: [PATCH 6/6] fix: remove `async` keyword from `jitter_test_zero` --- iroh-relay/src/dns.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs index 3cf5fb4936..3228aed1fd 100644 --- a/iroh-relay/src/dns.rs +++ b/iroh-relay/src/dns.rs @@ -567,7 +567,7 @@ pub(crate) mod tests { #[test] #[traced_test] - async fn jitter_test_zero() { + fn jitter_test_zero() { let jittered_delay = add_jitter(&0); assert_eq!(jittered_delay, Duration::from_secs(0)); }