From 57957556e5a82b18713d14c69b097060ceb28004 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Thu, 5 Mar 2026 09:13:14 +0100 Subject: [PATCH 1/3] test: stability of test_sign_contention_5_nodes - increase stockpile_max to have more T+P generations in parallel - increase time for T+P generation by 1 min - increase time for signature generation by 1 min None of these are perfect solutions. The generation really should be faster. But this might help with the sporadic CI errors that we currently face. --- integration-tests/tests/cases/mpc.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/integration-tests/tests/cases/mpc.rs b/integration-tests/tests/cases/mpc.rs index c407c93ab..bdd2ba432 100644 --- a/integration-tests/tests/cases/mpc.rs +++ b/integration-tests/tests/cases/mpc.rs @@ -521,7 +521,7 @@ async fn test_sign_contention_5_nodes() { const NUM_SIGN_REQUESTS: u8 = 5; // Reduced from 10 to match presignature availability const MIN_PRESIGNATURES_PER_OWNER: usize = 3; const STOCKPILE_MIN: u32 = 8; - const STOCKPILE_MAX: u32 = 12; + const STOCKPILE_MAX: u32 = 24; tracing::info!( num_nodes = NUM_NODES, @@ -542,8 +542,9 @@ async fn test_sign_contention_5_nodes() { // Wait for presignatures to be generated - 5-node triple generation takes ~3-4 minutes // We wait for a modest per-owner count since distribution is not uniform + // TODO: reduce timeout once P+T generation is more reliable tracing::info!("waiting for presignatures to be generated (triple gen takes ~3-4 min)..."); - let timeout = Duration::from_secs(480); // 8 minutes for triple + presignature generation + let timeout = Duration::from_secs(540); // 9 minutes for triple + presignature generation network .assert_presignatures(MIN_PRESIGNATURES_PER_OWNER, timeout) .await; @@ -563,7 +564,8 @@ async fn test_sign_contention_5_nodes() { } // Wait for all signatures - allow more time for 5-node consensus - let timeout = Duration::from_secs(120); + // TODO: reduce timeout once signature 
generation is more reliable + let timeout = Duration::from_secs(180); let actions = network .assert_actions(NUM_SIGN_REQUESTS as usize, timeout) .await; From 7e358897004348084636e564cd2381ced7687eee Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Thu, 5 Mar 2026 15:44:52 +0100 Subject: [PATCH 2/3] run triple poke in blocking task Tokio futures should not run for long. The poke on triples may take several seconds of CPU time. During this time, other tasks on the same executor will be blocked. Spawning the work to a separate thread avoids this issue. --- chain-signatures/node/src/protocol/triple.rs | 37 +++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 6b3e55b10..1ee0ae81a 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -42,7 +42,9 @@ struct TripleGenerator { me: Participant, proposer: Participant, participants: Vec, - protocol: TripleProtocol, + /// Option to temporarily move it to a blocking task. Must be Some in all + /// other circumstances. + protocol: Option, timeout: Duration, slot: TriplePairSlot, created: Instant, @@ -84,7 +86,7 @@ impl TripleGenerator { me, proposer, participants, - protocol: Box::new(protocol), + protocol: Some(Box::new(protocol)), timeout, slot, created: Instant::now(), @@ -129,7 +131,31 @@ impl TripleGenerator { loop { let poke_start_time = Instant::now(); - let action = match self.protocol.poke() { + // Temporarily move protocol into blocking task and restore it immediately after. 
+ let mut protocol = self.protocol.take().expect("must be always be Some"); + + let poke_result = + match tokio::task::spawn_blocking(move || (protocol.poke(), protocol)).await { + Ok((res, protocol)) => { + self.protocol = Some(protocol); + res + } + Err(err) => { + crate::metrics::protocols::TRIPLE_GENERATOR_FAILURES.inc(); + if self.proposer == self.me { + crate::metrics::protocols::TRIPLE_GENERATOR_OWNED_FAILURES.inc(); + } + tracing::warn!( + id = self.id, + ?err, + elapsed = ?start_time.elapsed(), + "triple generation failed in a spawned blocking task", + ); + return; + } + }; + + let action = match poke_result { Ok(action) => action, Err(err) => { crate::metrics::protocols::TRIPLE_GENERATOR_FAILURES.inc(); @@ -164,7 +190,10 @@ impl TripleGenerator { } break; }; - self.protocol.message(msg.from, msg.data); + self.protocol + .as_mut() + .expect("must always be Some") + .message(msg.from, msg.data); } Action::SendMany(data) => { for to in &self.participants { From e786ac4d6b9f00d8f9854e75268ea63616a37407 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Thu, 5 Mar 2026 16:48:28 +0100 Subject: [PATCH 3/3] increase signature timeout --- integration-tests/tests/cases/mpc.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/integration-tests/tests/cases/mpc.rs b/integration-tests/tests/cases/mpc.rs index bdd2ba432..bf338360b 100644 --- a/integration-tests/tests/cases/mpc.rs +++ b/integration-tests/tests/cases/mpc.rs @@ -537,6 +537,7 @@ async fn test_sign_contention_5_nodes() { .with_max_triples_stockpile(STOCKPILE_MAX) .with_min_presignatures_stockpile(STOCKPILE_MIN) .with_max_presignatures_stockpile(STOCKPILE_MAX) + .with_signature_timeout_ms(20_000) // default is 10_000 but this test sometimes takes longer .build() .await;