diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 6b3e55b10..1ee0ae81a 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -42,7 +42,9 @@ struct TripleGenerator { me: Participant, proposer: Participant, participants: Vec, - protocol: TripleProtocol, + /// Option to temporarily move it to a blocking task. Must be Some in all + /// other circumstances. + protocol: Option, timeout: Duration, slot: TriplePairSlot, created: Instant, @@ -84,7 +86,7 @@ impl TripleGenerator { me, proposer, participants, - protocol: Box::new(protocol), + protocol: Some(Box::new(protocol)), timeout, slot, created: Instant::now(), @@ -129,7 +131,31 @@ impl TripleGenerator { loop { let poke_start_time = Instant::now(); - let action = match self.protocol.poke() { + // Temporarily move protocol into blocking task and restore it immediately after. + let mut protocol = self.protocol.take().expect("must be always be Some"); + + let poke_result = + match tokio::task::spawn_blocking(move || (protocol.poke(), protocol)).await { + Ok((res, protocol)) => { + self.protocol = Some(protocol); + res + } + Err(err) => { + crate::metrics::protocols::TRIPLE_GENERATOR_FAILURES.inc(); + if self.proposer == self.me { + crate::metrics::protocols::TRIPLE_GENERATOR_OWNED_FAILURES.inc(); + } + tracing::warn!( + id = self.id, + ?err, + elapsed = ?start_time.elapsed(), + "triple generation failed in a spawned blocking task", + ); + return; + } + }; + + let action = match poke_result { Ok(action) => action, Err(err) => { crate::metrics::protocols::TRIPLE_GENERATOR_FAILURES.inc(); @@ -164,7 +190,10 @@ impl TripleGenerator { } break; }; - self.protocol.message(msg.from, msg.data); + self.protocol + .as_mut() + .expect("must always be Some") + .message(msg.from, msg.data); } Action::SendMany(data) => { for to in &self.participants { diff --git a/integration-tests/tests/cases/mpc.rs b/integration-tests/tests/cases/mpc.rs index c407c93ab..bf338360b 100644 --- a/integration-tests/tests/cases/mpc.rs +++ b/integration-tests/tests/cases/mpc.rs @@ -521,7 +521,7 @@ async fn test_sign_contention_5_nodes() { const NUM_SIGN_REQUESTS: u8 = 5; // Reduced from 10 to match presignature availability const MIN_PRESIGNATURES_PER_OWNER: usize = 3; const STOCKPILE_MIN: u32 = 8; - const STOCKPILE_MAX: u32 = 12; + const STOCKPILE_MAX: u32 = 24; tracing::info!( num_nodes = NUM_NODES, @@ -537,13 +537,15 @@ async fn test_sign_contention_5_nodes() { .with_max_triples_stockpile(STOCKPILE_MAX) .with_min_presignatures_stockpile(STOCKPILE_MIN) .with_max_presignatures_stockpile(STOCKPILE_MAX) + .with_signature_timeout_ms(20_000) // default is 10_000 but this test sometimes takes longer .build() .await; // Wait for presignatures to be generated - 5-node triple generation takes ~3-4 minutes // We wait for a modest per-owner count since distribution is not uniform + // TODO: reduce timeout once P+T generation is more reliable tracing::info!("waiting for presignatures to be generated (triple gen takes ~3-4 min)..."); - let timeout = Duration::from_secs(480); // 8 minutes for triple + presignature generation + let timeout = Duration::from_secs(540); // 9 minutes for triple + presignature generation network .assert_presignatures(MIN_PRESIGNATURES_PER_OWNER, timeout) .await; @@ -563,7 +565,8 @@ async fn test_sign_contention_5_nodes() { } // Wait for all signatures - allow more time for 5-node consensus - let timeout = Duration::from_secs(120); + // TODO: reduce timeout once signature generation is more reliable + let timeout = Duration::from_secs(180); let actions = network .assert_actions(NUM_SIGN_REQUESTS as usize, timeout) .await;