From 70726eab25eb5aa5dffe05c2191681633730f93e Mon Sep 17 00:00:00 2001 From: Bas van Dijk Date: Wed, 22 Apr 2026 08:11:25 +0000 Subject: [PATCH] Revert "perf(crypto): CON-1650 Use Rayon in NIDKG (#9007)" This reverts commit dd1ef70ce87bd85b77107ef4519f68f6f63c7fa8. --- Cargo.lock | 2 - .../crypto_lib/bls12_381/type/BUILD.bazel | 2 - .../crypto_lib/bls12_381/type/Cargo.toml | 5 - .../crypto_lib/bls12_381/type/benches/ops.rs | 209 ------------------ .../crypto_lib/bls12_381/type/src/lib.rs | 123 +++-------- .../threshold_sig/bls12_381/BUILD.bazel | 3 - .../threshold_sig/bls12_381/Cargo.toml | 1 - .../bls12_381/benches/fs_nidkg.rs | 73 +----- .../src/ni_dkg/fs_ni_dkg/dlog_recovery.rs | 198 +++++++---------- .../src/ni_dkg/fs_ni_dkg/forward_secure.rs | 150 ++++++------- .../src/ni_dkg/fs_ni_dkg/nizk_chunking.rs | 162 +++++++------- .../src/ni_dkg/fs_ni_dkg/nizk_sharing.rs | 165 +++++++------- .../ni_dkg/groth20_bls12_381/encryption.rs | 42 ++-- .../ni_dkg/groth20_bls12_381/transcript.rs | 28 +-- .../bls12_381/tests/stability.rs | 48 +--- .../src/vault/local_csp_vault/builder/mod.rs | 23 -- .../src/vault/local_csp_vault/mod.rs | 40 ---- .../src/vault/local_csp_vault/ni_dkg/mod.rs | 65 +++--- .../crypto_service_provider/src/vault/mod.rs | 6 +- .../tarpc_csp_vault_server.rs | 159 +++++-------- 20 files changed, 466 insertions(+), 1038 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e920cd72d14..a40a196b767f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8433,7 +8433,6 @@ dependencies = [ "paste", "rand 0.8.5", "rand_chacha 0.3.1", - "rayon", "sha2 0.10.9", "subtle", "zeroize", @@ -8667,7 +8666,6 @@ dependencies = [ "proptest-derive", "rand 0.8.5", "rand_chacha 0.3.1", - "rayon", "serde", "serde_bytes", "serde_cbor", diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel b/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel index 70d7bab675f5..f1e186f08aff 100644 --- a/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel +++ b/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel @@ -16,7 +16,6 @@ DEPENDENCIES = [ "@crate_index//:parking_lot", "@crate_index//:rand", "@crate_index//:rand_chacha", - "@crate_index//:rayon", "@crate_index//:sha2", "@crate_index//:subtle", "@crate_index//:zeroize", @@ -41,7 +40,6 @@ rust_library( name = "type", srcs = glob(["src/**"]), aliases = ALIASES, - crate_features = ["rayon"], crate_name = "ic_crypto_internal_bls12_381_type", proc_macro_deps = MACRO_DEPENDENCIES, version = "0.9.0", diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml b/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml index cf6ee784dfd4..4b021d3d1453 100644 --- a/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml +++ b/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml @@ -8,10 +8,6 @@ documentation.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[features] -default = ["rayon"] -rayon = ["dep:rayon"] - [dependencies] cached = { version = "0.49", default-features = false } hex = { workspace = true } @@ -22,7 +18,6 @@ parking_lot = { workspace = true } paste = { workspace = true } rand = { workspace = true } rand_chacha = { workspace = true } -rayon = { workspace = true, optional = true } sha2 = { workspace = true } subtle = { workspace = true } zeroize = { workspace = true } diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs b/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs index 724c62c01553..fb8ca60bd024 100644 --- a/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs +++ b/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs @@ -1088,212 +1088,6 @@ macro_rules! crypto_bls12_381_mul2_precomputation_init { crypto_bls12_381_mul2_precomputation_init!(g1, G1Projective); crypto_bls12_381_mul2_precomputation_init!(g2, G2Projective); -/// Benchmarks for operations that are parallelized when the `rayon` feature is -/// enabled. Run with and without `--features rayon` and compare results. -fn bls12_381_parallel_batch_ops(c: &mut Criterion) { - let mut group = c.benchmark_group("crypto_bls12_381_parallel_batch_ops"); - group.warm_up_time(WARMUP_TIME); - - let rng = &mut reproducible_rng(); - - for n in [2, 5, 10, 25, 50, 100] { - // --- G1 batch_mul --- - group.bench_with_input(BenchmarkId::new("g1_batch_mul", n), &n, |b, &size| { - b.iter_batched_ref( - || (random_g1(rng).to_affine(), n_random_scalar(size, rng)), - |(pt, scalars)| pt.batch_mul(scalars), - BatchSize::SmallInput, - ) - }); - - // --- G1 batch_mul_vartime --- - group.bench_with_input( - BenchmarkId::new("g1_batch_mul_vartime", n), - &n, - |b, &size| { - b.iter_batched_ref( - || (random_g1(rng).to_affine(), n_random_scalar(size, rng)), - |(pt, scalars)| pt.batch_mul_vartime(scalars), - BatchSize::SmallInput, - ) - }, - ); - - // --- G2 batch_mul --- - group.bench_with_input(BenchmarkId::new("g2_batch_mul", n), &n, |b, &size| { - b.iter_batched_ref( - || (random_g2(rng).to_affine(), n_random_scalar(size, rng)), - |(pt, scalars)| pt.batch_mul(scalars), - BatchSize::SmallInput, - ) - }); - - // --- G2 batch_mul_vartime --- - group.bench_with_input( - BenchmarkId::new("g2_batch_mul_vartime", n), - &n, - |b, &size| { - b.iter_batched_ref( - || (random_g2(rng).to_affine(), n_random_scalar(size, rng)), - |(pt, scalars)| pt.batch_mul_vartime(scalars), - BatchSize::SmallInput, - ) - }, - ); - - // --- G1 batch_deserialize --- - group.bench_with_input( - BenchmarkId::new("g1_batch_deserialize", n), - &n, - |b, &size| { - b.iter_batched_ref( - || { - (0..size) - .map(|_| random_g1(rng).to_affine().serialize()) - .collect::>() - }, - |bytes| G1Affine::batch_deserialize(bytes), - BatchSize::SmallInput, - ) - }, - ); - - // --- G2 batch_deserialize --- - group.bench_with_input( - BenchmarkId::new("g2_batch_deserialize", n), - &n, - |b, &size| { - b.iter_batched_ref( - || { - (0..size) - .map(|_| random_g2(rng).to_affine().serialize()) - .collect::>() - }, - |bytes| G2Affine::batch_deserialize(bytes), - BatchSize::SmallInput, - ) - }, - ); - - // --- G1 mul2 from precomputed table (N iterations) --- - group.bench_with_input(BenchmarkId::new("g1_mul2_from_tbl", n), &n, |b, &size| { - b.iter_batched_ref( - || { - let tbl = G1Projective::compute_mul2_tbl(&random_g1(rng), &random_g1(rng)); - let a = n_random_scalar(size, rng); - let b_vec = n_random_scalar(size, rng); - (tbl, a, b_vec) - }, - |(tbl, a, b_vec)| { - a.iter() - .zip(b_vec.iter()) - .map(|(ai, bi)| tbl.mul2(ai, bi)) - .collect::>() - }, - BatchSize::SmallInput, - ) - }); - - // --- G2 mul2 from precomputed table (N iterations) --- - group.bench_with_input(BenchmarkId::new("g2_mul2_from_tbl", n), &n, |b, &size| { - b.iter_batched_ref( - || { - let tbl = G2Projective::compute_mul2_tbl(&random_g2(rng), &random_g2(rng)); - let a = n_random_scalar(size, rng); - let b_vec = n_random_scalar(size, rng); - (tbl, a, b_vec) - }, - |(tbl, a, b_vec)| { - a.iter() - .zip(b_vec.iter()) - .map(|(ai, bi)| tbl.mul2(ai, bi)) - .collect::>() - }, - BatchSize::SmallInput, - ) - }); - - // --- batch signature verification (distinct) --- - group.bench_with_input( - BenchmarkId::new("verify_batch_distinct", n), - &n, - |b, &size| { - b.iter_batched_ref( - || (n_batch_sig_verification_instances(size, rng), rng.fork()), - |(sigs_pks_msgs, rng_fork)| { - let refs: Vec<_> = sigs_pks_msgs - .iter() - .map(|(sig, pk, msg)| (sig, pk, msg)) - .collect(); - black_box(verify_bls_signature_batch_distinct(&refs, rng_fork)); - }, - BatchSize::SmallInput, - ) - }, - ); - } - - group.finish(); -} - -/// Benchmarks for `mul2_array` which requires compile-time-known sizes. -/// Run with and without `--features rayon` and compare results. -macro_rules! bench_mul2_array { - ($fn_name:ident, $group_name:expr, $projective:ty, $random_pt:expr, $( $n:expr ),+) => { - fn $fn_name(c: &mut Criterion) { - let mut group = c.benchmark_group($group_name); - group.warm_up_time(WARMUP_TIME); - let rng = &mut reproducible_rng(); - - $( - group.bench_function(stringify!($n), |b| { - b.iter_batched_ref( - || { - let tbl = <$projective>::compute_mul2_tbl( - &$random_pt(rng), - &$random_pt(rng), - ); - let a: [Scalar; $n] = std::array::from_fn(|_| random_scalar(rng)); - let b_arr: [Scalar; $n] = std::array::from_fn(|_| random_scalar(rng)); - (tbl, a, b_arr) - }, - |(tbl, a, b_arr)| black_box(tbl.mul2_array(a, b_arr)), - BatchSize::SmallInput, - ) - }); - )+ - - group.finish(); - } - }; -} - -bench_mul2_array!( - bls12_381_g1_mul2_array, - "crypto_bls12_381_parallel_g1_mul2_array", - G1Projective, - random_g1, - 2, - 5, - 10, - 25, - 50, - 100 -); - -bench_mul2_array!( - bls12_381_g2_mul2_array, - "crypto_bls12_381_parallel_g2_mul2_array", - G2Projective, - random_g2, - 2, - 5, - 10, - 25, - 50, - 100 -); - criterion_group!( benches, bls12_381_scalar_ops, @@ -1305,8 +1099,5 @@ criterion_group!( bls12_381_batch_sig_verification_multithreaded, mul2_precomputation_g1, mul2_precomputation_g2, - bls12_381_parallel_batch_ops, - bls12_381_g1_mul2_array, - bls12_381_g2_mul2_array, ); criterion_main!(benches); diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs b/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs index 7bd254279d72..addc4e422014 100644 --- a/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs +++ b/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs @@ -37,8 +37,6 @@ use itertools::multiunzip; use pairing::group::{Group, ff::Field}; use paste::paste; use rand::{CryptoRng, Rng, RngCore}; -#[cfg(feature = "rayon")] -use rayon::prelude::*; use std::sync::{Arc, LazyLock}; use std::{collections::HashMap, fmt}; use zeroize::{Zeroize, ZeroizeOnDrop}; @@ -502,28 +500,21 @@ impl Scalar { /// /// This function returns Ok only if all of the provided inputs /// represent a valid scalar. - pub fn batch_deserialize + Sync>( + pub fn batch_deserialize>( inputs: &[B], ) -> Result, PairingInvalidScalar> { - #[cfg(feature = "rayon")] - { - inputs.par_iter().map(Self::deserialize).collect() - } - #[cfg(not(feature = "rayon"))] - { - let mut r = Vec::with_capacity(inputs.len()); - for input in inputs { - r.push(Self::deserialize(input)?); - } - Ok(r) + let mut r = Vec::with_capacity(inputs.len()); + for input in inputs { + r.push(Self::deserialize(input)?); } + Ok(r) } /// Deserialize multiple scalars /// /// This function returns Ok only if all of the provided inputs /// represent a valid scalar. - pub fn batch_deserialize_array + Sync, const N: usize>( + pub fn batch_deserialize_array, const N: usize>( inputs: &[B; N], ) -> Result<[Self; N], PairingInvalidScalar> { // This could be made nicer, and avoid the heap allocation, by @@ -1199,19 +1190,12 @@ macro_rules! define_affine_and_projective_types { /// This version verifies that the decoded point is within the prime order /// subgroup, and is safe to call on untrusted inputs. It returns Ok only /// if all of the provided bytes represent a valid point. - pub fn batch_deserialize + Sync>(inputs: &[B]) -> Result, PairingInvalidPoint> { - #[cfg(feature = "rayon")] - { - inputs.par_iter().map(Self::deserialize).collect() - } - #[cfg(not(feature = "rayon"))] - { - let mut r = Vec::with_capacity(inputs.len()); - for input in inputs { - r.push(Self::deserialize(input)?); - } - Ok(r) + pub fn batch_deserialize>(inputs: &[B]) -> Result, PairingInvalidPoint> { + let mut r = Vec::with_capacity(inputs.len()); + for input in inputs { + r.push(Self::deserialize(input)?); } + Ok(r) } /// Deserialize multiple points (compressed format only) @@ -1219,7 +1203,7 @@ macro_rules! define_affine_and_projective_types { /// This version verifies that the decoded point is within the prime order /// subgroup, and is safe to call on untrusted inputs. It returns Ok only /// if all of the provided bytes represent a valid point. - pub fn batch_deserialize_array + Sync, const N: usize>(inputs: &[B; N]) -> Result<[Self; N], PairingInvalidPoint> { + pub fn batch_deserialize_array, const N: usize>(inputs: &[B; N]) -> Result<[Self; N], PairingInvalidPoint> { // This could be made nicer, and avoid the heap allocation, by // using array::try_map (currently only available in nightly) @@ -1296,16 +1280,10 @@ macro_rules! define_affine_and_projective_types { // the fact that we are using the same point for several multiplications, // for example by using larger precomputed tables - #[cfg(feature = "rayon")] - let result: Vec<$projective> = scalars.par_iter().map(|s| self * s).collect(); - #[cfg(not(feature = "rayon"))] - let result: Vec<$projective> = { - let mut result = Vec::with_capacity(scalars.len()); - for scalar in scalars { - result.push(self * scalar); - } - result - }; + let mut result = Vec::with_capacity(scalars.len()); + for scalar in scalars { + result.push(self * scalar); + } $projective::batch_normalize(&result) } @@ -1319,32 +1297,17 @@ macro_rules! define_affine_and_projective_types { // the fact that we are using the same point for several multiplications, // for example by using larger precomputed tables - #[cfg(feature = "rayon")] - let result: Vec<$projective> = scalars.par_iter().map(|s| self.mul_vartime(s)).collect(); - #[cfg(not(feature = "rayon"))] - let result: Vec<$projective> = { - let mut result = Vec::with_capacity(scalars.len()); - for scalar in scalars { - result.push(self.mul_vartime(scalar)); - } - result - }; + let mut result = Vec::with_capacity(scalars.len()); + for scalar in scalars { + result.push(self.mul_vartime(scalar)); + } $projective::batch_normalize(&result) } /// Batch multiplication pub fn batch_mul_array(&self, scalars: &[Scalar; N]) -> [Self; N] { - #[cfg(feature = "rayon")] - { - let results: Vec<$projective> = scalars.par_iter().map(|s| self * s).collect(); - let results: [$projective; N] = results.try_into().expect("Length preserved"); - $projective::batch_normalize_array(&results) - } - #[cfg(not(feature = "rayon"))] - { - let v = scalars.clone().map(|s| self * s); - $projective::batch_normalize_array(&v) - } + let v = scalars.clone().map(|s| self * s); + $projective::batch_normalize_array(&v) } /// Sum some points @@ -1698,19 +1661,8 @@ macro_rules! declare_mul2_table_impl { a: &[Scalar; N], b: &[Scalar; N], ) -> [$projective; N] { - #[cfg(feature = "rayon")] - { - let results: Vec<$projective> = (0..N) - .into_par_iter() - .map(|i| self.mul2(&a[i], &b[i])) - .collect(); - results.try_into().ok().expect("Length preserved") - } - #[cfg(not(feature = "rayon"))] - { - let iota: [usize; N] = std::array::from_fn(|i| i); - iota.map(|i| self.mul2(&a[i], &b[i])) - } + let iota: [usize; N] = std::array::from_fn(|i| i); + iota.map(|i| self.mul2(&a[i], &b[i])) } } }; @@ -2815,26 +2767,13 @@ pub fn verify_bls_signature_batch_distinct( let aggregate_sig = G1Projective::muln_affine_sparse_vartime(&sigs_scalars[..]).to_affine(); let inv_g2_gen = G2Prepared::neg_generator(); - #[cfg(feature = "rayon")] - let (msgs, pks_prepared): (Vec, Vec) = { - let msgs: Vec<_> = msgs - .par_iter() - .zip(random_scalars.par_iter()) - .map(|(&msg, s)| (msg * s).to_affine()) - .collect(); - let pks_prepared: Vec<_> = pks.into_par_iter().map(G2Prepared::from).collect(); - (msgs, pks_prepared) - }; - #[cfg(not(feature = "rayon"))] - let (msgs, pks_prepared): (Vec, Vec) = { - let msgs: Vec<_> = msgs - .iter() - .zip(random_scalars.iter()) - .map(|(&msg, s)| (msg * s).to_affine()) - .collect(); - let pks_prepared: Vec<_> = pks.into_iter().map(G2Prepared::from).collect(); - (msgs, pks_prepared) - }; + let msgs: Vec<_> = msgs + .iter() + .zip(random_scalars.iter()) + .map(|(&msg, s)| (msg * s).to_affine()) + .collect(); + + let pks_prepared: Vec<_> = pks.into_iter().map(G2Prepared::from).collect(); let mut multipairing_inputs = Vec::with_capacity(sigs_pks_msgs.len() + 1); multipairing_inputs.push((&aggregate_sig, inv_g2_gen)); diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel index 64baf202de2e..b6d0d8319f4c 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel @@ -21,7 +21,6 @@ DEPENDENCIES = [ "@crate_index//:parking_lot", "@crate_index//:rand", "@crate_index//:rand_chacha", - "@crate_index//:rayon", "@crate_index//:serde", "@crate_index//:serde_bytes", "@crate_index//:serde_cbor", @@ -120,9 +119,7 @@ rust_bench( ":bls12_381", "//rs/crypto/internal/crypto_lib/bls12_381/type", "//rs/crypto/internal/crypto_lib/seed", - "//rs/crypto/test_utils/reproducible_rng", "@crate_index//:criterion", - "@crate_index//:rand", ], ) diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml index 16c6b83d22ee..1bbe40d3a75c 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml @@ -18,7 +18,6 @@ ic-crypto-sha2 = { path = "../../../../sha2" } ic-types = { path = "../../../../../types/types" } parking_lot = { workspace = true } rand = { workspace = true } -rayon = { workspace = true } rand_chacha = { workspace = true } serde = { workspace = true } serde_bytes = { workspace = true } diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs index a2770dcaf3f6..ea5194f4a17c 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs @@ -1,13 +1,9 @@ use criterion::*; -use ic_crypto_internal_bls12_381_type::Scalar; use ic_crypto_internal_seed::Seed; use ic_crypto_internal_threshold_sig_bls12381::ni_dkg::fs_ni_dkg::Epoch; -use ic_crypto_internal_threshold_sig_bls12381::ni_dkg::fs_ni_dkg::forward_secure::*; use ic_crypto_internal_threshold_sig_bls12381::ni_dkg::groth20_bls12_381::{ SecretKey, create_forward_secure_key_pair, update_key_inplace_to_epoch, }; -use ic_crypto_test_utils_reproducible_rng::reproducible_rng; -use rand::{CryptoRng, Rng, RngCore}; fn fs_key_generation(c: &mut Criterion) { let rng = &mut Seed::from_bytes(b"ic-crypto-benchmark-fsnidkg").into_rng(); @@ -42,72 +38,5 @@ fn fs_key_generation(c: &mut Criterion) { } } -fn setup_keys_and_ciphertext( - node_count: usize, - epoch: Epoch, - associated_data: &[u8], - rng: &mut R, -) -> (Vec<(PublicKeyWithPop, SecretKey)>, FsEncryptionCiphertext) { - let sys = SysParam::global(); - let key_gen_assoc_data = rng.r#gen::<[u8; 32]>(); - - let mut keys = Vec::with_capacity(node_count); - for _ in 0..node_count { - keys.push(kgen(&key_gen_assoc_data, sys, rng)); - } - - let ptext_chunks: Vec<_> = (0..node_count) - .map(|_| { - let s = Scalar::random(rng); - ( - keys[0].0.public_key().clone(), // placeholder, replaced below - PlaintextChunks::from_scalar(&s), - ) - }) - .collect(); - - let pks_and_chunks: Vec<_> = keys - .iter() - .map(|k| k.0.public_key().clone()) - .zip(ptext_chunks.into_iter().map(|(_, c)| c)) - .collect(); - - let (crsz, _witness) = enc_chunks(&pks_and_chunks, epoch, associated_data, sys, rng); - (keys, crsz) -} - -fn fs_encrypt_decrypt(c: &mut Criterion) { - let rng = &mut reproducible_rng(); - let epoch = Epoch::from(0); - let associated_data = rng.r#gen::<[u8; 32]>(); - let sys = SysParam::global(); - - let node_count = 28; - let (keys, crsz) = setup_keys_and_ciphertext(node_count, epoch, &associated_data, rng); - - let mut group = c.benchmark_group("fs_nidkg_enc_dec"); - group.sample_size(10); - - group.bench_function(BenchmarkId::new("enc_chunks", node_count), |b| { - let pks_and_chunks: Vec<_> = keys - .iter() - .map(|k| { - let s = Scalar::random(rng); - (k.0.public_key().clone(), PlaintextChunks::from_scalar(&s)) - }) - .collect(); - - b.iter(|| enc_chunks(&pks_and_chunks, epoch, &associated_data, sys, rng)) - }); - - group.bench_function("verify_ciphertext_integrity", |b| { - b.iter(|| verify_ciphertext_integrity(&crsz, epoch, &associated_data, sys)) - }); - - group.bench_function("dec_chunks", |b| { - b.iter(|| dec_chunks(&keys[0].1, 0, &crsz, epoch, &associated_data)) - }); -} - -criterion_group!(benches, fs_key_generation, fs_encrypt_decrypt); +criterion_group!(benches, fs_key_generation); criterion_main!(benches); diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs index 12a2b99d4c32..a61426e95372 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs @@ -1,7 +1,6 @@ use crate::ni_dkg::fs_ni_dkg::forward_secure::{CHUNK_MAX, CHUNK_MIN, CHUNK_SIZE}; use crate::ni_dkg::fs_ni_dkg::nizk_chunking::{CHALLENGE_BITS, NUM_ZK_REPETITIONS}; use ic_crypto_internal_bls12_381_type::{Gt, Scalar}; -use rayon::prelude::*; use std::sync::LazyLock; pub struct HonestDealerDlogLookupTable { table: Vec, @@ -30,50 +29,67 @@ impl HonestDealerDlogLookupTable { pub fn solve_several(&self, targets: &[Gt]) -> Vec> { use subtle::{ConditionallySelectable, ConstantTimeEq}; - // Solve each target in parallel. For each target, perform a constant-time - // scan of the full table, then confirm the candidate (since hash collisions - // may have occurred if the dealer was dishonest). - targets - .par_iter() - .map(|target| { - let target_hash = target.short_hash_for_linear_search(); - - let mut candidate = 0_u16; - for x in CHUNK_MIN..=CHUNK_MAX { - let x_hash = self.table[x as usize]; - /* - Should this function need to be further optimized a good start would be to - replace the statements below with something like - - // Variable diff equals 0 iff x_hash == target_hash; - let diff = x_hash ^ target_hash; - // Variable mask equals u32::max if diff == 0, or 0 otherwise - let mask = 0u32.wrapping_sub((!diff & diff.wrapping_sub(1)) >> 31); - // Selectively OR in x as the solution, iff x_hash == target_hash - candidate |= (mask as u16) & (x as u16); - - This is more or less what ct_eq / conditional_assign end up being compiled - to, with the difference that it skips the volatile loads which subtle uses - to (try to) prevent LLVM from optimizing code into using a conditional jump. - - Also regarding performance note that it would be quite straightforward to - express the above statements in a 4-wide or 8-wide SIMD expression suitable - for SSE2 or AVX2 execution. This would not only be faster but also highly - likely immune to compiler optimizations that would introduce jumps, since - this would require moving the data out of SIMD registers which would be - expensive, and so the compiler would avoid such a transformation. - */ - let hashes_eq = x_hash.ct_eq(&target_hash); - candidate.conditional_assign(&(x as u16), hashes_eq); - } - - if Gt::g_mul_u16(candidate) == *target { - Some(Scalar::from_u64(candidate as u64)) - } else { - None - } - }) - .collect() + let target_hashes = targets + .iter() + .map(|t| t.short_hash_for_linear_search()) + .collect::>(); + + // This code assumes that CHUNK_MAX fits in a u16 + let mut scan_results = vec![0_u16; targets.len()]; + + for x in CHUNK_MIN..=CHUNK_MAX { + let x_hash = self.table[x as usize]; + + for i in 0..targets.len() { + /* + Should this function need to be further optimized a good start would be to + replace the statements below with something like + + // Variable diff equals 0 iff x_hash == target_hashes[i]; + let diff = x_hash ^ target_hashes[i]; + // Variable mask equals u32::max if diff == 0, or 0 otherwise + let mask = 0u32.wrapping_sub((!diff & diff.wrapping_sub(1)) >> 31); + // Selectively OR in x as the solution, iff x_hash == target_hashes[i] + scan_results[i] |= (mask as u16) & (x as u16); + + This is more or less what ct_eq / conditional_assign end up being compiled + to, with the difference that it skips the volatile loads which subtle uses + to (try to) prevent LLVM from optimizing code into using a conditional jump. + + Also regarding performance note that it would be quite straighforward to + express the above statements in a 4-wide or 8-wide SIMD expression suitable + for SSE2 or AVX2 execution. This would not only be faster but also highly + likely immune to compiler optimizations that would introduce jumps, since + this would require moving the data out of SIMD registers which would be + expensive, and so the compiler would avoid such a transformation. + */ + let hashes_eq = x_hash.ct_eq(&target_hashes[i]); + scan_results[i].conditional_assign(&(x as u16), hashes_eq); + } + } + + // Now confirm the results (since collisions may have occurred + // if the dealer was dishonest) and convert to Scalar + + let mut results = Vec::with_capacity(targets.len()); + + for i in 0..targets.len() { + /* + After finding a candidate we must perform a multiplication in order + to tell if we found the dlog correctly, or if there was a collision + due to a dishonest dealer. + + If no match was found then scan_results[i] will just be zero, we + perform the multiplication anyway and then reject the candidate dlog. + */ + if Gt::g_mul_u16(scan_results[i]) == targets[i] { + results.push(Some(Scalar::from_u64(scan_results[i] as u64))); + } else { + results.push(None); + } + } + + results } } @@ -202,60 +218,22 @@ impl BabyStepGiantStepTable { /// Returns the table plus the giant step fn new(base: &Gt, table_size: usize) -> (Self, Gt) { - let num_threads = rayon::current_num_threads(); - let chunk_size = table_size.div_ceil(num_threads.max(1)); - - /* - * We have to compute the entire table of [base*1, base*2, ..., base*t] - * but would prefer to do so in parallel. - * - * All `t` elements _can_ be computed perfectly in parallel, but at the cost - * of requiring a Gt multiplication per table element, which is rather costly. - * - * Instead split the table computation into chunks such that there is ~1 thread - * per chunk, compute the starting index for that chunk using a Gt mul, then - * use plain additions in Gt from there. - */ - let chunks: Vec> = (0..num_threads) - .into_par_iter() - .map(|t| { - let start = t * chunk_size; - let end = (start + chunk_size).min(table_size); - if start >= table_size { - return vec![]; - } - // TODO: Gt::mul_vartime_usize could use the 16-bit mul tables - // and be significantly faster than this - let mut accum = base.mul_vartime(&Scalar::from_usize(start)); - let mut result = Vec::with_capacity(end - start); - for i in start..end { - let (prefix, hash) = Self::hash_gt(&accum); - result.push((hash, i, prefix)); - accum += base; - } - result - }) - .collect(); - let mut table = Vec::with_capacity(table_size); let mut prefix_set = std::collections::HashSet::with_capacity_and_hasher(table_size, BuildShiftXorHasher); - - for chunk in chunks { - for (hash, i, prefix) in chunk { - table.push((hash, i)); - // we are not checking the return value of `insert` because - // duplicate prefixes do not affect the correctness - prefix_set.insert(prefix); - } + let mut accum = Gt::identity(); + + for i in 0..table_size { + let (prefix, hash) = Self::hash_gt(&accum); + table.push((hash, i)); + // we are not checking the return value of `insert` because + // duplicate prefixes do not affect the correctness + prefix_set.insert(prefix); + accum += base; } - table.sort_unstable(); - // Giant step is -(base * table_size) - let giant_step = base.mul_vartime(&Scalar::from_usize(table_size)).neg(); - - (Self { table, prefix_set }, giant_step) + (Self { table, prefix_set }, accum.neg()) } /// Return the value if gt exists in this table @@ -338,35 +316,17 @@ impl BabyStepGiantStep { /// /// Returns `None` if the discrete logarithm is not in the searched range. pub fn solve(&self, tgt: &Gt) -> Option { - let start = tgt + &self.offset; - - let num_threads = rayon::current_num_threads(); - let steps_per_thread = self.giant_steps.div_ceil(num_threads.max(1)); + let mut step = tgt + &self.offset; - (0..num_threads).into_par_iter().find_map_any(|t| { - let first_step = t * steps_per_thread; - let last_step = (first_step + steps_per_thread).min(self.giant_steps); - if first_step >= self.giant_steps { - return None; - } - - // Compute starting point for this thread's range of giant steps - let mut step = if first_step == 0 { - start.clone() - } else { - &start + &self.giant_step.mul_vartime(&Scalar::from_usize(first_step)) - }; - - for giant_step in first_step..last_step { - if let Some(i) = self.table.get(&step) { - let x = self.lo + (i + self.n * giant_step) as isize; - return Some(Scalar::from_isize(x)); - } - step += &self.giant_step; + for giant_step in 0..self.giant_steps { + if let Some(i) = self.table.get(&step) { + let x = self.lo + (i + self.n * giant_step) as isize; + return Some(Scalar::from_isize(x)); } + step += &self.giant_step; + } - None - }) + None } } diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs index 3c314437314c..8a76f5bf3ba1 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs @@ -16,7 +16,6 @@ use crate::ni_dkg::fs_ni_dkg::encryption_key_pop::{ EncryptionKeyInstance, EncryptionKeyPop, prove_pop, verify_pop, }; use crate::ni_dkg::fs_ni_dkg::random_oracles::{HashedMap, random_oracle}; -use rayon::prelude::*; use crate::ni_dkg::fs_ni_dkg::forward_secure::CiphertextIntegrityError::{ CrszVectorsLengthMismatch, InvalidNidkgCiphertext, @@ -220,7 +219,7 @@ impl BTENode { b: G2Affine::deserialize_unchecked(&node.b).expect("Malformed secret key at BTENode.b"), d_t: node .d_t - .par_iter() + .iter() .map(|g2| { G2Affine::deserialize_unchecked(&g2) .expect("Malformed secret key at BTENode.d_t") @@ -228,7 +227,7 @@ impl BTENode { .collect(), d_h: node .d_h - .par_iter() + .iter() .map(|g2| { G2Affine::deserialize_unchecked(&g2) .expect("Malformed secret key at BTENode.d_h") @@ -519,24 +518,19 @@ impl SecretKey { b_blind += (&f_acc + &sys.f[n]) * δ let e_blind = (&sys.h * &delta) + &node.e; - - let d_t_vec: Vec<_> = d_t.iter().collect(); - let d_t_blind_vec: Vec = d_t_vec - .par_iter() - .enumerate() - .map(|(i, d)| (&sys.f[n + 1 + i] * &delta + *d).to_affine()) - .collect(); let mut d_t_blind = LinkedList::new(); - d_t_blind_vec - .into_iter() - .for_each(|d| d_t_blind.push_back(d)); - - let d_h_blind: Vec = node - .d_h - .par_iter() - .zip(sys.f_h.par_iter()) - .map(|(d, f)| (f * &delta) + d) - .collect(); + let mut k = n + 1; + d_t.iter().for_each(|d| { + let tmp = (&sys.f[k] * &delta) + d; + d_t_blind.push_back(tmp.to_affine()); + k += 1; + }); + + let mut d_h_blind = Vec::new(); + node.d_h.iter().zip(&sys.f_h).for_each(|(d, f)| { + let tmp = (f * &delta) + d; + d_h_blind.push(tmp); + }); let d_h_blind = G2Projective::batch_normalize(&d_h_blind); self.bte_nodes.push_back(BTENode { @@ -560,26 +554,20 @@ impl SecretKey { let e = &sys.h * &delta + &node.e; b_acc += f_acc * δ + let mut d_t_blind = LinkedList::new(); // Typically `d_t_blind` remains empty. // It is only nontrivial if `epoch` is less than LAMBDA_T bits. - let epoch_len = epoch.len(); - let d_t_vec: Vec<_> = d_t.iter().collect(); - let d_t_blind_vec: Vec = d_t_vec - .par_iter() - .enumerate() - .map(|(i, d)| (&sys.f[epoch_len + i] * &delta + *d).to_affine()) - .collect(); - let mut d_t_blind = LinkedList::new(); - d_t_blind_vec - .into_iter() - .for_each(|d| d_t_blind.push_back(d)); - - let d_h_blind: Vec = node - .d_h - .par_iter() - .zip(sys.f_h.par_iter()) - .map(|(d, f)| (f * &delta + d).to_affine()) - .collect(); + let mut k = epoch.len(); + d_t.iter().for_each(|d| { + let tmp = (&sys.f[k] * &delta) + d; + d_t_blind.push_back(tmp.to_affine()); + k += 1; + }); + let mut d_h_blind = Vec::new(); + node.d_h.iter().zip(&sys.f_h).for_each(|(d, f)| { + let tmp = f * &delta + d; + d_h_blind.push(tmp.to_affine()); + }); self.bte_nodes.push_back(BTENode { tau, @@ -616,7 +604,7 @@ impl SecretKey { Self { bte_nodes: secret_key .bte_nodes - .par_iter() + .iter() .map(BTENode::deserialize_unchecked) .collect(), } @@ -671,7 +659,7 @@ impl FsEncryptionCiphertext { let cc: Vec<[G1Affine; NUM_CHUNKS]> = ciphertext .ciphertext_chunks - .par_iter() + .iter() .map(|cj| G1Affine::batch_deserialize_array(cj).or(Err("Malformed ciphertext_chunk"))) .collect::, _>>()?; @@ -708,6 +696,8 @@ pub fn enc_chunks( sys: &SysParam, rng: &mut R, ) -> (FsEncryptionCiphertext, EncryptionWitness) { + let receivers = recipient_and_message.len(); + let g1 = G1Affine::generator(); let s = Scalar::batch_random_array::(rng); @@ -716,14 +706,23 @@ pub fn enc_chunks( let r = Scalar::batch_random_array::(rng); let rr = g1.batch_mul_array(&r); - let cc: Vec<[G1Affine; NUM_CHUNKS]> = recipient_and_message - .par_iter() - .map(|(pk, ptext)| { + // TODO(CRP-2550) This can run in parallel (n = # receivers) + let cc = { + let mut cc: Vec<[G1Affine; NUM_CHUNKS]> = Vec::with_capacity(receivers); + + for (pk, ptext) in recipient_and_message { let pk_g1_tbl = G1Projective::compute_mul2_affine_tbl(pk, g1); + let chunks = ptext.chunks_as_scalars(); - G1Projective::batch_normalize_array(&pk_g1_tbl.mul2_array(&r, &chunks)) - }) - .collect(); + + let enc_chunks = + G1Projective::batch_normalize_array(&pk_g1_tbl.mul2_array(&r, &chunks)); + + cc.push(enc_chunks); + } + + cc + }; let id = ftau_extended(&cc, &rr, &ss, sys, epoch, associated_data); let id_h_tbl = G2Projective::compute_mul2_tbl(&id, &G2Projective::from(&sys.h)); @@ -801,17 +800,19 @@ pub fn dec_chunks( let bneg = G2Prepared::from(&bneg); let eneg = G2Prepared::from(&dk.e.neg()); - let powers: Vec<_> = (0..m) - .into_par_iter() - .map(|i| { - Gt::multipairing(&[ - (&cj[i], G2Prepared::generator()), - (&crsz.rr[i], &bneg), - (&dk.a, &G2Prepared::from(&crsz.zz[i])), - (&crsz.ss[i], &eneg), - ]) - }) - .collect(); + let mut powers = Vec::with_capacity(m); + + // TODO(CRP-2550) These multipairings could be computed in parallel + for i in 0..m { + let x = Gt::multipairing(&[ + (&cj[i], G2Prepared::generator()), + (&crsz.rr[i], &bneg), + (&dk.a, &G2Prepared::from(&crsz.zz[i])), + (&crsz.ss[i], &eneg), + ]); + + powers.push(x); + } // Find discrete log of the powers let linear_search = HonestDealerDlogLookupTable::new(); @@ -820,23 +821,15 @@ pub fn dec_chunks( let mut dlogs = linear_search.solve_several(&powers); if dlogs.iter().any(|x| x.is_none()) { - // Cheating dealer case — each BSGS solve can take hours + // Cheating dealer case let cheating_solver = CheatingDealerDlogSolver::new(n, m); - let unsolved: Vec<_> = dlogs - .iter() - .enumerate() - .filter(|(_, d)| d.is_none()) - .map(|(i, _)| i) - .collect(); - - let solved: Vec<_> = unsolved - .par_iter() - .map(|&i| (i, cheating_solver.solve(&powers[i]))) - .collect(); - - for (i, result) in solved { - dlogs[i] = result; + for i in 0..dlogs.len() { + if dlogs[i].is_none() { + // TODO(CRP-2550) All BSGS could be run in parallel + // It may take hours to brute force a cheater's discrete log. + dlogs[i] = cheating_solver.solve(&powers[i]); + } } } @@ -885,20 +878,19 @@ pub fn verify_ciphertext_integrity( let g1_neg = G1Affine::generator().neg(); let precomp_id = G2Prepared::from(&id); - let all_valid = (0..NUM_CHUNKS).into_par_iter().all(|i| { + // TODO(CRP-2550) Each of these checks could be run in parallel + for i in 0..NUM_CHUNKS { let r = &crsz.rr[i]; let s = &crsz.ss[i]; let z = G2Prepared::from(&crsz.zz[i]); let v = Gt::multipairing(&[(r, &precomp_id), (s, &sys.h_prep), (&g1_neg, &z)]); - v.is_identity() - }); - if all_valid { - Ok(()) - } else { - Err(InvalidNidkgCiphertext) + if !v.is_identity() { + return Err(InvalidNidkgCiphertext); + } } + Ok(()) } #[derive(Eq, PartialEq, Debug)] diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs index 18f0cd66df0d..7a8b71c962d9 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs @@ -10,7 +10,6 @@ use ic_crypto_internal_types::curves::bls12_381::{FrBytes, G1Bytes}; use ic_crypto_internal_types::sign::threshold_sig::ni_dkg::ni_dkg_groth20_bls12_381::ZKProofDec; use rand::{CryptoRng, Rng, RngCore, SeedableRng}; use rand_chacha::ChaCha20Rng; -use rayon::prelude::*; /// Domain separators for the zk proof of chunking const DOMAIN_PROOF_OF_CHUNKING_ORACLE: &str = "ic-zk-proof-of-chunking-chunking"; @@ -362,94 +361,85 @@ pub fn verify_chunking( let xpowers = Scalar::xpowers(&x, NUM_ZK_REPETITIONS); let g1 = &instance.g1_gen; - let (r1, (r2, r3)) = rayon::join( - || -> Result<(), ZkProofChunkingError> { - // First verification equation (for each i ∈ [1..n]): - // ∏_{j=1}^{m} R_j^{Σ_{k=1}^{l} e_{ijk} · x^k} · dd_i == g_1^{z_{r_i}} - let rhs = g1.batch_mul_vartime(&nizk.z_r); - - let lhs = { - let lhs: Vec<_> = e - .par_iter() - .enumerate() - .map(|(i, e_i)| { - let e_ijk_polynomials: Vec<_> = e_i - .iter() - .map(|e_ij| Scalar::muln_usize_vartime(&xpowers, e_ij)) - .collect(); - - let rj_e_ijk = G1Projective::muln_affine_vartime( - &instance.randomizers_r, - &e_ijk_polynomials, - ); - - rj_e_ijk + &nizk.dd[i + 1] - }) + // TODO(CRP-2550) Verification of chunking proof could run in 3 threads + + // Thread 1 + { + /* + Verify lhs == rhs where + lhs = product [R_j ^ sum [e_ijk * x^k | k <- [1..l]] | j <- [1..m]] * dd_i + rhs = g1 ^ z_r_i | i <- [1..n]] + */ + + let rhs = g1.batch_mul_vartime(&nizk.z_r); + + let lhs = { + let mut lhs = Vec::with_capacity(e.len()); + // TODO(CRP-2550) this loop can run in parallel + for (i, e_i) in e.iter().enumerate() { + let e_ijk_polynomials: Vec<_> = e_i + .iter() + .map(|e_ij| Scalar::muln_usize_vartime(&xpowers, e_ij)) .collect(); - G1Projective::batch_normalize(&lhs) - }; - if lhs != rhs { - return Err(ZkProofChunkingError::InvalidProof); + let rj_e_ijk = + G1Projective::muln_affine_vartime(&instance.randomizers_r, &e_ijk_polynomials); + + lhs.push(rj_e_ijk + &nizk.dd[i + 1]); } - Ok(()) - }, - || { - rayon::join( - || -> Result<(), ZkProofChunkingError> { - // Second verification equation: - // ∏_{k=1}^{l} bb_k^{x^k} · dd_0 == g_1^{z_β} - let lhs = G1Projective::muln_affine_vartime(&nizk.bb, &xpowers) + &nizk.dd[0]; - - let rhs = g1.mul_vartime(&nizk.z_beta); - if lhs != rhs { - return Err(ZkProofChunkingError::InvalidProof); - } - Ok(()) - }, - || -> Result<(), ZkProofChunkingError> { - // Third verification equation: - // lhs = ∏_{k=1}^{l} (∏_{i=1}^{n} ∏_{j=1}^{m} c_{ij}^{e_{ijk}})^{x^k} · ∏_{k=1}^{l} cc_k^{x^k} · Y - // rhs = ∏_{i=1}^{n} y_i^{z_{r_i}} · y_0^{z_β} · g_1^{Σ_{k=1}^{l} z_{s_k} · x^k} - - let cij_to_eijks: Vec = (0..NUM_ZK_REPETITIONS) - .into_par_iter() - .map(|k| { - let c_ij_s: Vec<_> = - instance.ciphertext_chunks.iter().flatten().collect(); - let e_ijk_s: Vec<_> = e - .iter() - .flatten() - .map(|e_ij| Scalar::from_usize(e_ij[k])) - .collect(); - if c_ij_s.len() != m * n || e_ijk_s.len() != m * n { - return Err(ZkProofChunkingError::InvalidProof); - } - - Ok(G1Projective::muln_affine_vartime_ref(&c_ij_s, &e_ijk_s) - + &nizk.cc[k]) - }) - .collect::, _>>()?; - - let lhs = - G1Projective::muln_vartime(&cij_to_eijks[..], &xpowers[..]) + &nizk.yy; - - let acc = Scalar::muln_vartime(&nizk.z_s, &xpowers); - - let rhs = G1Projective::muln_affine_vartime(&instance.public_keys, &nizk.z_r) - + G1Projective::mul2_affine_vartime(&nizk.y0, &nizk.z_beta, g1, &acc); - - if lhs != rhs { - return Err(ZkProofChunkingError::InvalidProof); - } - Ok(()) - }, - ) - }, - ); - r1?; - r2?; - r3?; + G1Projective::batch_normalize(&lhs) + }; + + if lhs != rhs { + return Err(ZkProofChunkingError::InvalidProof); + } + } + + // Thread 2 + { + // Verify: product [bb_k ^ x^k | k <- [1..l]] * dd_0 == g1 ^ z_beta + let lhs = G1Projective::muln_affine_vartime(&nizk.bb, &xpowers) + &nizk.dd[0]; + + let rhs = g1.mul_vartime(&nizk.z_beta); + if lhs != rhs { + return Err(ZkProofChunkingError::InvalidProof); + } + } + + // Thread 3 + { + // Verify: product [product [chunk_ij ^ e_ijk | i <- [1..n], j <- [1..m]] ^ x^k + // | k <- [1..l]] * product [cc_k ^ x^k | k <- [1..l]] * Y = product + // [y_i^z_ri | i <- [1..n]] * y0^z_beta * g_1 ^ sum [z_sk * x^k | k <- [1..l]] + + // TODO(CRP-2550) this loop can run in parallel + let cij_to_eijks: Vec = (0..NUM_ZK_REPETITIONS) + .map(|k| { + let c_ij_s: Vec<_> = instance.ciphertext_chunks.iter().flatten().collect(); + let e_ijk_s: Vec<_> = e + .iter() + .flatten() + .map(|e_ij| Scalar::from_usize(e_ij[k])) + .collect(); + if c_ij_s.len() != m * n || e_ijk_s.len() != m * n { + return Err(ZkProofChunkingError::InvalidProof); + } + + Ok(G1Projective::muln_affine_vartime_ref(&c_ij_s, &e_ijk_s) + &nizk.cc[k]) + }) + .collect::, _>>()?; + + let lhs = G1Projective::muln_vartime(&cij_to_eijks[..], &xpowers[..]) + &nizk.yy; + + let acc = Scalar::muln_vartime(&nizk.z_s, &xpowers); + + let rhs = G1Projective::muln_affine_vartime(&instance.public_keys, &nizk.z_r) + + G1Projective::mul2_affine_vartime(&nizk.y0, &nizk.z_beta, g1, &acc); + + if lhs != rhs { + return Err(ZkProofChunkingError::InvalidProof); + } + } Ok(()) } diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs index 58625958c278..337c2fbc703a 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs @@ -265,93 +265,86 @@ pub fn verify_sharing( // x' = oracle(x, F, A, Y) let x_challenge = sharing_proof_challenge(&x, &first_move); - let (r1, (r2, r3)) = rayon::join( - || -> Result<(), ZkProofSharingError> { - // First verification equation - // R^x' * F == g_1^z_r - let lhs = - &instance.combined_randomizer.mul_vartime(&x_challenge) + &first_move.blinder_g1; - let rhs = instance.g1_gen.mul_vartime(&nizk.z_r); - if lhs != rhs { - return Err(ZkProofSharingError::InvalidProof); + // TODO(CRP-2550): The verification can run in three threads + + // Thread 1 + { + // First verification equation + // R^x' * F == g_1^z_r + let lhs = &instance.combined_randomizer.mul_vartime(&x_challenge) + &first_move.blinder_g1; + let rhs = instance.g1_gen.mul_vartime(&nizk.z_r); + if lhs != rhs { + return Err(ZkProofSharingError::InvalidProof); + } + } + + // Thread 2 + { + // Second verification equation + // ( ∏_{k=0}^{t-1} A_k^{ Σ_{i=1}^n (i^k * x^i) } )^{x'} * A + // == g_2^{z_α} + + // We initialize ik with x_challenge (A) to avoid the point/scalar multiplication later + let mut ik = vec![x_challenge.clone(); instance.public_keys.len()]; + + let mut scalars = Vec::with_capacity(instance.public_coefficients.len()); + for _pc in &instance.public_coefficients { + let acc = Scalar::muln_vartime(&ik, &xpow); + scalars.push(acc); + + for i in 0..ik.len() { + ik[i] *= Scalar::from_u64((i + 1) as u64); } - Ok(()) - }, - || { - rayon::join( - || -> Result<(), ZkProofSharingError> { - // Second verification equation - // ( ∏_{k=0}^{t-1} A_k^{ Σ_{i=1}^n (i^k * x^i) } )^{x'} * A - // == g_2^{z_α} - - // We initialize ik with x_challenge (A) to avoid the scalar multiplication later - let mut ik = vec![x_challenge.clone(); instance.public_keys.len()]; - - let mut scalars = Vec::with_capacity(instance.public_coefficients.len()); - for _pc in &instance.public_coefficients { - let acc = Scalar::muln_vartime(&ik, &xpow); - scalars.push(acc); - - for i in 0..ik.len() { - ik[i] *= Scalar::from_u64((i + 1) as u64); - } - } - let lhs = G2Projective::muln_affine_vartime( - &instance.public_coefficients[..], - &scalars[..], - ) + &nizk.aa; - - let rhs = instance.g2_gen.mul_vartime(&nizk.z_alpha); - - if lhs != rhs { - return Err(ZkProofSharingError::InvalidProof); - } - Ok(()) - }, - || -> Result<(), ZkProofSharingError> { - // Third verification equation - // Original relation: - // (∏_{i=1}^n C_i^{x^i})^{x'} * Y == (∏_{i=1}^n y_i^{x^i})^{z_r} * g_1^{z_α} - // - // Equivalently, we can rewrite it by moving terms to opposite sides: - // - // lhs = (∏_{i=1}^n C_i^{x^i})^{x'} * (∏_{i=1}^n y_i^{x^i})^{-z_r} - // rhs = g_1^{z_α} * Y^{-1} - - // The two expressions are re-arranged so that it becomes possible to compute - // everything with a single multi scalar multiplication. - - let instance_inputs: Vec<_> = instance - .combined_ciphertexts - .iter() - .chain(&instance.public_keys) - .collect(); - let challenges = { - let mut c = Vec::with_capacity(xpow.len() * 2); - for xp in &xpow { - c.push(xp * &x_challenge); - } - let z_r_neg = nizk.z_r.neg(); - for xp in &xpow { - c.push(xp * &z_r_neg); - } - c - }; - - let lhs = G1Projective::muln_affine_vartime_ref(&instance_inputs, &challenges); - let rhs = &instance.g1_gen.mul_vartime(&nizk.z_alpha) + &nizk.yy.neg(); - - if lhs != rhs { - return Err(ZkProofSharingError::InvalidProof); - } - Ok(()) - }, - ) - }, - ); - r1?; - r2?; - r3?; + } + let lhs = + G2Projective::muln_affine_vartime(&instance.public_coefficients[..], &scalars[..]) + + &nizk.aa; + + let rhs = instance.g2_gen.mul_vartime(&nizk.z_alpha); + + if lhs != rhs { + return Err(ZkProofSharingError::InvalidProof); + } + } + + // Thread 3 + { + // Third verification equation + // Original relation: + // (∏_{i=1}^n C_i^{x^i})^{x'} * Y == (∏_{i=1}^n y_i^{x^i})^{z_r} * g_1^{z_α} + // + // Equivalently, we can rewrite it by moving terms to opposite sides: + // + // lhs = (∏_{i=1}^n C_i^{x^i})^{x'} * (∏_{i=1}^n y_i^{x^i})^{-z_r} + // rhs = g_1^{z_α} * Y^{-1} + + // The two expressions are re-arranged so that it becomes possible to compute + // everything with a single multi scalar multiplication. + + let instance_inputs: Vec<_> = instance + .combined_ciphertexts + .iter() + .chain(&instance.public_keys) + .collect(); + let challenges = { + let mut c = Vec::with_capacity(xpow.len() * 2); + for xp in &xpow { + c.push(xp * &x_challenge); + } + let z_r_neg = nizk.z_r.neg(); + for xp in &xpow { + c.push(xp * &z_r_neg); + } + c + }; + + let lhs = G1Projective::muln_affine_vartime_ref(&instance_inputs, &challenges); + let rhs = &instance.g1_gen.mul_vartime(&nizk.z_alpha) + &nizk.yy.neg(); + + if lhs != rhs { + return Err(ZkProofSharingError::InvalidProof); + } + } Ok(()) } diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs index 924bbb5fe1ab..90a7cedbf6fa 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs @@ -22,7 +22,6 @@ use ic_crypto_internal_types::sign::threshold_sig::{ }; use ic_types::{NumberOfNodes, crypto::AlgorithmId, crypto::error::InvalidArgumentError}; use rand::{CryptoRng, RngCore}; -use rayon::prelude::*; use std::collections::BTreeMap; use std::convert::TryFrom; @@ -101,12 +100,14 @@ pub fn encrypt_and_prove( public_coefficients: &PublicCoefficientsBytes, associated_data: &[u8], ) -> Result<(FsEncryptionCiphertextBytes, ZKProofDec, ZKProofShare), EncryptAndZKProveError> { - let public_keys: Result, EncryptAndZKProveError> = key_message_pairs - .par_iter() - .enumerate() - .map(|(receiver_index, (pk_bytes, _))| { - let public_key_bytes = pk_bytes.as_bytes(); - G1Affine::deserialize(public_key_bytes).map_err(|_| { + let receivers = key_message_pairs.len(); + + // TODO(CRP-2550) the deserialization step can run in parallel + let public_keys = { + let mut public_keys = Vec::with_capacity(receivers); + for receiver_index in 0..receivers { + let public_key_bytes = key_message_pairs[receiver_index].0.as_bytes(); + let pk = G1Affine::deserialize(public_key_bytes).map_err(|_| { EncryptAndZKProveError::MalformedFsPublicKeyError { receiver_index: receiver_index as u32, error: MalformedPublicKeyError { @@ -115,10 +116,12 @@ pub fn encrypt_and_prove( internal_error: "Could not parse public key.".to_string(), }, } - }) - }) - .collect(); - let public_keys = public_keys?; + })?; + + public_keys.push(pk); + } + public_keys + }; let plaintext_chunks = key_message_pairs .iter() @@ -144,8 +147,7 @@ pub fn encrypt_and_prove( rng, ); - // Generating the chunking and sharing proofs could happen in parallel but this - // changes the stability tests and may not be worth it + // TODO(CRP-2550) generating the chunking and sharing proofs can happen in parallel let chunking_proof = prove_chunking( &public_keys, @@ -342,11 +344,10 @@ pub fn verify_zk_proofs( associated_data: &[u8], ) -> Result<(), CspDkgVerifyDealingError> { // Conversions - let pk_values: Vec<_> = receiver_fs_public_keys.values().collect(); - let public_keys: Result, CspDkgVerifyDealingError> = pk_values - .par_iter() - .enumerate() - .map(|(receiver_index, public_key)| { + let public_keys: Result, CspDkgVerifyDealingError> = receiver_fs_public_keys + .values() + .zip(0..) + .map(|(public_key, receiver_index)| { G1Affine::deserialize(public_key.as_bytes()).map_err(|parse_error| { let error = MalformedPublicKeyError { algorithm: ALGORITHM_ID, @@ -354,7 +355,7 @@ pub fn verify_zk_proofs( internal_error: format!("{parse_error:?}"), }; CspDkgVerifyDealingError::MalformedFsPublicKeyError { - receiver_index: receiver_index as NodeIndex, + receiver_index, error, } }) @@ -404,9 +405,10 @@ pub fn verify_zk_proofs( // More conversions + // TODO(CRP-2550) this loop can run in parallel let public_coefficients = public_coefficients .coefficients - .par_iter() + .iter() .map(G2Affine::deserialize) .collect::, _>>() .map_err(|_| { diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs index 58a500c6de38..38703f73569f 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs @@ -11,7 +11,6 @@ use crate::types as threshold_types; use ic_crypto_internal_bls12_381_type::{G2Projective, LagrangeCoefficients, NodeIndices}; use ic_crypto_internal_types::sign::threshold_sig::ni_dkg::ni_dkg_groth20_bls12_381 as g20; use ic_types::{NodeIndex, NumberOfNodes}; -use rayon::prelude::*; use std::collections::BTreeMap; use std::convert::TryFrom; use std::ops::{AddAssign, MulAssign}; @@ -210,18 +209,18 @@ fn compute_transcript( LagrangeCoefficients::at_zero(&indices).into_coefficients() }; - let combined: Vec<_> = (0..threshold.get() as usize) - .into_par_iter() - .map(|i| { - let points: Vec<_> = individual_public_coefficients - .values() - .map(|pc| pc.coefficients[i].0.clone()) - .collect(); - crate::types::PublicKey( - G2Projective::muln_affine_vartime(&points, &coefficients).to_affine(), - ) - }) - .collect(); + let mut combined = Vec::with_capacity(threshold.get() as usize); + + // TODO(CRP-2550) this loop can run in parallel + for i in 0..threshold.get() as usize { + let points: Vec<_> = individual_public_coefficients + .values() + .map(|pc| &pc.coefficients[i].0) + .collect(); + combined.push(crate::types::PublicKey( + G2Projective::muln_affine_vartime_ref(&points, &coefficients).to_affine(), + )); + } let combined_public_coefficients = threshold_types::PublicCoefficients::new(combined); // This type conversion is needed because of the internal/CSP type duplication. @@ -259,10 +258,11 @@ pub fn compute_threshold_signing_key( ) -> Result { // Get my shares + // TODO(CRP-2550) this loop can run in parallel let shares_from_each_dealer: Result, _> = transcript .receiver_data - .par_iter() + .iter() .map(|(dealer_index, encrypted_shares)| { let secret_key = decrypt( encrypted_shares, diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs index aafffaebf27d..b228d3a1b0ff 100644 --- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs +++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs @@ -60,50 +60,12 @@ fn test_updating_fs_secret_key_is_stable() { let mut sk = SecretKey::deserialize(&key_and_pop.secret_key); let seed = Seed::from_bytes(b"ic-crypto-update-key-seed"); + update_key_inplace_to_epoch(&mut sk, Epoch::from(2), seed); - let key_hash_for_epoch = [ - ( - 2, - "b79540c1e4148231c2e194b854e46f2449c4b8a534e854bf3f69d7f41d1af383", - ), - ( - 4, - "d6ecca647cf2069e1ae36a2dcda420932b1fc9ca66f8442953ca6d4cfc960a2b", - ), - ( - 9, - "5b7c6755f3a4ce7c399cb3f4db14cae977206deaa2a570b9969b7aa052d9c4f9", - ), - ( - 1023, - "8358c158fea524b8fe020973632423db981cff50c1c542572e74211a8e28b574", - ), - ( - 1024, - "27f15513f9d836fde9e80483498c66c7e77792a116676c1a46f368c2f6b3be2b", - ), - ( - 1 << 28, - "10aaf2a0245593cb6ecaa05cee4f2cb525f51fdd9a4ecfe106825990ba2b69f3", - ), - ( - (1 << 29) - 1, - "b755fde575c74f5746450d6eb275cbf7437b3fdff17c0833fc22d00be15a6dbf", - ), - ( - 1 << 29, - "2fde8a15cad2917c81e8adf8828c6d477512677cd2f89367539c47ca3fe7948e", - ), - ]; - - for (epoch, expected_hash) in &key_hash_for_epoch { - update_key_inplace_to_epoch( - &mut sk, - Epoch::from(*epoch), - seed.derive(&format!("Epoch {}", epoch)), - ); - assert_sha256_cbor_is(&sk.serialize(), expected_hash); - } + assert_sha256_cbor_is( + &sk.serialize(), + "f70143bdd1fad70ac7d24cda1f5141b6e730841361fc4c8e5059ddc0a1514e15", + ); } fn create_receiver_keys( diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs index 2a8c4b228192..f87f3ce063c8 100644 --- a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs +++ b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs @@ -1,6 +1,5 @@ use super::*; use rand::rngs::OsRng; -use rayon::ThreadPool; pub struct LocalCspVaultBuilder { csprng: Box R>, @@ -10,7 +9,6 @@ pub struct LocalCspVaultBuilder { time_source: Arc, metrics: Arc, logger: ReplicaLogger, - thread_pool_nidkg: Option>, } impl ProdLocalCspVault { @@ -30,7 +28,6 @@ impl ProdLocalCspVault { time_source: Arc::new(SysTimeSource::new()), metrics, logger, - thread_pool_nidkg: None, } } @@ -85,7 +82,6 @@ where time_source: self.time_source, metrics: self.metrics, logger: self.logger, - thread_pool_nidkg: self.thread_pool_nidkg, } } @@ -101,7 +97,6 @@ where time_source: self.time_source, metrics: self.metrics, logger: self.logger, - thread_pool_nidkg: self.thread_pool_nidkg, } } @@ -117,7 +112,6 @@ where time_source: self.time_source, metrics: self.metrics, logger: self.logger, - thread_pool_nidkg: self.thread_pool_nidkg, } } @@ -133,7 +127,6 @@ where time_source: self.time_source, metrics: self.metrics, logger: self.logger, - thread_pool_nidkg: self.thread_pool_nidkg, } } @@ -147,18 +140,6 @@ where self } - /// Installs an externally-managed rayon pool used for NIDKG work. - /// - /// If not called, `build` creates a small default pool — see - /// [`super::NIDKG_THREAD_POOL_SIZE`]. Prod callers that already own a - /// NIDKG pool (e.g., the remote vault server) should inject it here so - /// that a single pool is shared across the RPC dispatch layer and the - /// vault's internal `par_iter` calls. - pub fn with_nidkg_thread_pool(mut self, thread_pool_nidkg: Arc) -> Self { - self.thread_pool_nidkg = Some(thread_pool_nidkg); - self - } - pub fn build(self) -> LocalCspVault { LocalCspVault { csprng: CspRwLock::new_for_rng((self.csprng)(), Arc::clone(&self.metrics)), @@ -177,9 +158,6 @@ where time_source: self.time_source, metrics: self.metrics, logger: self.logger, - thread_pool_nidkg: self - .thread_pool_nidkg - .unwrap_or_else(super::new_nidkg_thread_pool), } } @@ -216,7 +194,6 @@ mod test_utils { time_source: FastForwardTimeSource::new(), logger: no_op_logger(), metrics: Arc::new(CryptoMetrics::none()), - thread_pool_nidkg: None, } } } diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs index 58ca7aaa6964..b748dfcd72f6 100644 --- a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs +++ b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs @@ -35,36 +35,10 @@ use ic_types::crypto::canister_threshold_sig::error::ThresholdEcdsaCreateSigShar use parking_lot::{RwLockReadGuard, RwLockWriteGuard}; use rand::rngs::OsRng; use rand::{CryptoRng, Rng}; -use rayon::{ThreadPool, ThreadPoolBuilder}; use std::collections::HashSet; use std::path::Path; use std::sync::Arc; -/// Default size of the per-vault rayon pool used for NIDKG work when no -/// externally-managed pool is injected via -/// [`builder::LocalCspVaultBuilder::with_nidkg_thread_pool`]. -/// -/// A small fixed size prevents oversubscription when many `LocalCspVault` -/// instances share a single process (e.g., registry integration tests that -/// run several local ICs in parallel, or the in-replica vault path): without -/// this bound, each vault's `par_iter` calls would fan out to the global -/// rayon pool, which defaults to the number of logical cores. -/// -/// The remote vault server injects its own NIDKG pool so that the outer RPC -/// dispatch pool and the inner `par_iter` pool are the same pool; in that -/// case this constant is unused. -const NIDKG_THREAD_POOL_SIZE: usize = 3; - -pub(crate) fn new_nidkg_thread_pool() -> Arc { - Arc::new( - ThreadPoolBuilder::new() - .thread_name(|i| format!("ic-crypto-local-vault-nidkg-{i}")) - .num_threads(NIDKG_THREAD_POOL_SIZE) - .build() - .expect("failed to instantiate a thread pool"), - ) -} - /// An implementation of `CspVault`-trait that runs in-process /// and uses local secret key stores. /// @@ -114,7 +88,6 @@ pub struct LocalCspVault< time_source: Arc, logger: ReplicaLogger, metrics: Arc, - thread_pool_nidkg: Arc, } pub type ProdLocalCspVault = @@ -222,19 +195,6 @@ impl(&self, job: F) -> T - where - F: FnOnce() -> T + Send, - T: Send, - { - self.thread_pool_nidkg.install(job) - } - fn combined_commitment_opening_from_sks( &self, combined_commitment: &CombinedCommitment, diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs index da6cd14ce96d..9268c5bd4957 100644 --- a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs +++ b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs @@ -36,12 +36,8 @@ use std::collections::{BTreeMap, BTreeSet}; #[cfg(test)] mod tests; -impl< - R: Rng + CryptoRng + Send + Sync, - S: SecretKeyStore + Send + Sync, - C: SecretKeyStore + Send + Sync, - P: PublicKeyStore + Send + Sync, -> NiDkgCspVault for LocalCspVault +impl NiDkgCspVault + for LocalCspVault { fn gen_dealing_encryption_key_pair( &self, @@ -49,8 +45,7 @@ impl< ) -> Result<(CspFsEncryptionPublicKey, CspFsEncryptionPop), CspDkgCreateFsKeyError> { debug!(self.logger; crypto.method_name => "gen_dealing_encryption_key_pair"); let start_time = self.metrics.now(); - let result = self - .run_on_nidkg_thread_pool(|| self.gen_dealing_encryption_key_pair_internal(node_id)); + let result = self.gen_dealing_encryption_key_pair_internal(node_id); self.metrics.observe_duration_seconds( MetricsDomain::NiDkgAlgorithm, MetricsScope::Local, @@ -70,9 +65,7 @@ impl< debug!(self.logger; crypto.method_name => "update_forward_secure_epoch", crypto.dkg_epoch => epoch.get()); let start_time = self.metrics.now(); - let result = self.run_on_nidkg_thread_pool(|| { - self.update_forward_secure_epoch_internal(algorithm_id, key_id, epoch) - }); + let result = self.update_forward_secure_epoch_internal(algorithm_id, key_id, epoch); self.metrics.observe_duration_seconds( MetricsDomain::NiDkgAlgorithm, MetricsScope::Local, @@ -93,15 +86,13 @@ impl< ) -> Result { debug!(self.logger; crypto.method_name => "create_dealing", crypto.dkg_epoch => epoch.get()); let start_time = self.metrics.now(); - let result = self.run_on_nidkg_thread_pool(|| { - self.create_dealing_internal( - algorithm_id, - dealer_index, - threshold, - epoch, - &receiver_keys, - ) - }); + let result = self.create_dealing_internal( + algorithm_id, + dealer_index, + threshold, + epoch, + &receiver_keys, + ); self.metrics.observe_duration_seconds( MetricsDomain::NiDkgAlgorithm, MetricsScope::Local, @@ -123,16 +114,14 @@ impl< ) -> Result { debug!(self.logger; crypto.method_name => "create_resharing_dealing", crypto.dkg_epoch => epoch.get()); let start_time = self.metrics.now(); - let result = self.run_on_nidkg_thread_pool(|| { - self.create_resharing_dealing_internal( - algorithm_id, - dealer_index, - threshold, - epoch, - &receiver_keys, - resharing_secret, - ) - }); + let result = self.create_resharing_dealing_internal( + algorithm_id, + dealer_index, + threshold, + epoch, + &receiver_keys, + resharing_secret, + ); self.metrics.observe_duration_seconds( MetricsDomain::NiDkgAlgorithm, MetricsScope::Local, @@ -153,15 +142,13 @@ impl< ) -> Result<(), ni_dkg_errors::CspDkgLoadPrivateKeyError> { debug!(self.logger; crypto.method_name => "load_threshold_signing_key", crypto.dkg_epoch => epoch.get()); let start_time = self.metrics.now(); - let result = self.run_on_nidkg_thread_pool(|| { - self.load_threshold_signing_key_internal( - algorithm_id, - epoch, - csp_transcript, - fs_key_id, - receiver_index, - ) - }); + let result = self.load_threshold_signing_key_internal( + algorithm_id, + epoch, + csp_transcript, + fs_key_id, + receiver_index, + ); self.metrics.observe_duration_seconds( MetricsDomain::NiDkgAlgorithm, MetricsScope::Local, diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs index ffd44869c8b2..f7e6d03181ef 100644 --- a/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs +++ b/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs @@ -1,5 +1,5 @@ use self::api::CspVault; -use self::local_csp_vault::{ProdLocalCspVault, new_nidkg_thread_pool}; +use self::local_csp_vault::ProdLocalCspVault; use self::remote_csp_vault::RemoteCspVault; use crate::vault::api::{ CspBasicSignatureError, CspMultiSignatureError, CspSecretKeyStoreContainsError, @@ -58,9 +58,7 @@ fn in_replica_vault( logger, "Proceeding with an in-replica csp_vault, CryptoConfig: {:?}", config ); - let vault = ProdLocalCspVault::builder_in_dir(&config.crypto_root, metrics, logger) - .with_nidkg_thread_pool(new_nidkg_thread_pool()) - .build(); + let vault = ProdLocalCspVault::new_in_dir(&config.crypto_root, metrics, logger); Arc::new(vault) } diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs b/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs index c06db635d5ee..3845fe21363f 100644 --- a/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs +++ b/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs @@ -12,7 +12,7 @@ use crate::vault::api::{ use crate::vault::api::{ CspPublicKeyStoreError, CspVault, IDkgDealingInternalBytes, IDkgTranscriptInternalBytes, }; -use crate::vault::local_csp_vault::ProdLocalCspVault; +use crate::vault::local_csp_vault::{LocalCspVault, ProdLocalCspVault}; use crate::vault::remote_csp_vault::ThresholdSchnorrCreateSigShareVaultError; use crate::vault::remote_csp_vault::{FOUR_GIGA_BYTES, PksAndSksContainsErrors}; use crate::vault::remote_csp_vault::{TarpcCspVault, remote_vault_codec_builder}; @@ -64,8 +64,7 @@ use super::codec::{Bincode, CspVaultObserver, ObservableCodec}; pub struct TarpcCspVaultServerImpl { local_csp_vault: Arc, listener: UnixListener, - thread_pool_general: Arc, - thread_pool_nidkg: Arc, + thread_pool: Arc, max_frame_length: usize, metrics: Arc, #[allow(unused)] @@ -78,8 +77,7 @@ pub struct TarpcCspVaultServerImpl { /// created through cloning (with `Clone`). struct TarpcCspVaultServerWorker { local_csp_vault: Arc, - thread_pool_general: Arc, - thread_pool_nidkg: Arc, + thread_pool: Arc, } async fn execute_on_thread_pool(thread_pool: &ThreadPool, job: F) -> T @@ -110,8 +108,7 @@ impl Clone for TarpcCspVaultServerWorker { fn clone(&self) -> Self { Self { local_csp_vault: Arc::clone(&self.local_csp_vault), - thread_pool_general: Arc::clone(&self.thread_pool_general), - thread_pool_nidkg: Arc::clone(&self.thread_pool_nidkg), + thread_pool: Arc::clone(&self.thread_pool), } } } @@ -125,7 +122,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.sign(msg.into_vec()); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn gen_node_signing_key_pair( @@ -134,7 +131,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.gen_node_signing_key_pair(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // `MultiSignatureCspVault`-methods. @@ -147,7 +144,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.multi_sign(algorithm_id, message.into_vec(), key_id); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn gen_committee_signing_key_pair( @@ -156,7 +153,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result<(CspPublicKey, CspPop), CspMultiSignatureKeygenError> { let vault = self.local_csp_vault; let job = move || vault.gen_committee_signing_key_pair(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // `ThresholdSignatureCspVault`-methods. @@ -169,7 +166,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.threshold_sign(algorithm_id, message.into_vec(), key_id); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // `NiDkgCspVault`-methods. @@ -180,7 +177,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result<(CspFsEncryptionPublicKey, CspFsEncryptionPop), CspDkgCreateFsKeyError> { let vault = self.local_csp_vault; let job = move || vault.gen_dealing_encryption_key_pair(node_id); - execute_on_thread_pool(&self.thread_pool_nidkg, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn update_forward_secure_epoch( @@ -192,7 +189,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result<(), CspDkgUpdateFsEpochError> { let vault = self.local_csp_vault; let job = move || vault.update_forward_secure_epoch(algorithm_id, key_id, epoch); - execute_on_thread_pool(&self.thread_pool_nidkg, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn create_dealing( @@ -208,7 +205,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { let job = move || { vault.create_dealing(algorithm_id, dealer_index, threshold, epoch, receiver_keys) }; - execute_on_thread_pool(&self.thread_pool_nidkg, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn create_resharing_dealing( @@ -232,7 +229,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { resharing_secret, ) }; - execute_on_thread_pool(&self.thread_pool_nidkg, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn load_threshold_signing_key( @@ -254,7 +251,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { receiver_index, ) }; - execute_on_thread_pool(&self.thread_pool_nidkg, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn retain_threshold_keys_if_present( @@ -264,7 +261,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result<(), CspDkgRetainThresholdKeysError> { let vault = self.local_csp_vault; let job = move || vault.retain_threshold_keys_if_present(active_key_ids); - execute_on_thread_pool(&self.thread_pool_nidkg, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // SecretKeyStoreCspVault-methods. @@ -275,7 +272,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.sks_contains(key_id); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // PublicKeyStoreCspVault-methods. @@ -285,7 +282,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.current_node_public_keys(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn current_node_public_keys_with_timestamps( @@ -294,13 +291,13 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.current_node_public_keys_with_timestamps(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn idkg_key_count(self, _: context::Context) -> Result { let vault = self.local_csp_vault; let job = move || vault.idkg_dealing_encryption_pubkeys_count(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // PublicAndSecretKeyStoreCspVault-methods. @@ -311,7 +308,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result<(), PksAndSksContainsErrors> { let vault = self.local_csp_vault; let job = move || vault.pks_and_sks_contains(external_public_keys); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn validate_pks_and_sks( @@ -320,7 +317,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.validate_pks_and_sks(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // 'TlsHandshakeCspVault'-methods. @@ -331,7 +328,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.gen_tls_key_pair(node); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn tls_sign( @@ -342,7 +339,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.tls_sign(message.into_vec(), key_id); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // `IDkgProtocolCspVault`-methods. @@ -367,7 +364,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { transcript_operation, ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn idkg_verify_dealing_private( @@ -391,7 +388,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { context_data.into_vec(), ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn idkg_load_transcript( @@ -415,7 +412,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { transcript, ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn idkg_load_transcript_with_openings( @@ -441,7 +438,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { transcript, ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn idkg_retain_active_keys( @@ -452,7 +449,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result<(), IDkgRetainKeysError> { let vault = self.local_csp_vault; let job = move || vault.idkg_retain_active_keys(active_key_ids, oldest_public_key); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn idkg_gen_dealing_encryption_key_pair( @@ -461,7 +458,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.idkg_gen_dealing_encryption_key_pair(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn idkg_open_dealing( @@ -485,7 +482,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { opener_key_id, ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // `ThresholdEcdsaSignerCspVault`-methods @@ -516,7 +513,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { algorithm_id, ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } // `ThresholdSchnorrSignerCspVault`-methods @@ -543,7 +540,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { algorithm_id, ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn create_encrypted_vetkd_key_share( @@ -565,7 +562,7 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { input.into_vec(), ) }; - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } async fn new_public_seed( @@ -574,12 +571,11 @@ impl TarpcCspVault for TarpcCspVaultServerWorker { ) -> Result { let vault = self.local_csp_vault; let job = move || vault.new_public_seed(); - execute_on_thread_pool(&self.thread_pool_general, job).await + execute_on_thread_pool(&self.thread_pool, job).await } } -type VaultFactory = - dyn Fn(&ReplicaLogger, Arc, Arc) -> Arc + Send + Sync; +type VaultFactory = dyn Fn(&ReplicaLogger, Arc) -> Arc + Send + Sync; pub struct TarpcCspVaultServerImplBuilder { local_csp_vault_factory: Box>, @@ -591,29 +587,21 @@ pub struct TarpcCspVaultServerImplBuilder { impl TarpcCspVaultServerImplBuilder { pub fn new(key_store_dir: &Path) -> Self { let key_store_path = key_store_dir.to_path_buf(); - let local_csp_vault_factory = - Box::new(move |logger: &ReplicaLogger, metrics, thread_pool_nidkg| { - Arc::new( - ProdLocalCspVault::builder_in_dir( - &key_store_path, - metrics, - new_logger!(logger), - ) - .with_nidkg_thread_pool(thread_pool_nidkg) - .build(), - ) - }); + let local_csp_vault_factory = Box::new(move |logger: &ReplicaLogger, metrics| { + Arc::new(LocalCspVault::new_in_dir( + &key_store_path, + metrics, + new_logger!(logger), + )) + }); Self::new_internal(local_csp_vault_factory) } } impl TarpcCspVaultServerImplBuilder { pub fn new_with_local_csp_vault(local_csp_vault: Arc) -> Self { - let local_csp_vault_factory = Box::new( - move |_logger: &ReplicaLogger, _metrics, _thread_pool_nidkg| { - Arc::clone(&local_csp_vault) - }, - ); + let local_csp_vault_factory = + Box::new(move |_logger: &ReplicaLogger, _metrics| Arc::clone(&local_csp_vault)); Self::new_internal(local_csp_vault_factory) } } @@ -644,51 +632,26 @@ impl TarpcCspVaultServerImplBuilder { } } -fn create_thread_pool(ident: &'static str) -> Arc { - let num_threads = { - let cores = std::thread::available_parallelism() - .expect("obtaining the number of available cores should never fail") - .get(); - - if cores >= 16 { - 8 - } else if cores >= 8 { - 4 - } else { - 2 - } - }; - - Arc::new( - ThreadPoolBuilder::new() - .thread_name(move |i| format!("ic-crypto-worker-{ident}-{i}")) - .num_threads(num_threads) - .build() - .expect("failed to instantiate a thread pool"), - ) -} - impl TarpcCspVaultServerImplBuilder { pub fn build(&self, listener: UnixListener) -> TarpcCspVaultServerImpl { info!(&self.logger, "Starting new RPC CSP vault server"); - // Created before the local vault so that the same pool can be shared: - // it bounds both the RPC-dispatch concurrency (see - // `execute_on_thread_pool`) and the inner `par_iter` calls inside - // `LocalCspVault`. With one shared pool, the vault's - // `ThreadPool::install` inside a job already running on this pool is - // effectively a no-op, avoiding the double-pool downsize that would - // otherwise occur. - let thread_pool_nidkg = create_thread_pool("nidkg"); - let local_csp_vault: Arc = (self.local_csp_vault_factory)( - &self.logger, - Arc::clone(&self.metrics), - Arc::clone(&thread_pool_nidkg), - ); + let local_csp_vault: Arc = + (self.local_csp_vault_factory)(&self.logger, Arc::clone(&self.metrics)); TarpcCspVaultServerImpl { local_csp_vault, listener, - thread_pool_general: create_thread_pool("general"), - thread_pool_nidkg, + // defaults the number of threads to the number of CPUs + thread_pool: Arc::new( + ThreadPoolBuilder::new() + .thread_name(|i| format!("ic-crypto-csp-{i}")) + .num_threads( + std::thread::available_parallelism() + .expect("obtaining the number of available cores should never fail") + .get(), + ) + .build() + .expect("failed to instantiate a thread pool"), + ), max_frame_length: self.max_frame_length, metrics: Arc::clone(&self.metrics), logger: new_logger!(&self.logger), @@ -728,8 +691,7 @@ impl TarpcCspVaultServerImpl { ) }); let local_csp_vault = Arc::clone(&self.local_csp_vault); - let thread_pool_general = Arc::clone(&self.thread_pool_general); - let thread_pool_nidkg = Arc::clone(&self.thread_pool_nidkg); + let thread_pool = Arc::clone(&self.thread_pool); let codec = ObservableCodec::new( Bincode::default(), CspVaultObserver::new(new_logger!(&self.logger), Arc::clone(&self.metrics)), @@ -739,8 +701,7 @@ impl TarpcCspVaultServerImpl { let transport = serde_transport::new(framed, codec); let worker = TarpcCspVaultServerWorker { local_csp_vault, - thread_pool_general, - thread_pool_nidkg, + thread_pool, }; let channel = BaseChannel::with_defaults(transport); channel