From 70726eab25eb5aa5dffe05c2191681633730f93e Mon Sep 17 00:00:00 2001
From: Bas van Dijk <bas@dfinity.org>
Date: Wed, 22 Apr 2026 08:11:25 +0000
Subject: [PATCH] Revert "perf(crypto): CON-1650 Use Rayon in NIDKG (#9007)"

This reverts commit dd1ef70ce87bd85b77107ef4519f68f6f63c7fa8.
---
 Cargo.lock                                    |   2 -
 .../crypto_lib/bls12_381/type/BUILD.bazel     |   2 -
 .../crypto_lib/bls12_381/type/Cargo.toml      |   5 -
 .../crypto_lib/bls12_381/type/benches/ops.rs  | 209 ------------------
 .../crypto_lib/bls12_381/type/src/lib.rs      | 123 +++--------
 .../threshold_sig/bls12_381/BUILD.bazel       |   3 -
 .../threshold_sig/bls12_381/Cargo.toml        |   1 -
 .../bls12_381/benches/fs_nidkg.rs             |  73 +-----
 .../src/ni_dkg/fs_ni_dkg/dlog_recovery.rs     | 198 +++++++----------
 .../src/ni_dkg/fs_ni_dkg/forward_secure.rs    | 150 ++++++-------
 .../src/ni_dkg/fs_ni_dkg/nizk_chunking.rs     | 162 +++++++-------
 .../src/ni_dkg/fs_ni_dkg/nizk_sharing.rs      | 165 +++++++-------
 .../ni_dkg/groth20_bls12_381/encryption.rs    |  42 ++--
 .../ni_dkg/groth20_bls12_381/transcript.rs    |  28 +--
 .../bls12_381/tests/stability.rs              |  48 +---
 .../src/vault/local_csp_vault/builder/mod.rs  |  23 --
 .../src/vault/local_csp_vault/mod.rs          |  40 ----
 .../src/vault/local_csp_vault/ni_dkg/mod.rs   |  65 +++---
 .../crypto_service_provider/src/vault/mod.rs  |   6 +-
 .../tarpc_csp_vault_server.rs                 | 159 +++++--------
 20 files changed, 466 insertions(+), 1038 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9e920cd72d14..a40a196b767f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8433,7 +8433,6 @@ dependencies = [
  "paste",
  "rand 0.8.5",
  "rand_chacha 0.3.1",
- "rayon",
  "sha2 0.10.9",
  "subtle",
  "zeroize",
@@ -8667,7 +8666,6 @@ dependencies = [
  "proptest-derive",
  "rand 0.8.5",
  "rand_chacha 0.3.1",
- "rayon",
  "serde",
  "serde_bytes",
  "serde_cbor",
diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel b/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel
index 70d7bab675f5..f1e186f08aff 100644
--- a/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel
+++ b/rs/crypto/internal/crypto_lib/bls12_381/type/BUILD.bazel
@@ -16,7 +16,6 @@ DEPENDENCIES = [
     "@crate_index//:parking_lot",
     "@crate_index//:rand",
     "@crate_index//:rand_chacha",
-    "@crate_index//:rayon",
     "@crate_index//:sha2",
     "@crate_index//:subtle",
     "@crate_index//:zeroize",
@@ -41,7 +40,6 @@ rust_library(
     name = "type",
     srcs = glob(["src/**"]),
     aliases = ALIASES,
-    crate_features = ["rayon"],
     crate_name = "ic_crypto_internal_bls12_381_type",
     proc_macro_deps = MACRO_DEPENDENCIES,
     version = "0.9.0",
diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml b/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml
index cf6ee784dfd4..4b021d3d1453 100644
--- a/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml
+++ b/rs/crypto/internal/crypto_lib/bls12_381/type/Cargo.toml
@@ -8,10 +8,6 @@ documentation.workspace = true
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
-[features]
-default = ["rayon"]
-rayon = ["dep:rayon"]
-
 [dependencies]
 cached = { version = "0.49", default-features = false }
 hex = { workspace = true }
@@ -22,7 +18,6 @@ parking_lot = { workspace = true }
 paste = { workspace = true }
 rand = { workspace = true }
 rand_chacha = { workspace = true }
-rayon = { workspace = true, optional = true }
 sha2 = { workspace = true }
 subtle = { workspace = true }
 zeroize = { workspace = true }
diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs b/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs
index 724c62c01553..fb8ca60bd024 100644
--- a/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs
+++ b/rs/crypto/internal/crypto_lib/bls12_381/type/benches/ops.rs
@@ -1088,212 +1088,6 @@ macro_rules! crypto_bls12_381_mul2_precomputation_init {
 crypto_bls12_381_mul2_precomputation_init!(g1, G1Projective);
 crypto_bls12_381_mul2_precomputation_init!(g2, G2Projective);
 
-/// Benchmarks for operations that are parallelized when the `rayon` feature is
-/// enabled. Run with and without `--features rayon` and compare results.
-fn bls12_381_parallel_batch_ops(c: &mut Criterion) {
-    let mut group = c.benchmark_group("crypto_bls12_381_parallel_batch_ops");
-    group.warm_up_time(WARMUP_TIME);
-
-    let rng = &mut reproducible_rng();
-
-    for n in [2, 5, 10, 25, 50, 100] {
-        // --- G1 batch_mul ---
-        group.bench_with_input(BenchmarkId::new("g1_batch_mul", n), &n, |b, &size| {
-            b.iter_batched_ref(
-                || (random_g1(rng).to_affine(), n_random_scalar(size, rng)),
-                |(pt, scalars)| pt.batch_mul(scalars),
-                BatchSize::SmallInput,
-            )
-        });
-
-        // --- G1 batch_mul_vartime ---
-        group.bench_with_input(
-            BenchmarkId::new("g1_batch_mul_vartime", n),
-            &n,
-            |b, &size| {
-                b.iter_batched_ref(
-                    || (random_g1(rng).to_affine(), n_random_scalar(size, rng)),
-                    |(pt, scalars)| pt.batch_mul_vartime(scalars),
-                    BatchSize::SmallInput,
-                )
-            },
-        );
-
-        // --- G2 batch_mul ---
-        group.bench_with_input(BenchmarkId::new("g2_batch_mul", n), &n, |b, &size| {
-            b.iter_batched_ref(
-                || (random_g2(rng).to_affine(), n_random_scalar(size, rng)),
-                |(pt, scalars)| pt.batch_mul(scalars),
-                BatchSize::SmallInput,
-            )
-        });
-
-        // --- G2 batch_mul_vartime ---
-        group.bench_with_input(
-            BenchmarkId::new("g2_batch_mul_vartime", n),
-            &n,
-            |b, &size| {
-                b.iter_batched_ref(
-                    || (random_g2(rng).to_affine(), n_random_scalar(size, rng)),
-                    |(pt, scalars)| pt.batch_mul_vartime(scalars),
-                    BatchSize::SmallInput,
-                )
-            },
-        );
-
-        // --- G1 batch_deserialize ---
-        group.bench_with_input(
-            BenchmarkId::new("g1_batch_deserialize", n),
-            &n,
-            |b, &size| {
-                b.iter_batched_ref(
-                    || {
-                        (0..size)
-                            .map(|_| random_g1(rng).to_affine().serialize())
-                            .collect::<Vec<_>>()
-                    },
-                    |bytes| G1Affine::batch_deserialize(bytes),
-                    BatchSize::SmallInput,
-                )
-            },
-        );
-
-        // --- G2 batch_deserialize ---
-        group.bench_with_input(
-            BenchmarkId::new("g2_batch_deserialize", n),
-            &n,
-            |b, &size| {
-                b.iter_batched_ref(
-                    || {
-                        (0..size)
-                            .map(|_| random_g2(rng).to_affine().serialize())
-                            .collect::<Vec<_>>()
-                    },
-                    |bytes| G2Affine::batch_deserialize(bytes),
-                    BatchSize::SmallInput,
-                )
-            },
-        );
-
-        // --- G1 mul2 from precomputed table (N iterations) ---
-        group.bench_with_input(BenchmarkId::new("g1_mul2_from_tbl", n), &n, |b, &size| {
-            b.iter_batched_ref(
-                || {
-                    let tbl = G1Projective::compute_mul2_tbl(&random_g1(rng), &random_g1(rng));
-                    let a = n_random_scalar(size, rng);
-                    let b_vec = n_random_scalar(size, rng);
-                    (tbl, a, b_vec)
-                },
-                |(tbl, a, b_vec)| {
-                    a.iter()
-                        .zip(b_vec.iter())
-                        .map(|(ai, bi)| tbl.mul2(ai, bi))
-                        .collect::<Vec<_>>()
-                },
-                BatchSize::SmallInput,
-            )
-        });
-
-        // --- G2 mul2 from precomputed table (N iterations) ---
-        group.bench_with_input(BenchmarkId::new("g2_mul2_from_tbl", n), &n, |b, &size| {
-            b.iter_batched_ref(
-                || {
-                    let tbl = G2Projective::compute_mul2_tbl(&random_g2(rng), &random_g2(rng));
-                    let a = n_random_scalar(size, rng);
-                    let b_vec = n_random_scalar(size, rng);
-                    (tbl, a, b_vec)
-                },
-                |(tbl, a, b_vec)| {
-                    a.iter()
-                        .zip(b_vec.iter())
-                        .map(|(ai, bi)| tbl.mul2(ai, bi))
-                        .collect::<Vec<_>>()
-                },
-                BatchSize::SmallInput,
-            )
-        });
-
-        // --- batch signature verification (distinct) ---
-        group.bench_with_input(
-            BenchmarkId::new("verify_batch_distinct", n),
-            &n,
-            |b, &size| {
-                b.iter_batched_ref(
-                    || (n_batch_sig_verification_instances(size, rng), rng.fork()),
-                    |(sigs_pks_msgs, rng_fork)| {
-                        let refs: Vec<_> = sigs_pks_msgs
-                            .iter()
-                            .map(|(sig, pk, msg)| (sig, pk, msg))
-                            .collect();
-                        black_box(verify_bls_signature_batch_distinct(&refs, rng_fork));
-                    },
-                    BatchSize::SmallInput,
-                )
-            },
-        );
-    }
-
-    group.finish();
-}
-
-/// Benchmarks for `mul2_array` which requires compile-time-known sizes.
-/// Run with and without `--features rayon` and compare results.
-macro_rules! bench_mul2_array {
-    ($fn_name:ident, $group_name:expr, $projective:ty, $random_pt:expr, $( $n:expr ),+) => {
-        fn $fn_name(c: &mut Criterion) {
-            let mut group = c.benchmark_group($group_name);
-            group.warm_up_time(WARMUP_TIME);
-            let rng = &mut reproducible_rng();
-
-            $(
-                group.bench_function(stringify!($n), |b| {
-                    b.iter_batched_ref(
-                        || {
-                            let tbl = <$projective>::compute_mul2_tbl(
-                                &$random_pt(rng),
-                                &$random_pt(rng),
-                            );
-                            let a: [Scalar; $n] = std::array::from_fn(|_| random_scalar(rng));
-                            let b_arr: [Scalar; $n] = std::array::from_fn(|_| random_scalar(rng));
-                            (tbl, a, b_arr)
-                        },
-                        |(tbl, a, b_arr)| black_box(tbl.mul2_array(a, b_arr)),
-                        BatchSize::SmallInput,
-                    )
-                });
-            )+
-
-            group.finish();
-        }
-    };
-}
-
-bench_mul2_array!(
-    bls12_381_g1_mul2_array,
-    "crypto_bls12_381_parallel_g1_mul2_array",
-    G1Projective,
-    random_g1,
-    2,
-    5,
-    10,
-    25,
-    50,
-    100
-);
-
-bench_mul2_array!(
-    bls12_381_g2_mul2_array,
-    "crypto_bls12_381_parallel_g2_mul2_array",
-    G2Projective,
-    random_g2,
-    2,
-    5,
-    10,
-    25,
-    50,
-    100
-);
-
 criterion_group!(
     benches,
     bls12_381_scalar_ops,
@@ -1305,8 +1099,5 @@ criterion_group!(
     bls12_381_batch_sig_verification_multithreaded,
     mul2_precomputation_g1,
     mul2_precomputation_g2,
-    bls12_381_parallel_batch_ops,
-    bls12_381_g1_mul2_array,
-    bls12_381_g2_mul2_array,
 );
 criterion_main!(benches);
diff --git a/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs b/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs
index 7bd254279d72..addc4e422014 100644
--- a/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs
+++ b/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs
@@ -37,8 +37,6 @@ use itertools::multiunzip;
 use pairing::group::{Group, ff::Field};
 use paste::paste;
 use rand::{CryptoRng, Rng, RngCore};
-#[cfg(feature = "rayon")]
-use rayon::prelude::*;
 use std::sync::{Arc, LazyLock};
 use std::{collections::HashMap, fmt};
 use zeroize::{Zeroize, ZeroizeOnDrop};
@@ -502,28 +500,21 @@ impl Scalar {
     ///
     /// This function returns Ok only if all of the provided inputs
     /// represent a valid scalar.
-    pub fn batch_deserialize<B: AsRef<[u8]> + Sync>(
+    pub fn batch_deserialize<B: AsRef<[u8]>>(
         inputs: &[B],
     ) -> Result<Vec<Self>, PairingInvalidScalar> {
-        #[cfg(feature = "rayon")]
-        {
-            inputs.par_iter().map(Self::deserialize).collect()
-        }
-        #[cfg(not(feature = "rayon"))]
-        {
-            let mut r = Vec::with_capacity(inputs.len());
-            for input in inputs {
-                r.push(Self::deserialize(input)?);
-            }
-            Ok(r)
+        let mut r = Vec::with_capacity(inputs.len());
+        for input in inputs {
+            r.push(Self::deserialize(input)?);
         }
+        Ok(r)
     }
 
     /// Deserialize multiple scalars
     ///
     /// This function returns Ok only if all of the provided inputs
     /// represent a valid scalar.
-    pub fn batch_deserialize_array<B: AsRef<[u8]> + Sync, const N: usize>(
+    pub fn batch_deserialize_array<B: AsRef<[u8]>, const N: usize>(
         inputs: &[B; N],
     ) -> Result<[Self; N], PairingInvalidScalar> {
         // This could be made nicer, and avoid the heap allocation, by
@@ -1199,19 +1190,12 @@ macro_rules! define_affine_and_projective_types {
             /// This version verifies that the decoded point is within the prime order
             /// subgroup, and is safe to call on untrusted inputs. It returns Ok only
             /// if all of the provided bytes represent a valid point.
-            pub fn batch_deserialize<B: AsRef<[u8]> + Sync>(inputs: &[B]) -> Result<Vec<Self>, PairingInvalidPoint> {
-                #[cfg(feature = "rayon")]
-                {
-                    inputs.par_iter().map(Self::deserialize).collect()
-                }
-                #[cfg(not(feature = "rayon"))]
-                {
-                    let mut r = Vec::with_capacity(inputs.len());
-                    for input in inputs {
-                        r.push(Self::deserialize(input)?);
-                    }
-                    Ok(r)
+            pub fn batch_deserialize<B: AsRef<[u8]>>(inputs: &[B]) -> Result<Vec<Self>, PairingInvalidPoint> {
+                let mut r = Vec::with_capacity(inputs.len());
+                for input in inputs {
+                    r.push(Self::deserialize(input)?);
                 }
+                Ok(r)
             }
 
             /// Deserialize multiple points (compressed format only)
@@ -1219,7 +1203,7 @@ macro_rules! define_affine_and_projective_types {
             /// This version verifies that the decoded point is within the prime order
             /// subgroup, and is safe to call on untrusted inputs. It returns Ok only
             /// if all of the provided bytes represent a valid point.
-            pub fn batch_deserialize_array<B: AsRef<[u8]> + Sync, const N: usize>(inputs: &[B; N]) -> Result<[Self; N], PairingInvalidPoint> {
+            pub fn batch_deserialize_array<B: AsRef<[u8]>, const N: usize>(inputs: &[B; N]) -> Result<[Self; N], PairingInvalidPoint> {
 
                 // This could be made nicer, and avoid the heap allocation, by
                 // using array::try_map (currently only available in nightly)
@@ -1296,16 +1280,10 @@ macro_rules! define_affine_and_projective_types {
                 // the fact that we are using the same point for several multiplications,
                 // for example by using larger precomputed tables
 
-                #[cfg(feature = "rayon")]
-                let result: Vec<$projective> = scalars.par_iter().map(|s| self * s).collect();
-                #[cfg(not(feature = "rayon"))]
-                let result: Vec<$projective> = {
-                    let mut result = Vec::with_capacity(scalars.len());
-                    for scalar in scalars {
-                        result.push(self * scalar);
-                    }
-                    result
-                };
+                let mut result = Vec::with_capacity(scalars.len());
+                for scalar in scalars {
+                    result.push(self * scalar);
+                }
                 $projective::batch_normalize(&result)
             }
 
@@ -1319,32 +1297,17 @@ macro_rules! define_affine_and_projective_types {
                 // the fact that we are using the same point for several multiplications,
                 // for example by using larger precomputed tables
 
-                #[cfg(feature = "rayon")]
-                let result: Vec<$projective> = scalars.par_iter().map(|s| self.mul_vartime(s)).collect();
-                #[cfg(not(feature = "rayon"))]
-                let result: Vec<$projective> = {
-                    let mut result = Vec::with_capacity(scalars.len());
-                    for scalar in scalars {
-                        result.push(self.mul_vartime(scalar));
-                    }
-                    result
-                };
+                let mut result = Vec::with_capacity(scalars.len());
+                for scalar in scalars {
+                    result.push(self.mul_vartime(scalar));
+                }
                 $projective::batch_normalize(&result)
             }
 
             /// Batch multiplication
             pub fn batch_mul_array<const N: usize>(&self, scalars: &[Scalar; N]) -> [Self; N] {
-                #[cfg(feature = "rayon")]
-                {
-                    let results: Vec<$projective> = scalars.par_iter().map(|s| self * s).collect();
-                    let results: [$projective; N] = results.try_into().expect("Length preserved");
-                    $projective::batch_normalize_array(&results)
-                }
-                #[cfg(not(feature = "rayon"))]
-                {
-                    let v = scalars.clone().map(|s| self * s);
-                    $projective::batch_normalize_array(&v)
-                }
+                let v = scalars.clone().map(|s| self * s);
+                $projective::batch_normalize_array(&v)
             }
 
             /// Sum some points
@@ -1698,19 +1661,8 @@ macro_rules! declare_mul2_table_impl {
                 a: &[Scalar; N],
                 b: &[Scalar; N],
             ) -> [$projective; N] {
-                #[cfg(feature = "rayon")]
-                {
-                    let results: Vec<$projective> = (0..N)
-                        .into_par_iter()
-                        .map(|i| self.mul2(&a[i], &b[i]))
-                        .collect();
-                    results.try_into().ok().expect("Length preserved")
-                }
-                #[cfg(not(feature = "rayon"))]
-                {
-                    let iota: [usize; N] = std::array::from_fn(|i| i);
-                    iota.map(|i| self.mul2(&a[i], &b[i]))
-                }
+                let iota: [usize; N] = std::array::from_fn(|i| i);
+                iota.map(|i| self.mul2(&a[i], &b[i]))
             }
         }
     };
@@ -2815,26 +2767,13 @@ pub fn verify_bls_signature_batch_distinct<R: RngCore + CryptoRng>(
     let aggregate_sig = G1Projective::muln_affine_sparse_vartime(&sigs_scalars[..]).to_affine();
     let inv_g2_gen = G2Prepared::neg_generator();
 
-    #[cfg(feature = "rayon")]
-    let (msgs, pks_prepared): (Vec<G1Affine>, Vec<G2Prepared>) = {
-        let msgs: Vec<_> = msgs
-            .par_iter()
-            .zip(random_scalars.par_iter())
-            .map(|(&msg, s)| (msg * s).to_affine())
-            .collect();
-        let pks_prepared: Vec<_> = pks.into_par_iter().map(G2Prepared::from).collect();
-        (msgs, pks_prepared)
-    };
-    #[cfg(not(feature = "rayon"))]
-    let (msgs, pks_prepared): (Vec<G1Affine>, Vec<G2Prepared>) = {
-        let msgs: Vec<_> = msgs
-            .iter()
-            .zip(random_scalars.iter())
-            .map(|(&msg, s)| (msg * s).to_affine())
-            .collect();
-        let pks_prepared: Vec<_> = pks.into_iter().map(G2Prepared::from).collect();
-        (msgs, pks_prepared)
-    };
+    let msgs: Vec<_> = msgs
+        .iter()
+        .zip(random_scalars.iter())
+        .map(|(&msg, s)| (msg * s).to_affine())
+        .collect();
+
+    let pks_prepared: Vec<_> = pks.into_iter().map(G2Prepared::from).collect();
 
     let mut multipairing_inputs = Vec::with_capacity(sigs_pks_msgs.len() + 1);
     multipairing_inputs.push((&aggregate_sig, inv_g2_gen));
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel
index 64baf202de2e..b6d0d8319f4c 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/BUILD.bazel
@@ -21,7 +21,6 @@ DEPENDENCIES = [
     "@crate_index//:parking_lot",
     "@crate_index//:rand",
     "@crate_index//:rand_chacha",
-    "@crate_index//:rayon",
     "@crate_index//:serde",
     "@crate_index//:serde_bytes",
     "@crate_index//:serde_cbor",
@@ -120,9 +119,7 @@ rust_bench(
         ":bls12_381",
         "//rs/crypto/internal/crypto_lib/bls12_381/type",
         "//rs/crypto/internal/crypto_lib/seed",
-        "//rs/crypto/test_utils/reproducible_rng",
         "@crate_index//:criterion",
-        "@crate_index//:rand",
     ],
 )
 
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml
index 16c6b83d22ee..1bbe40d3a75c 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/Cargo.toml
@@ -18,7 +18,6 @@ ic-crypto-sha2 = { path = "../../../../sha2" }
 ic-types = { path = "../../../../../types/types" }
 parking_lot = { workspace = true }
 rand = { workspace = true }
-rayon = { workspace = true }
 rand_chacha = { workspace = true }
 serde = { workspace = true }
 serde_bytes = { workspace = true }
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs
index a2770dcaf3f6..ea5194f4a17c 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/fs_nidkg.rs
@@ -1,13 +1,9 @@
 use criterion::*;
-use ic_crypto_internal_bls12_381_type::Scalar;
 use ic_crypto_internal_seed::Seed;
 use ic_crypto_internal_threshold_sig_bls12381::ni_dkg::fs_ni_dkg::Epoch;
-use ic_crypto_internal_threshold_sig_bls12381::ni_dkg::fs_ni_dkg::forward_secure::*;
 use ic_crypto_internal_threshold_sig_bls12381::ni_dkg::groth20_bls12_381::{
     SecretKey, create_forward_secure_key_pair, update_key_inplace_to_epoch,
 };
-use ic_crypto_test_utils_reproducible_rng::reproducible_rng;
-use rand::{CryptoRng, Rng, RngCore};
 
 fn fs_key_generation(c: &mut Criterion) {
     let rng = &mut Seed::from_bytes(b"ic-crypto-benchmark-fsnidkg").into_rng();
@@ -42,72 +38,5 @@ fn fs_key_generation(c: &mut Criterion) {
     }
 }
 
-fn setup_keys_and_ciphertext<R: RngCore + CryptoRng>(
-    node_count: usize,
-    epoch: Epoch,
-    associated_data: &[u8],
-    rng: &mut R,
-) -> (Vec<(PublicKeyWithPop, SecretKey)>, FsEncryptionCiphertext) {
-    let sys = SysParam::global();
-    let key_gen_assoc_data = rng.r#gen::<[u8; 32]>();
-
-    let mut keys = Vec::with_capacity(node_count);
-    for _ in 0..node_count {
-        keys.push(kgen(&key_gen_assoc_data, sys, rng));
-    }
-
-    let ptext_chunks: Vec<_> = (0..node_count)
-        .map(|_| {
-            let s = Scalar::random(rng);
-            (
-                keys[0].0.public_key().clone(), // placeholder, replaced below
-                PlaintextChunks::from_scalar(&s),
-            )
-        })
-        .collect();
-
-    let pks_and_chunks: Vec<_> = keys
-        .iter()
-        .map(|k| k.0.public_key().clone())
-        .zip(ptext_chunks.into_iter().map(|(_, c)| c))
-        .collect();
-
-    let (crsz, _witness) = enc_chunks(&pks_and_chunks, epoch, associated_data, sys, rng);
-    (keys, crsz)
-}
-
-fn fs_encrypt_decrypt(c: &mut Criterion) {
-    let rng = &mut reproducible_rng();
-    let epoch = Epoch::from(0);
-    let associated_data = rng.r#gen::<[u8; 32]>();
-    let sys = SysParam::global();
-
-    let node_count = 28;
-    let (keys, crsz) = setup_keys_and_ciphertext(node_count, epoch, &associated_data, rng);
-
-    let mut group = c.benchmark_group("fs_nidkg_enc_dec");
-    group.sample_size(10);
-
-    group.bench_function(BenchmarkId::new("enc_chunks", node_count), |b| {
-        let pks_and_chunks: Vec<_> = keys
-            .iter()
-            .map(|k| {
-                let s = Scalar::random(rng);
-                (k.0.public_key().clone(), PlaintextChunks::from_scalar(&s))
-            })
-            .collect();
-
-        b.iter(|| enc_chunks(&pks_and_chunks, epoch, &associated_data, sys, rng))
-    });
-
-    group.bench_function("verify_ciphertext_integrity", |b| {
-        b.iter(|| verify_ciphertext_integrity(&crsz, epoch, &associated_data, sys))
-    });
-
-    group.bench_function("dec_chunks", |b| {
-        b.iter(|| dec_chunks(&keys[0].1, 0, &crsz, epoch, &associated_data))
-    });
-}
-
-criterion_group!(benches, fs_key_generation, fs_encrypt_decrypt);
+criterion_group!(benches, fs_key_generation);
 criterion_main!(benches);
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs
index 12a2b99d4c32..a61426e95372 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs
@@ -1,7 +1,6 @@
 use crate::ni_dkg::fs_ni_dkg::forward_secure::{CHUNK_MAX, CHUNK_MIN, CHUNK_SIZE};
 use crate::ni_dkg::fs_ni_dkg::nizk_chunking::{CHALLENGE_BITS, NUM_ZK_REPETITIONS};
 use ic_crypto_internal_bls12_381_type::{Gt, Scalar};
-use rayon::prelude::*;
 use std::sync::LazyLock;
 pub struct HonestDealerDlogLookupTable {
     table: Vec<u32>,
@@ -30,50 +29,67 @@ impl HonestDealerDlogLookupTable {
     pub fn solve_several(&self, targets: &[Gt]) -> Vec<Option<Scalar>> {
         use subtle::{ConditionallySelectable, ConstantTimeEq};
 
-        // Solve each target in parallel. For each target, perform a constant-time
-        // scan of the full table, then confirm the candidate (since hash collisions
-        // may have occurred if the dealer was dishonest).
-        targets
-            .par_iter()
-            .map(|target| {
-                let target_hash = target.short_hash_for_linear_search();
-
-                let mut candidate = 0_u16;
-                for x in CHUNK_MIN..=CHUNK_MAX {
-                    let x_hash = self.table[x as usize];
-                    /*
-                    Should this function need to be further optimized a good start would be to
-                    replace the statements below with something like
-
-                    // Variable diff equals 0 iff x_hash == target_hash;
-                    let diff = x_hash ^ target_hash;
-                    // Variable mask equals u32::max if diff == 0, or 0 otherwise
-                    let mask = 0u32.wrapping_sub((!diff & diff.wrapping_sub(1)) >> 31);
-                    // Selectively OR in x as the solution, iff x_hash == target_hash
-                    candidate |= (mask as u16) & (x as u16);
-
-                    This is more or less what ct_eq / conditional_assign end up being compiled
-                    to, with the difference that it skips the volatile loads which subtle uses
-                    to (try to) prevent LLVM from optimizing code into using a conditional jump.
-
-                    Also regarding performance note that it would be quite straightforward to
-                    express the above statements in a 4-wide or 8-wide SIMD expression suitable
-                    for SSE2 or AVX2 execution. This would not only be faster but also highly
-                    likely immune to compiler optimizations that would introduce jumps, since
-                    this would require moving the data out of SIMD registers which would be
-                    expensive, and so the compiler would avoid such a transformation.
-                    */
-                    let hashes_eq = x_hash.ct_eq(&target_hash);
-                    candidate.conditional_assign(&(x as u16), hashes_eq);
-                }
-
-                if Gt::g_mul_u16(candidate) == *target {
-                    Some(Scalar::from_u64(candidate as u64))
-                } else {
-                    None
-                }
-            })
-            .collect()
+        let target_hashes = targets
+            .iter()
+            .map(|t| t.short_hash_for_linear_search())
+            .collect::<Vec<_>>();
+
+        // This code assumes that CHUNK_MAX fits in a u16
+        let mut scan_results = vec![0_u16; targets.len()];
+
+        for x in CHUNK_MIN..=CHUNK_MAX {
+            let x_hash = self.table[x as usize];
+
+            for i in 0..targets.len() {
+                /*
+                Should this function need to be further optimized a good start would be to
+                replace the statements below with something like
+
+                // Variable diff equals 0 iff x_hash == target_hashes[i];
+                let diff = x_hash ^ target_hashes[i];
+                // Variable mask equals u32::max if diff == 0, or 0 otherwise
+                let mask = 0u32.wrapping_sub((!diff & diff.wrapping_sub(1)) >> 31);
+                // Selectively OR in x as the solution, iff x_hash == target_hashes[i]
+                scan_results[i] |= (mask as u16) & (x as u16);
+
+                This is more or less what ct_eq / conditional_assign end up being compiled
+                to, with the difference that it skips the volatile loads which subtle uses
+                to (try to) prevent LLVM from optimizing code into using a conditional jump.
+
+                Also regarding performance note that it would be quite straighforward to
+                express the above statements in a 4-wide or 8-wide SIMD expression suitable
+                for SSE2 or AVX2 execution. This would not only be faster but also highly
+                likely immune to compiler optimizations that would introduce jumps, since
+                this would require moving the data out of SIMD registers which would be
+                expensive, and so the compiler would avoid such a transformation.
+                */
+                let hashes_eq = x_hash.ct_eq(&target_hashes[i]);
+                scan_results[i].conditional_assign(&(x as u16), hashes_eq);
+            }
+        }
+
+        // Now confirm the results (since collisions may have occurred
+        // if the dealer was dishonest) and convert to Scalar
+
+        let mut results = Vec::with_capacity(targets.len());
+
+        for i in 0..targets.len() {
+            /*
+            After finding a candidate we must perform a multiplication in order
+            to tell if we found the dlog correctly, or if there was a collision
+            due to a dishonest dealer.
+
+            If no match was found then scan_results[i] will just be zero, we
+            perform the multiplication anyway and then reject the candidate dlog.
+             */
+            if Gt::g_mul_u16(scan_results[i]) == targets[i] {
+                results.push(Some(Scalar::from_u64(scan_results[i] as u64)));
+            } else {
+                results.push(None);
+            }
+        }
+
+        results
     }
 }
 
@@ -202,60 +218,22 @@ impl BabyStepGiantStepTable {
 
     /// Returns the table plus the giant step
     fn new(base: &Gt, table_size: usize) -> (Self, Gt) {
-        let num_threads = rayon::current_num_threads();
-        let chunk_size = table_size.div_ceil(num_threads.max(1));
-
-        /*
-         * We have to compute the entire table of [base*1, base*2, ..., base*t]
-         * but would prefer to do so in parallel.
-         *
-         * All `t` elements _can_ be computed perfectly in parallel, but at the cost
-         * of requiring a Gt multiplication per table element, which is rather costly.
-         *
-         * Instead split the table computation into chunks such that there is ~1 thread
-         * per chunk, compute the starting index for that chunk using a Gt mul, then
-         * use plain additions in Gt from there.
-         */
-        let chunks: Vec<Vec<_>> = (0..num_threads)
-            .into_par_iter()
-            .map(|t| {
-                let start = t * chunk_size;
-                let end = (start + chunk_size).min(table_size);
-                if start >= table_size {
-                    return vec![];
-                }
-                // TODO: Gt::mul_vartime_usize could use the 16-bit mul tables
-                // and be significantly faster than this
-                let mut accum = base.mul_vartime(&Scalar::from_usize(start));
-                let mut result = Vec::with_capacity(end - start);
-                for i in start..end {
-                    let (prefix, hash) = Self::hash_gt(&accum);
-                    result.push((hash, i, prefix));
-                    accum += base;
-                }
-                result
-            })
-            .collect();
-
         let mut table = Vec::with_capacity(table_size);
         let mut prefix_set =
             std::collections::HashSet::with_capacity_and_hasher(table_size, BuildShiftXorHasher);
-
-        for chunk in chunks {
-            for (hash, i, prefix) in chunk {
-                table.push((hash, i));
-                // we are not checking the return value of `insert` because
-                // duplicate prefixes do not affect the correctness
-                prefix_set.insert(prefix);
-            }
+        let mut accum = Gt::identity();
+
+        for i in 0..table_size {
+            let (prefix, hash) = Self::hash_gt(&accum);
+            table.push((hash, i));
+            // we are not checking the return value of `insert` because
+            // duplicate prefixes do not affect the correctness
+            prefix_set.insert(prefix);
+            accum += base;
         }
-
         table.sort_unstable();
 
-        // Giant step is -(base * table_size)
-        let giant_step = base.mul_vartime(&Scalar::from_usize(table_size)).neg();
-
-        (Self { table, prefix_set }, giant_step)
+        (Self { table, prefix_set }, accum.neg())
     }
 
     /// Return the value if gt exists in this table
@@ -338,35 +316,17 @@ impl BabyStepGiantStep {
     ///
     /// Returns `None` if the discrete logarithm is not in the searched range.
     pub fn solve(&self, tgt: &Gt) -> Option<Scalar> {
-        let start = tgt + &self.offset;
-
-        let num_threads = rayon::current_num_threads();
-        let steps_per_thread = self.giant_steps.div_ceil(num_threads.max(1));
+        let mut step = tgt + &self.offset;
 
-        (0..num_threads).into_par_iter().find_map_any(|t| {
-            let first_step = t * steps_per_thread;
-            let last_step = (first_step + steps_per_thread).min(self.giant_steps);
-            if first_step >= self.giant_steps {
-                return None;
-            }
-
-            // Compute starting point for this thread's range of giant steps
-            let mut step = if first_step == 0 {
-                start.clone()
-            } else {
-                &start + &self.giant_step.mul_vartime(&Scalar::from_usize(first_step))
-            };
-
-            for giant_step in first_step..last_step {
-                if let Some(i) = self.table.get(&step) {
-                    let x = self.lo + (i + self.n * giant_step) as isize;
-                    return Some(Scalar::from_isize(x));
-                }
-                step += &self.giant_step;
+        for giant_step in 0..self.giant_steps {
+            if let Some(i) = self.table.get(&step) {
+                let x = self.lo + (i + self.n * giant_step) as isize;
+                return Some(Scalar::from_isize(x));
             }
+            step += &self.giant_step;
+        }
 
-            None
-        })
+        None
     }
 }
 
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs
index 3c314437314c..8a76f5bf3ba1 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/forward_secure.rs
@@ -16,7 +16,6 @@ use crate::ni_dkg::fs_ni_dkg::encryption_key_pop::{
     EncryptionKeyInstance, EncryptionKeyPop, prove_pop, verify_pop,
 };
 use crate::ni_dkg::fs_ni_dkg::random_oracles::{HashedMap, random_oracle};
-use rayon::prelude::*;
 
 use crate::ni_dkg::fs_ni_dkg::forward_secure::CiphertextIntegrityError::{
     CrszVectorsLengthMismatch, InvalidNidkgCiphertext,
@@ -220,7 +219,7 @@ impl BTENode {
             b: G2Affine::deserialize_unchecked(&node.b).expect("Malformed secret key at BTENode.b"),
             d_t: node
                 .d_t
-                .par_iter()
+                .iter()
                 .map(|g2| {
                     G2Affine::deserialize_unchecked(&g2)
                         .expect("Malformed secret key at BTENode.d_t")
@@ -228,7 +227,7 @@ impl BTENode {
                 .collect(),
             d_h: node
                 .d_h
-                .par_iter()
+                .iter()
                 .map(|g2| {
                     G2Affine::deserialize_unchecked(&g2)
                         .expect("Malformed secret key at BTENode.d_h")
@@ -519,24 +518,19 @@ impl SecretKey {
                 b_blind += (&f_acc + &sys.f[n]) * &delta;
 
                 let e_blind = (&sys.h * &delta) + &node.e;
-
-                let d_t_vec: Vec<_> = d_t.iter().collect();
-                let d_t_blind_vec: Vec<G2Affine> = d_t_vec
-                    .par_iter()
-                    .enumerate()
-                    .map(|(i, d)| (&sys.f[n + 1 + i] * &delta + *d).to_affine())
-                    .collect();
                 let mut d_t_blind = LinkedList::new();
-                d_t_blind_vec
-                    .into_iter()
-                    .for_each(|d| d_t_blind.push_back(d));
-
-                let d_h_blind: Vec<G2Projective> = node
-                    .d_h
-                    .par_iter()
-                    .zip(sys.f_h.par_iter())
-                    .map(|(d, f)| (f * &delta) + d)
-                    .collect();
+                let mut k = n + 1;
+                d_t.iter().for_each(|d| {
+                    let tmp = (&sys.f[k] * &delta) + d;
+                    d_t_blind.push_back(tmp.to_affine());
+                    k += 1;
+                });
+
+                let mut d_h_blind = Vec::new();
+                node.d_h.iter().zip(&sys.f_h).for_each(|(d, f)| {
+                    let tmp = (f * &delta) + d;
+                    d_h_blind.push(tmp);
+                });
                 let d_h_blind = G2Projective::batch_normalize(&d_h_blind);
 
                 self.bte_nodes.push_back(BTENode {
@@ -560,26 +554,20 @@ impl SecretKey {
         let e = &sys.h * &delta + &node.e;
         b_acc += f_acc * &delta;
 
+        let mut d_t_blind = LinkedList::new();
         // Typically `d_t_blind` remains empty.
         // It is only nontrivial if `epoch` is less than LAMBDA_T bits.
-        let epoch_len = epoch.len();
-        let d_t_vec: Vec<_> = d_t.iter().collect();
-        let d_t_blind_vec: Vec<G2Affine> = d_t_vec
-            .par_iter()
-            .enumerate()
-            .map(|(i, d)| (&sys.f[epoch_len + i] * &delta + *d).to_affine())
-            .collect();
-        let mut d_t_blind = LinkedList::new();
-        d_t_blind_vec
-            .into_iter()
-            .for_each(|d| d_t_blind.push_back(d));
-
-        let d_h_blind: Vec<G2Affine> = node
-            .d_h
-            .par_iter()
-            .zip(sys.f_h.par_iter())
-            .map(|(d, f)| (f * &delta + d).to_affine())
-            .collect();
+        let mut k = epoch.len();
+        d_t.iter().for_each(|d| {
+            let tmp = (&sys.f[k] * &delta) + d;
+            d_t_blind.push_back(tmp.to_affine());
+            k += 1;
+        });
+        let mut d_h_blind = Vec::new();
+        node.d_h.iter().zip(&sys.f_h).for_each(|(d, f)| {
+            let tmp = f * &delta + d;
+            d_h_blind.push(tmp.to_affine());
+        });
 
         self.bte_nodes.push_back(BTENode {
             tau,
@@ -616,7 +604,7 @@ impl SecretKey {
         Self {
             bte_nodes: secret_key
                 .bte_nodes
-                .par_iter()
+                .iter()
                 .map(BTENode::deserialize_unchecked)
                 .collect(),
         }
@@ -671,7 +659,7 @@ impl FsEncryptionCiphertext {
 
         let cc: Vec<[G1Affine; NUM_CHUNKS]> = ciphertext
             .ciphertext_chunks
-            .par_iter()
+            .iter()
             .map(|cj| G1Affine::batch_deserialize_array(cj).or(Err("Malformed ciphertext_chunk")))
             .collect::<Result<Vec<_>, _>>()?;
 
@@ -708,6 +696,8 @@ pub fn enc_chunks<R: RngCore + CryptoRng>(
     sys: &SysParam,
     rng: &mut R,
 ) -> (FsEncryptionCiphertext, EncryptionWitness) {
+    let receivers = recipient_and_message.len();
+
     let g1 = G1Affine::generator();
 
     let s = Scalar::batch_random_array::<NUM_CHUNKS, R>(rng);
@@ -716,14 +706,23 @@ pub fn enc_chunks<R: RngCore + CryptoRng>(
     let r = Scalar::batch_random_array::<NUM_CHUNKS, R>(rng);
     let rr = g1.batch_mul_array(&r);
 
-    let cc: Vec<[G1Affine; NUM_CHUNKS]> = recipient_and_message
-        .par_iter()
-        .map(|(pk, ptext)| {
+    // TODO(CRP-2550) This can run in parallel (n = # receivers)
+    let cc = {
+        let mut cc: Vec<[G1Affine; NUM_CHUNKS]> = Vec::with_capacity(receivers);
+
+        for (pk, ptext) in recipient_and_message {
             let pk_g1_tbl = G1Projective::compute_mul2_affine_tbl(pk, g1);
+
             let chunks = ptext.chunks_as_scalars();
-            G1Projective::batch_normalize_array(&pk_g1_tbl.mul2_array(&r, &chunks))
-        })
-        .collect();
+
+            let enc_chunks =
+                G1Projective::batch_normalize_array(&pk_g1_tbl.mul2_array(&r, &chunks));
+
+            cc.push(enc_chunks);
+        }
+
+        cc
+    };
 
     let id = ftau_extended(&cc, &rr, &ss, sys, epoch, associated_data);
     let id_h_tbl = G2Projective::compute_mul2_tbl(&id, &G2Projective::from(&sys.h));
@@ -801,17 +800,19 @@ pub fn dec_chunks(
     let bneg = G2Prepared::from(&bneg);
     let eneg = G2Prepared::from(&dk.e.neg());
 
-    let powers: Vec<_> = (0..m)
-        .into_par_iter()
-        .map(|i| {
-            Gt::multipairing(&[
-                (&cj[i], G2Prepared::generator()),
-                (&crsz.rr[i], &bneg),
-                (&dk.a, &G2Prepared::from(&crsz.zz[i])),
-                (&crsz.ss[i], &eneg),
-            ])
-        })
-        .collect();
+    let mut powers = Vec::with_capacity(m);
+
+    // TODO(CRP-2550) These multipairings could be computed in parallel
+    for i in 0..m {
+        let x = Gt::multipairing(&[
+            (&cj[i], G2Prepared::generator()),
+            (&crsz.rr[i], &bneg),
+            (&dk.a, &G2Prepared::from(&crsz.zz[i])),
+            (&crsz.ss[i], &eneg),
+        ]);
+
+        powers.push(x);
+    }
 
     // Find discrete log of the powers
     let linear_search = HonestDealerDlogLookupTable::new();
@@ -820,23 +821,15 @@ pub fn dec_chunks(
         let mut dlogs = linear_search.solve_several(&powers);
 
         if dlogs.iter().any(|x| x.is_none()) {
-            // Cheating dealer case — each BSGS solve can take hours
+            // Cheating dealer case
             let cheating_solver = CheatingDealerDlogSolver::new(n, m);
 
-            let unsolved: Vec<_> = dlogs
-                .iter()
-                .enumerate()
-                .filter(|(_, d)| d.is_none())
-                .map(|(i, _)| i)
-                .collect();
-
-            let solved: Vec<_> = unsolved
-                .par_iter()
-                .map(|&i| (i, cheating_solver.solve(&powers[i])))
-                .collect();
-
-            for (i, result) in solved {
-                dlogs[i] = result;
+            for i in 0..dlogs.len() {
+                if dlogs[i].is_none() {
+                    // TODO(CRP-2550) All BSGS could be run in parallel
+                    // It may take hours to brute force a cheater's discrete log.
+                    dlogs[i] = cheating_solver.solve(&powers[i]);
+                }
             }
         }
 
@@ -885,20 +878,19 @@ pub fn verify_ciphertext_integrity(
     let g1_neg = G1Affine::generator().neg();
     let precomp_id = G2Prepared::from(&id);
 
-    let all_valid = (0..NUM_CHUNKS).into_par_iter().all(|i| {
+    // TODO(CRP-2550) Each of these checks could be run in parallel
+    for i in 0..NUM_CHUNKS {
         let r = &crsz.rr[i];
         let s = &crsz.ss[i];
         let z = G2Prepared::from(&crsz.zz[i]);
 
         let v = Gt::multipairing(&[(r, &precomp_id), (s, &sys.h_prep), (&g1_neg, &z)]);
-        v.is_identity()
-    });
 
-    if all_valid {
-        Ok(())
-    } else {
-        Err(InvalidNidkgCiphertext)
+        if !v.is_identity() {
+            return Err(InvalidNidkgCiphertext);
+        }
     }
+    Ok(())
 }
 
 #[derive(Eq, PartialEq, Debug)]
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs
index 18f0cd66df0d..7a8b71c962d9 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_chunking.rs
@@ -10,7 +10,6 @@ use ic_crypto_internal_types::curves::bls12_381::{FrBytes, G1Bytes};
 use ic_crypto_internal_types::sign::threshold_sig::ni_dkg::ni_dkg_groth20_bls12_381::ZKProofDec;
 use rand::{CryptoRng, Rng, RngCore, SeedableRng};
 use rand_chacha::ChaCha20Rng;
-use rayon::prelude::*;
 
 /// Domain separators for the zk proof of chunking
 const DOMAIN_PROOF_OF_CHUNKING_ORACLE: &str = "ic-zk-proof-of-chunking-chunking";
@@ -362,94 +361,85 @@ pub fn verify_chunking(
     let xpowers = Scalar::xpowers(&x, NUM_ZK_REPETITIONS);
     let g1 = &instance.g1_gen;
 
-    let (r1, (r2, r3)) = rayon::join(
-        || -> Result<(), ZkProofChunkingError> {
-            // First verification equation (for each i ∈ [1..n]):
-            //   ∏_{j=1}^{m} R_j^{Σ_{k=1}^{l} e_{ijk} · x^k} · dd_i == g_1^{z_{r_i}}
-            let rhs = g1.batch_mul_vartime(&nizk.z_r);
-
-            let lhs = {
-                let lhs: Vec<_> = e
-                    .par_iter()
-                    .enumerate()
-                    .map(|(i, e_i)| {
-                        let e_ijk_polynomials: Vec<_> = e_i
-                            .iter()
-                            .map(|e_ij| Scalar::muln_usize_vartime(&xpowers, e_ij))
-                            .collect();
-
-                        let rj_e_ijk = G1Projective::muln_affine_vartime(
-                            &instance.randomizers_r,
-                            &e_ijk_polynomials,
-                        );
-
-                        rj_e_ijk + &nizk.dd[i + 1]
-                    })
+    // TODO(CRP-2550) Verification of chunking proof could run in 3 threads
+
+    // Thread 1
+    {
+        /*
+        Verify lhs == rhs where
+        lhs = product [R_j ^ sum [e_ijk * x^k | k <- [1..l]] | j <- [1..m]] * dd_i
+        rhs = g1 ^ z_r_i | i <- [1..n]]
+         */
+
+        let rhs = g1.batch_mul_vartime(&nizk.z_r);
+
+        let lhs = {
+            let mut lhs = Vec::with_capacity(e.len());
+            // TODO(CRP-2550) this loop can run in parallel
+            for (i, e_i) in e.iter().enumerate() {
+                let e_ijk_polynomials: Vec<_> = e_i
+                    .iter()
+                    .map(|e_ij| Scalar::muln_usize_vartime(&xpowers, e_ij))
                     .collect();
-                G1Projective::batch_normalize(&lhs)
-            };
 
-            if lhs != rhs {
-                return Err(ZkProofChunkingError::InvalidProof);
+                let rj_e_ijk =
+                    G1Projective::muln_affine_vartime(&instance.randomizers_r, &e_ijk_polynomials);
+
+                lhs.push(rj_e_ijk + &nizk.dd[i + 1]);
             }
-            Ok(())
-        },
-        || {
-            rayon::join(
-                || -> Result<(), ZkProofChunkingError> {
-                    // Second verification equation:
-                    //   ∏_{k=1}^{l} bb_k^{x^k} · dd_0 == g_1^{z_β}
-                    let lhs = G1Projective::muln_affine_vartime(&nizk.bb, &xpowers) + &nizk.dd[0];
-
-                    let rhs = g1.mul_vartime(&nizk.z_beta);
-                    if lhs != rhs {
-                        return Err(ZkProofChunkingError::InvalidProof);
-                    }
-                    Ok(())
-                },
-                || -> Result<(), ZkProofChunkingError> {
-                    // Third verification equation:
-                    //   lhs = ∏_{k=1}^{l} (∏_{i=1}^{n} ∏_{j=1}^{m} c_{ij}^{e_{ijk}})^{x^k} · ∏_{k=1}^{l} cc_k^{x^k} · Y
-                    //   rhs = ∏_{i=1}^{n} y_i^{z_{r_i}} · y_0^{z_β} · g_1^{Σ_{k=1}^{l} z_{s_k} · x^k}
-
-                    let cij_to_eijks: Vec<G1Projective> = (0..NUM_ZK_REPETITIONS)
-                        .into_par_iter()
-                        .map(|k| {
-                            let c_ij_s: Vec<_> =
-                                instance.ciphertext_chunks.iter().flatten().collect();
-                            let e_ijk_s: Vec<_> = e
-                                .iter()
-                                .flatten()
-                                .map(|e_ij| Scalar::from_usize(e_ij[k]))
-                                .collect();
-                            if c_ij_s.len() != m * n || e_ijk_s.len() != m * n {
-                                return Err(ZkProofChunkingError::InvalidProof);
-                            }
-
-                            Ok(G1Projective::muln_affine_vartime_ref(&c_ij_s, &e_ijk_s)
-                                + &nizk.cc[k])
-                        })
-                        .collect::<Result<Vec<_>, _>>()?;
-
-                    let lhs =
-                        G1Projective::muln_vartime(&cij_to_eijks[..], &xpowers[..]) + &nizk.yy;
-
-                    let acc = Scalar::muln_vartime(&nizk.z_s, &xpowers);
-
-                    let rhs = G1Projective::muln_affine_vartime(&instance.public_keys, &nizk.z_r)
-                        + G1Projective::mul2_affine_vartime(&nizk.y0, &nizk.z_beta, g1, &acc);
-
-                    if lhs != rhs {
-                        return Err(ZkProofChunkingError::InvalidProof);
-                    }
-                    Ok(())
-                },
-            )
-        },
-    );
-    r1?;
-    r2?;
-    r3?;
+            G1Projective::batch_normalize(&lhs)
+        };
+
+        if lhs != rhs {
+            return Err(ZkProofChunkingError::InvalidProof);
+        }
+    }
+
+    // Thread 2
+    {
+        // Verify: product [bb_k ^ x^k | k <- [1..l]] * dd_0 == g1 ^ z_beta
+        let lhs = G1Projective::muln_affine_vartime(&nizk.bb, &xpowers) + &nizk.dd[0];
+
+        let rhs = g1.mul_vartime(&nizk.z_beta);
+        if lhs != rhs {
+            return Err(ZkProofChunkingError::InvalidProof);
+        }
+    }
+
+    // Thread 3
+    {
+        // Verify: product [product [chunk_ij ^ e_ijk | i <- [1..n], j <- [1..m]] ^ x^k
+        // | k <- [1..l]] * product [cc_k ^ x^k | k <- [1..l]] * Y   = product
+        // [y_i^z_ri | i <- [1..n]] * y0^z_beta * g_1 ^ sum [z_sk * x^k | k <- [1..l]]
+
+        // TODO(CRP-2550) this loop can run in parallel
+        let cij_to_eijks: Vec<G1Projective> = (0..NUM_ZK_REPETITIONS)
+            .map(|k| {
+                let c_ij_s: Vec<_> = instance.ciphertext_chunks.iter().flatten().collect();
+                let e_ijk_s: Vec<_> = e
+                    .iter()
+                    .flatten()
+                    .map(|e_ij| Scalar::from_usize(e_ij[k]))
+                    .collect();
+                if c_ij_s.len() != m * n || e_ijk_s.len() != m * n {
+                    return Err(ZkProofChunkingError::InvalidProof);
+                }
+
+                Ok(G1Projective::muln_affine_vartime_ref(&c_ij_s, &e_ijk_s) + &nizk.cc[k])
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        let lhs = G1Projective::muln_vartime(&cij_to_eijks[..], &xpowers[..]) + &nizk.yy;
+
+        let acc = Scalar::muln_vartime(&nizk.z_s, &xpowers);
+
+        let rhs = G1Projective::muln_affine_vartime(&instance.public_keys, &nizk.z_r)
+            + G1Projective::mul2_affine_vartime(&nizk.y0, &nizk.z_beta, g1, &acc);
+
+        if lhs != rhs {
+            return Err(ZkProofChunkingError::InvalidProof);
+        }
+    }
 
     Ok(())
 }
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs
index 58625958c278..337c2fbc703a 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/nizk_sharing.rs
@@ -265,93 +265,86 @@ pub fn verify_sharing(
     // x' = oracle(x, F, A, Y)
     let x_challenge = sharing_proof_challenge(&x, &first_move);
 
-    let (r1, (r2, r3)) = rayon::join(
-        || -> Result<(), ZkProofSharingError> {
-            // First verification equation
-            // R^x' * F == g_1^z_r
-            let lhs =
-                &instance.combined_randomizer.mul_vartime(&x_challenge) + &first_move.blinder_g1;
-            let rhs = instance.g1_gen.mul_vartime(&nizk.z_r);
-            if lhs != rhs {
-                return Err(ZkProofSharingError::InvalidProof);
+    // TODO(CRP-2550): The verification can run in three threads
+
+    // Thread 1
+    {
+        // First verification equation
+        // R^x' * F == g_1^z_r
+        let lhs = &instance.combined_randomizer.mul_vartime(&x_challenge) + &first_move.blinder_g1;
+        let rhs = instance.g1_gen.mul_vartime(&nizk.z_r);
+        if lhs != rhs {
+            return Err(ZkProofSharingError::InvalidProof);
+        }
+    }
+
+    // Thread 2
+    {
+        // Second verification equation
+        //   ( ∏_{k=0}^{t-1} A_k^{ Σ_{i=1}^n (i^k * x^i) } )^{x'} * A
+        //     == g_2^{z_α}
+
+        // We initialize ik with x_challenge (A) to avoid the point/scalar multiplication later
+        let mut ik = vec![x_challenge.clone(); instance.public_keys.len()];
+
+        let mut scalars = Vec::with_capacity(instance.public_coefficients.len());
+        for _pc in &instance.public_coefficients {
+            let acc = Scalar::muln_vartime(&ik, &xpow);
+            scalars.push(acc);
+
+            for i in 0..ik.len() {
+                ik[i] *= Scalar::from_u64((i + 1) as u64);
             }
-            Ok(())
-        },
-        || {
-            rayon::join(
-                || -> Result<(), ZkProofSharingError> {
-                    // Second verification equation
-                    //   ( ∏_{k=0}^{t-1} A_k^{ Σ_{i=1}^n (i^k * x^i) } )^{x'} * A
-                    //     == g_2^{z_α}
-
-                    // We initialize ik with x_challenge (A) to avoid the scalar multiplication later
-                    let mut ik = vec![x_challenge.clone(); instance.public_keys.len()];
-
-                    let mut scalars = Vec::with_capacity(instance.public_coefficients.len());
-                    for _pc in &instance.public_coefficients {
-                        let acc = Scalar::muln_vartime(&ik, &xpow);
-                        scalars.push(acc);
-
-                        for i in 0..ik.len() {
-                            ik[i] *= Scalar::from_u64((i + 1) as u64);
-                        }
-                    }
-                    let lhs = G2Projective::muln_affine_vartime(
-                        &instance.public_coefficients[..],
-                        &scalars[..],
-                    ) + &nizk.aa;
-
-                    let rhs = instance.g2_gen.mul_vartime(&nizk.z_alpha);
-
-                    if lhs != rhs {
-                        return Err(ZkProofSharingError::InvalidProof);
-                    }
-                    Ok(())
-                },
-                || -> Result<(), ZkProofSharingError> {
-                    // Third verification equation
-                    // Original relation:
-                    //   (∏_{i=1}^n C_i^{x^i})^{x'} * Y  ==  (∏_{i=1}^n y_i^{x^i})^{z_r} * g_1^{z_α}
-                    //
-                    // Equivalently, we can rewrite it by moving terms to opposite sides:
-                    //
-                    //   lhs = (∏_{i=1}^n C_i^{x^i})^{x'} * (∏_{i=1}^n y_i^{x^i})^{-z_r}
-                    //   rhs = g_1^{z_α} * Y^{-1}
-
-                    // The two expressions are re-arranged so that it becomes possible to compute
-                    // everything with a single multi scalar multiplication.
-
-                    let instance_inputs: Vec<_> = instance
-                        .combined_ciphertexts
-                        .iter()
-                        .chain(&instance.public_keys)
-                        .collect();
-                    let challenges = {
-                        let mut c = Vec::with_capacity(xpow.len() * 2);
-                        for xp in &xpow {
-                            c.push(xp * &x_challenge);
-                        }
-                        let z_r_neg = nizk.z_r.neg();
-                        for xp in &xpow {
-                            c.push(xp * &z_r_neg);
-                        }
-                        c
-                    };
-
-                    let lhs = G1Projective::muln_affine_vartime_ref(&instance_inputs, &challenges);
-                    let rhs = &instance.g1_gen.mul_vartime(&nizk.z_alpha) + &nizk.yy.neg();
-
-                    if lhs != rhs {
-                        return Err(ZkProofSharingError::InvalidProof);
-                    }
-                    Ok(())
-                },
-            )
-        },
-    );
-    r1?;
-    r2?;
-    r3?;
+        }
+        let lhs =
+            G2Projective::muln_affine_vartime(&instance.public_coefficients[..], &scalars[..])
+                + &nizk.aa;
+
+        let rhs = instance.g2_gen.mul_vartime(&nizk.z_alpha);
+
+        if lhs != rhs {
+            return Err(ZkProofSharingError::InvalidProof);
+        }
+    }
+
+    // Thread 3
+    {
+        // Third verification equation
+        // Original relation:
+        //   (∏_{i=1}^n C_i^{x^i})^{x'} * Y  ==  (∏_{i=1}^n y_i^{x^i})^{z_r} * g_1^{z_α}
+        //
+        // Equivalently, we can rewrite it by moving terms to opposite sides:
+        //
+        //   lhs = (∏_{i=1}^n C_i^{x^i})^{x'} * (∏_{i=1}^n y_i^{x^i})^{-z_r}
+        //   rhs = g_1^{z_α} * Y^{-1}
+
+        // The two expressions are re-arranged so that it becomes possible to compute
+        // everything with a single multi scalar multiplication.
+
+        let instance_inputs: Vec<_> = instance
+            .combined_ciphertexts
+            .iter()
+            .chain(&instance.public_keys)
+            .collect();
+        let challenges = {
+            let mut c = Vec::with_capacity(xpow.len() * 2);
+            for xp in &xpow {
+                c.push(xp * &x_challenge);
+            }
+            let z_r_neg = nizk.z_r.neg();
+            for xp in &xpow {
+                c.push(xp * &z_r_neg);
+            }
+            c
+        };
+
+        let lhs = G1Projective::muln_affine_vartime_ref(&instance_inputs, &challenges);
+        let rhs = &instance.g1_gen.mul_vartime(&nizk.z_alpha) + &nizk.yy.neg();
+
+        if lhs != rhs {
+            return Err(ZkProofSharingError::InvalidProof);
+        }
+    }
 
     Ok(())
 }
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs
index 924bbb5fe1ab..90a7cedbf6fa 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/encryption.rs
@@ -22,7 +22,6 @@ use ic_crypto_internal_types::sign::threshold_sig::{
 };
 use ic_types::{NumberOfNodes, crypto::AlgorithmId, crypto::error::InvalidArgumentError};
 use rand::{CryptoRng, RngCore};
-use rayon::prelude::*;
 use std::collections::BTreeMap;
 use std::convert::TryFrom;
 
@@ -101,12 +100,14 @@ pub fn encrypt_and_prove(
     public_coefficients: &PublicCoefficientsBytes,
     associated_data: &[u8],
 ) -> Result<(FsEncryptionCiphertextBytes, ZKProofDec, ZKProofShare), EncryptAndZKProveError> {
-    let public_keys: Result<Vec<G1Affine>, EncryptAndZKProveError> = key_message_pairs
-        .par_iter()
-        .enumerate()
-        .map(|(receiver_index, (pk_bytes, _))| {
-            let public_key_bytes = pk_bytes.as_bytes();
-            G1Affine::deserialize(public_key_bytes).map_err(|_| {
+    let receivers = key_message_pairs.len();
+
+    // TODO(CRP-2550) the deserialization step can run in parallel
+    let public_keys = {
+        let mut public_keys = Vec::with_capacity(receivers);
+        for receiver_index in 0..receivers {
+            let public_key_bytes = key_message_pairs[receiver_index].0.as_bytes();
+            let pk = G1Affine::deserialize(public_key_bytes).map_err(|_| {
                 EncryptAndZKProveError::MalformedFsPublicKeyError {
                     receiver_index: receiver_index as u32,
                     error: MalformedPublicKeyError {
@@ -115,10 +116,12 @@ pub fn encrypt_and_prove(
                         internal_error: "Could not parse public key.".to_string(),
                     },
                 }
-            })
-        })
-        .collect();
-    let public_keys = public_keys?;
+            })?;
+
+            public_keys.push(pk);
+        }
+        public_keys
+    };
 
     let plaintext_chunks = key_message_pairs
         .iter()
@@ -144,8 +147,7 @@ pub fn encrypt_and_prove(
         rng,
     );
 
-    // Generating the chunking and sharing proofs could happen in parallel but this
-    // changes the stability tests and may not be worth it
+    // TODO(CRP-2550) generating the chunking and sharing proofs can happen in parallel
 
     let chunking_proof = prove_chunking(
         &public_keys,
@@ -342,11 +344,10 @@ pub fn verify_zk_proofs(
     associated_data: &[u8],
 ) -> Result<(), CspDkgVerifyDealingError> {
     // Conversions
-    let pk_values: Vec<_> = receiver_fs_public_keys.values().collect();
-    let public_keys: Result<Vec<G1Affine>, CspDkgVerifyDealingError> = pk_values
-        .par_iter()
-        .enumerate()
-        .map(|(receiver_index, public_key)| {
+    let public_keys: Result<Vec<G1Affine>, CspDkgVerifyDealingError> = receiver_fs_public_keys
+        .values()
+        .zip(0..)
+        .map(|(public_key, receiver_index)| {
             G1Affine::deserialize(public_key.as_bytes()).map_err(|parse_error| {
                 let error = MalformedPublicKeyError {
                     algorithm: ALGORITHM_ID,
@@ -354,7 +355,7 @@ pub fn verify_zk_proofs(
                     internal_error: format!("{parse_error:?}"),
                 };
                 CspDkgVerifyDealingError::MalformedFsPublicKeyError {
-                    receiver_index: receiver_index as NodeIndex,
+                    receiver_index,
                     error,
                 }
             })
@@ -404,9 +405,10 @@ pub fn verify_zk_proofs(
 
     // More conversions
 
+    // TODO(CRP-2550) this loop can run in parallel
     let public_coefficients = public_coefficients
         .coefficients
-        .par_iter()
+        .iter()
         .map(G2Affine::deserialize)
         .collect::<Result<Vec<_>, _>>()
         .map_err(|_| {
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs
index 58a500c6de38..38703f73569f 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/groth20_bls12_381/transcript.rs
@@ -11,7 +11,6 @@ use crate::types as threshold_types;
 use ic_crypto_internal_bls12_381_type::{G2Projective, LagrangeCoefficients, NodeIndices};
 use ic_crypto_internal_types::sign::threshold_sig::ni_dkg::ni_dkg_groth20_bls12_381 as g20;
 use ic_types::{NodeIndex, NumberOfNodes};
-use rayon::prelude::*;
 use std::collections::BTreeMap;
 use std::convert::TryFrom;
 use std::ops::{AddAssign, MulAssign};
@@ -210,18 +209,18 @@ fn compute_transcript(
             LagrangeCoefficients::at_zero(&indices).into_coefficients()
         };
 
-        let combined: Vec<_> = (0..threshold.get() as usize)
-            .into_par_iter()
-            .map(|i| {
-                let points: Vec<_> = individual_public_coefficients
-                    .values()
-                    .map(|pc| pc.coefficients[i].0.clone())
-                    .collect();
-                crate::types::PublicKey(
-                    G2Projective::muln_affine_vartime(&points, &coefficients).to_affine(),
-                )
-            })
-            .collect();
+        let mut combined = Vec::with_capacity(threshold.get() as usize);
+
+        // TODO(CRP-2550) this loop can run in parallel
+        for i in 0..threshold.get() as usize {
+            let points: Vec<_> = individual_public_coefficients
+                .values()
+                .map(|pc| &pc.coefficients[i].0)
+                .collect();
+            combined.push(crate::types::PublicKey(
+                G2Projective::muln_affine_vartime_ref(&points, &coefficients).to_affine(),
+            ));
+        }
 
         let combined_public_coefficients = threshold_types::PublicCoefficients::new(combined);
         // This type conversion is needed because of the internal/CSP type duplication.
@@ -259,10 +258,11 @@ pub fn compute_threshold_signing_key(
 ) -> Result<threshold_types::SecretKeyBytes, ni_dkg_errors::CspDkgLoadPrivateKeyError> {
     // Get my shares
 
+    // TODO(CRP-2550) this loop can run in parallel
     let shares_from_each_dealer: Result<BTreeMap<NodeIndex, threshold_types::SecretKey>, _> =
         transcript
             .receiver_data
-            .par_iter()
+            .iter()
             .map(|(dealer_index, encrypted_shares)| {
                 let secret_key = decrypt(
                     encrypted_shares,
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs
index aafffaebf27d..b228d3a1b0ff 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/tests/stability.rs
@@ -60,50 +60,12 @@ fn test_updating_fs_secret_key_is_stable() {
     let mut sk = SecretKey::deserialize(&key_and_pop.secret_key);
 
     let seed = Seed::from_bytes(b"ic-crypto-update-key-seed");
+    update_key_inplace_to_epoch(&mut sk, Epoch::from(2), seed);
 
-    let key_hash_for_epoch = [
-        (
-            2,
-            "b79540c1e4148231c2e194b854e46f2449c4b8a534e854bf3f69d7f41d1af383",
-        ),
-        (
-            4,
-            "d6ecca647cf2069e1ae36a2dcda420932b1fc9ca66f8442953ca6d4cfc960a2b",
-        ),
-        (
-            9,
-            "5b7c6755f3a4ce7c399cb3f4db14cae977206deaa2a570b9969b7aa052d9c4f9",
-        ),
-        (
-            1023,
-            "8358c158fea524b8fe020973632423db981cff50c1c542572e74211a8e28b574",
-        ),
-        (
-            1024,
-            "27f15513f9d836fde9e80483498c66c7e77792a116676c1a46f368c2f6b3be2b",
-        ),
-        (
-            1 << 28,
-            "10aaf2a0245593cb6ecaa05cee4f2cb525f51fdd9a4ecfe106825990ba2b69f3",
-        ),
-        (
-            (1 << 29) - 1,
-            "b755fde575c74f5746450d6eb275cbf7437b3fdff17c0833fc22d00be15a6dbf",
-        ),
-        (
-            1 << 29,
-            "2fde8a15cad2917c81e8adf8828c6d477512677cd2f89367539c47ca3fe7948e",
-        ),
-    ];
-
-    for (epoch, expected_hash) in &key_hash_for_epoch {
-        update_key_inplace_to_epoch(
-            &mut sk,
-            Epoch::from(*epoch),
-            seed.derive(&format!("Epoch {}", epoch)),
-        );
-        assert_sha256_cbor_is(&sk.serialize(), expected_hash);
-    }
+    assert_sha256_cbor_is(
+        &sk.serialize(),
+        "f70143bdd1fad70ac7d24cda1f5141b6e730841361fc4c8e5059ddc0a1514e15",
+    );
 }
 
 fn create_receiver_keys(
diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs
index 2a8c4b228192..f87f3ce063c8 100644
--- a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs
+++ b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/builder/mod.rs
@@ -1,6 +1,5 @@
 use super::*;
 use rand::rngs::OsRng;
-use rayon::ThreadPool;
 
 pub struct LocalCspVaultBuilder<R, S, C, P> {
     csprng: Box<dyn FnOnce() -> R>,
@@ -10,7 +9,6 @@ pub struct LocalCspVaultBuilder<R, S, C, P> {
     time_source: Arc<dyn TimeSource>,
     metrics: Arc<CryptoMetrics>,
     logger: ReplicaLogger,
-    thread_pool_nidkg: Option<Arc<ThreadPool>>,
 }
 
 impl ProdLocalCspVault {
@@ -30,7 +28,6 @@ impl ProdLocalCspVault {
             time_source: Arc::new(SysTimeSource::new()),
             metrics,
             logger,
-            thread_pool_nidkg: None,
         }
     }
 
@@ -85,7 +82,6 @@ where
             time_source: self.time_source,
             metrics: self.metrics,
             logger: self.logger,
-            thread_pool_nidkg: self.thread_pool_nidkg,
         }
     }
 
@@ -101,7 +97,6 @@ where
             time_source: self.time_source,
             metrics: self.metrics,
             logger: self.logger,
-            thread_pool_nidkg: self.thread_pool_nidkg,
         }
     }
 
@@ -117,7 +112,6 @@ where
             time_source: self.time_source,
             metrics: self.metrics,
             logger: self.logger,
-            thread_pool_nidkg: self.thread_pool_nidkg,
         }
     }
 
@@ -133,7 +127,6 @@ where
             time_source: self.time_source,
             metrics: self.metrics,
             logger: self.logger,
-            thread_pool_nidkg: self.thread_pool_nidkg,
         }
     }
 
@@ -147,18 +140,6 @@ where
         self
     }
 
-    /// Installs an externally-managed rayon pool used for NIDKG work.
-    ///
-    /// If not called, `build` creates a small default pool — see
-    /// [`super::NIDKG_THREAD_POOL_SIZE`]. Prod callers that already own a
-    /// NIDKG pool (e.g., the remote vault server) should inject it here so
-    /// that a single pool is shared across the RPC dispatch layer and the
-    /// vault's internal `par_iter` calls.
-    pub fn with_nidkg_thread_pool(mut self, thread_pool_nidkg: Arc<ThreadPool>) -> Self {
-        self.thread_pool_nidkg = Some(thread_pool_nidkg);
-        self
-    }
-
     pub fn build(self) -> LocalCspVault<R, S, C, P> {
         LocalCspVault {
             csprng: CspRwLock::new_for_rng((self.csprng)(), Arc::clone(&self.metrics)),
@@ -177,9 +158,6 @@ where
             time_source: self.time_source,
             metrics: self.metrics,
             logger: self.logger,
-            thread_pool_nidkg: self
-                .thread_pool_nidkg
-                .unwrap_or_else(super::new_nidkg_thread_pool),
         }
     }
 
@@ -216,7 +194,6 @@ mod test_utils {
                 time_source: FastForwardTimeSource::new(),
                 logger: no_op_logger(),
                 metrics: Arc::new(CryptoMetrics::none()),
-                thread_pool_nidkg: None,
             }
         }
     }
diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs
index 58ca7aaa6964..b748dfcd72f6 100644
--- a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs
+++ b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/mod.rs
@@ -35,36 +35,10 @@ use ic_types::crypto::canister_threshold_sig::error::ThresholdEcdsaCreateSigShar
 use parking_lot::{RwLockReadGuard, RwLockWriteGuard};
 use rand::rngs::OsRng;
 use rand::{CryptoRng, Rng};
-use rayon::{ThreadPool, ThreadPoolBuilder};
 use std::collections::HashSet;
 use std::path::Path;
 use std::sync::Arc;
 
-/// Default size of the per-vault rayon pool used for NIDKG work when no
-/// externally-managed pool is injected via
-/// [`builder::LocalCspVaultBuilder::with_nidkg_thread_pool`].
-///
-/// A small fixed size prevents oversubscription when many `LocalCspVault`
-/// instances share a single process (e.g., registry integration tests that
-/// run several local ICs in parallel, or the in-replica vault path): without
-/// this bound, each vault's `par_iter` calls would fan out to the global
-/// rayon pool, which defaults to the number of logical cores.
-///
-/// The remote vault server injects its own NIDKG pool so that the outer RPC
-/// dispatch pool and the inner `par_iter` pool are the same pool; in that
-/// case this constant is unused.
-const NIDKG_THREAD_POOL_SIZE: usize = 3;
-
-pub(crate) fn new_nidkg_thread_pool() -> Arc<ThreadPool> {
-    Arc::new(
-        ThreadPoolBuilder::new()
-            .thread_name(|i| format!("ic-crypto-local-vault-nidkg-{i}"))
-            .num_threads(NIDKG_THREAD_POOL_SIZE)
-            .build()
-            .expect("failed to instantiate a thread pool"),
-    )
-}
-
 /// An implementation of `CspVault`-trait that runs in-process
 /// and uses local secret key stores.
 ///
@@ -114,7 +88,6 @@ pub struct LocalCspVault<
     time_source: Arc<dyn TimeSource>,
     logger: ReplicaLogger,
     metrics: Arc<CryptoMetrics>,
-    thread_pool_nidkg: Arc<ThreadPool>,
 }
 
 pub type ProdLocalCspVault =
@@ -222,19 +195,6 @@ impl<R: Rng + CryptoRng, S: SecretKeyStore, C: SecretKeyStore, P: PublicKeyStore
         Seed::from_bytes(&intermediate_seed) // use of intermediate seed minimizes locking time
     }
 
-    /// Runs `job` on this vault's NIDKG rayon thread pool.
-    ///
-    /// Rayon `par_iter` calls inside `job` use this pool instead of the
-    /// global one, which bounds the total number of worker threads when
-    /// multiple `LocalCspVault` instances coexist in a single process.
-    fn run_on_nidkg_thread_pool<F, T>(&self, job: F) -> T
-    where
-        F: FnOnce() -> T + Send,
-        T: Send,
-    {
-        self.thread_pool_nidkg.install(job)
-    }
-
     fn combined_commitment_opening_from_sks(
         &self,
         combined_commitment: &CombinedCommitment,
diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs
index da6cd14ce96d..9268c5bd4957 100644
--- a/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs
+++ b/rs/crypto/internal/crypto_service_provider/src/vault/local_csp_vault/ni_dkg/mod.rs
@@ -36,12 +36,8 @@ use std::collections::{BTreeMap, BTreeSet};
 #[cfg(test)]
 mod tests;
 
-impl<
-    R: Rng + CryptoRng + Send + Sync,
-    S: SecretKeyStore + Send + Sync,
-    C: SecretKeyStore + Send + Sync,
-    P: PublicKeyStore + Send + Sync,
-> NiDkgCspVault for LocalCspVault<R, S, C, P>
+impl<R: Rng + CryptoRng, S: SecretKeyStore, C: SecretKeyStore, P: PublicKeyStore> NiDkgCspVault
+    for LocalCspVault<R, S, C, P>
 {
     fn gen_dealing_encryption_key_pair(
         &self,
@@ -49,8 +45,7 @@ impl<
     ) -> Result<(CspFsEncryptionPublicKey, CspFsEncryptionPop), CspDkgCreateFsKeyError> {
         debug!(self.logger; crypto.method_name => "gen_dealing_encryption_key_pair");
         let start_time = self.metrics.now();
-        let result = self
-            .run_on_nidkg_thread_pool(|| self.gen_dealing_encryption_key_pair_internal(node_id));
+        let result = self.gen_dealing_encryption_key_pair_internal(node_id);
         self.metrics.observe_duration_seconds(
             MetricsDomain::NiDkgAlgorithm,
             MetricsScope::Local,
@@ -70,9 +65,7 @@ impl<
         debug!(self.logger; crypto.method_name => "update_forward_secure_epoch", crypto.dkg_epoch => epoch.get());
         let start_time = self.metrics.now();
 
-        let result = self.run_on_nidkg_thread_pool(|| {
-            self.update_forward_secure_epoch_internal(algorithm_id, key_id, epoch)
-        });
+        let result = self.update_forward_secure_epoch_internal(algorithm_id, key_id, epoch);
         self.metrics.observe_duration_seconds(
             MetricsDomain::NiDkgAlgorithm,
             MetricsScope::Local,
@@ -93,15 +86,13 @@ impl<
     ) -> Result<CspNiDkgDealing, ni_dkg_errors::CspDkgCreateDealingError> {
         debug!(self.logger; crypto.method_name => "create_dealing", crypto.dkg_epoch => epoch.get());
         let start_time = self.metrics.now();
-        let result = self.run_on_nidkg_thread_pool(|| {
-            self.create_dealing_internal(
-                algorithm_id,
-                dealer_index,
-                threshold,
-                epoch,
-                &receiver_keys,
-            )
-        });
+        let result = self.create_dealing_internal(
+            algorithm_id,
+            dealer_index,
+            threshold,
+            epoch,
+            &receiver_keys,
+        );
         self.metrics.observe_duration_seconds(
             MetricsDomain::NiDkgAlgorithm,
             MetricsScope::Local,
@@ -123,16 +114,14 @@ impl<
     ) -> Result<CspNiDkgDealing, ni_dkg_errors::CspDkgCreateReshareDealingError> {
         debug!(self.logger; crypto.method_name => "create_resharing_dealing", crypto.dkg_epoch => epoch.get());
         let start_time = self.metrics.now();
-        let result = self.run_on_nidkg_thread_pool(|| {
-            self.create_resharing_dealing_internal(
-                algorithm_id,
-                dealer_index,
-                threshold,
-                epoch,
-                &receiver_keys,
-                resharing_secret,
-            )
-        });
+        let result = self.create_resharing_dealing_internal(
+            algorithm_id,
+            dealer_index,
+            threshold,
+            epoch,
+            &receiver_keys,
+            resharing_secret,
+        );
         self.metrics.observe_duration_seconds(
             MetricsDomain::NiDkgAlgorithm,
             MetricsScope::Local,
@@ -153,15 +142,13 @@ impl<
     ) -> Result<(), ni_dkg_errors::CspDkgLoadPrivateKeyError> {
         debug!(self.logger; crypto.method_name => "load_threshold_signing_key", crypto.dkg_epoch => epoch.get());
         let start_time = self.metrics.now();
-        let result = self.run_on_nidkg_thread_pool(|| {
-            self.load_threshold_signing_key_internal(
-                algorithm_id,
-                epoch,
-                csp_transcript,
-                fs_key_id,
-                receiver_index,
-            )
-        });
+        let result = self.load_threshold_signing_key_internal(
+            algorithm_id,
+            epoch,
+            csp_transcript,
+            fs_key_id,
+            receiver_index,
+        );
         self.metrics.observe_duration_seconds(
             MetricsDomain::NiDkgAlgorithm,
             MetricsScope::Local,
diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs b/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs
index ffd44869c8b2..f7e6d03181ef 100644
--- a/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs
+++ b/rs/crypto/internal/crypto_service_provider/src/vault/mod.rs
@@ -1,5 +1,5 @@
 use self::api::CspVault;
-use self::local_csp_vault::{ProdLocalCspVault, new_nidkg_thread_pool};
+use self::local_csp_vault::ProdLocalCspVault;
 use self::remote_csp_vault::RemoteCspVault;
 use crate::vault::api::{
     CspBasicSignatureError, CspMultiSignatureError, CspSecretKeyStoreContainsError,
@@ -58,9 +58,7 @@ fn in_replica_vault(
         logger,
         "Proceeding with an in-replica csp_vault, CryptoConfig: {:?}", config
     );
-    let vault = ProdLocalCspVault::builder_in_dir(&config.crypto_root, metrics, logger)
-        .with_nidkg_thread_pool(new_nidkg_thread_pool())
-        .build();
+    let vault = ProdLocalCspVault::new_in_dir(&config.crypto_root, metrics, logger);
     Arc::new(vault)
 }
 
diff --git a/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs b/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs
index c06db635d5ee..3845fe21363f 100644
--- a/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs
+++ b/rs/crypto/internal/crypto_service_provider/src/vault/remote_csp_vault/tarpc_csp_vault_server.rs
@@ -12,7 +12,7 @@ use crate::vault::api::{
 use crate::vault::api::{
     CspPublicKeyStoreError, CspVault, IDkgDealingInternalBytes, IDkgTranscriptInternalBytes,
 };
-use crate::vault::local_csp_vault::ProdLocalCspVault;
+use crate::vault::local_csp_vault::{LocalCspVault, ProdLocalCspVault};
 use crate::vault::remote_csp_vault::ThresholdSchnorrCreateSigShareVaultError;
 use crate::vault::remote_csp_vault::{FOUR_GIGA_BYTES, PksAndSksContainsErrors};
 use crate::vault::remote_csp_vault::{TarpcCspVault, remote_vault_codec_builder};
@@ -64,8 +64,7 @@ use super::codec::{Bincode, CspVaultObserver, ObservableCodec};
 pub struct TarpcCspVaultServerImpl<C: CspVault> {
     local_csp_vault: Arc<C>,
     listener: UnixListener,
-    thread_pool_general: Arc<ThreadPool>,
-    thread_pool_nidkg: Arc<ThreadPool>,
+    thread_pool: Arc<ThreadPool>,
     max_frame_length: usize,
     metrics: Arc<CryptoMetrics>,
     #[allow(unused)]
@@ -78,8 +77,7 @@ pub struct TarpcCspVaultServerImpl<C: CspVault> {
 /// created through cloning (with `Clone`).
 struct TarpcCspVaultServerWorker<C: CspVault> {
     local_csp_vault: Arc<C>,
-    thread_pool_general: Arc<ThreadPool>,
-    thread_pool_nidkg: Arc<ThreadPool>,
+    thread_pool: Arc<ThreadPool>,
 }
 
 async fn execute_on_thread_pool<F, T>(thread_pool: &ThreadPool, job: F) -> T
@@ -110,8 +108,7 @@ impl<C: CspVault> Clone for TarpcCspVaultServerWorker<C> {
     fn clone(&self) -> Self {
         Self {
             local_csp_vault: Arc::clone(&self.local_csp_vault),
-            thread_pool_general: Arc::clone(&self.thread_pool_general),
-            thread_pool_nidkg: Arc::clone(&self.thread_pool_nidkg),
+            thread_pool: Arc::clone(&self.thread_pool),
         }
     }
 }
@@ -125,7 +122,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<CspSignature, CspBasicSignatureError> {
         let vault = self.local_csp_vault;
         let job = move || vault.sign(msg.into_vec());
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn gen_node_signing_key_pair(
@@ -134,7 +131,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<CspPublicKey, CspBasicSignatureKeygenError> {
         let vault = self.local_csp_vault;
         let job = move || vault.gen_node_signing_key_pair();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // `MultiSignatureCspVault`-methods.
@@ -147,7 +144,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<CspSignature, CspMultiSignatureError> {
         let vault = self.local_csp_vault;
         let job = move || vault.multi_sign(algorithm_id, message.into_vec(), key_id);
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn gen_committee_signing_key_pair(
@@ -156,7 +153,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<(CspPublicKey, CspPop), CspMultiSignatureKeygenError> {
         let vault = self.local_csp_vault;
         let job = move || vault.gen_committee_signing_key_pair();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // `ThresholdSignatureCspVault`-methods.
@@ -169,7 +166,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<CspSignature, CspThresholdSignError> {
         let vault = self.local_csp_vault;
         let job = move || vault.threshold_sign(algorithm_id, message.into_vec(), key_id);
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // `NiDkgCspVault`-methods.
@@ -180,7 +177,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<(CspFsEncryptionPublicKey, CspFsEncryptionPop), CspDkgCreateFsKeyError> {
         let vault = self.local_csp_vault;
         let job = move || vault.gen_dealing_encryption_key_pair(node_id);
-        execute_on_thread_pool(&self.thread_pool_nidkg, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn update_forward_secure_epoch(
@@ -192,7 +189,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<(), CspDkgUpdateFsEpochError> {
         let vault = self.local_csp_vault;
         let job = move || vault.update_forward_secure_epoch(algorithm_id, key_id, epoch);
-        execute_on_thread_pool(&self.thread_pool_nidkg, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn create_dealing(
@@ -208,7 +205,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
         let job = move || {
             vault.create_dealing(algorithm_id, dealer_index, threshold, epoch, receiver_keys)
         };
-        execute_on_thread_pool(&self.thread_pool_nidkg, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn create_resharing_dealing(
@@ -232,7 +229,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 resharing_secret,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_nidkg, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn load_threshold_signing_key(
@@ -254,7 +251,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 receiver_index,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_nidkg, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn retain_threshold_keys_if_present(
@@ -264,7 +261,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<(), CspDkgRetainThresholdKeysError> {
         let vault = self.local_csp_vault;
         let job = move || vault.retain_threshold_keys_if_present(active_key_ids);
-        execute_on_thread_pool(&self.thread_pool_nidkg, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // SecretKeyStoreCspVault-methods.
@@ -275,7 +272,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<bool, CspSecretKeyStoreContainsError> {
         let vault = self.local_csp_vault;
         let job = move || vault.sks_contains(key_id);
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // PublicKeyStoreCspVault-methods.
@@ -285,7 +282,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<CurrentNodePublicKeys, CspPublicKeyStoreError> {
         let vault = self.local_csp_vault;
         let job = move || vault.current_node_public_keys();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn current_node_public_keys_with_timestamps(
@@ -294,13 +291,13 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<CurrentNodePublicKeys, CspPublicKeyStoreError> {
         let vault = self.local_csp_vault;
         let job = move || vault.current_node_public_keys_with_timestamps();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn idkg_key_count(self, _: context::Context) -> Result<usize, CspPublicKeyStoreError> {
         let vault = self.local_csp_vault;
         let job = move || vault.idkg_dealing_encryption_pubkeys_count();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // PublicAndSecretKeyStoreCspVault-methods.
@@ -311,7 +308,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<(), PksAndSksContainsErrors> {
         let vault = self.local_csp_vault;
         let job = move || vault.pks_and_sks_contains(external_public_keys);
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn validate_pks_and_sks(
@@ -320,7 +317,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<ValidNodePublicKeys, ValidatePksAndSksError> {
         let vault = self.local_csp_vault;
         let job = move || vault.validate_pks_and_sks();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // 'TlsHandshakeCspVault'-methods.
@@ -331,7 +328,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<TlsPublicKeyCert, CspTlsKeygenError> {
         let vault = self.local_csp_vault;
         let job = move || vault.gen_tls_key_pair(node);
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn tls_sign(
@@ -342,7 +339,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<CspSignature, CspTlsSignError> {
         let vault = self.local_csp_vault;
         let job = move || vault.tls_sign(message.into_vec(), key_id);
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // `IDkgProtocolCspVault`-methods.
@@ -367,7 +364,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 transcript_operation,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn idkg_verify_dealing_private(
@@ -391,7 +388,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 context_data.into_vec(),
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn idkg_load_transcript(
@@ -415,7 +412,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 transcript,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn idkg_load_transcript_with_openings(
@@ -441,7 +438,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 transcript,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn idkg_retain_active_keys(
@@ -452,7 +449,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<(), IDkgRetainKeysError> {
         let vault = self.local_csp_vault;
         let job = move || vault.idkg_retain_active_keys(active_key_ids, oldest_public_key);
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn idkg_gen_dealing_encryption_key_pair(
@@ -461,7 +458,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<MEGaPublicKey, CspCreateMEGaKeyError> {
         let vault = self.local_csp_vault;
         let job = move || vault.idkg_gen_dealing_encryption_key_pair();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn idkg_open_dealing(
@@ -485,7 +482,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 opener_key_id,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // `ThresholdEcdsaSignerCspVault`-methods
@@ -516,7 +513,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 algorithm_id,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     // `ThresholdSchnorrSignerCspVault`-methods
@@ -543,7 +540,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 algorithm_id,
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn create_encrypted_vetkd_key_share(
@@ -565,7 +562,7 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
                 input.into_vec(),
             )
         };
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 
     async fn new_public_seed(
@@ -574,12 +571,11 @@ impl<C: CspVault + 'static> TarpcCspVault for TarpcCspVaultServerWorker<C> {
     ) -> Result<Seed, PublicRandomSeedGeneratorError> {
         let vault = self.local_csp_vault;
         let job = move || vault.new_public_seed();
-        execute_on_thread_pool(&self.thread_pool_general, job).await
+        execute_on_thread_pool(&self.thread_pool, job).await
     }
 }
 
-type VaultFactory<C> =
-    dyn Fn(&ReplicaLogger, Arc<CryptoMetrics>, Arc<ThreadPool>) -> Arc<C> + Send + Sync;
+type VaultFactory<C> = dyn Fn(&ReplicaLogger, Arc<CryptoMetrics>) -> Arc<C> + Send + Sync;
 
 pub struct TarpcCspVaultServerImplBuilder<C> {
     local_csp_vault_factory: Box<VaultFactory<C>>,
@@ -591,29 +587,21 @@ pub struct TarpcCspVaultServerImplBuilder<C> {
 impl TarpcCspVaultServerImplBuilder<ProdLocalCspVault> {
     pub fn new(key_store_dir: &Path) -> Self {
         let key_store_path = key_store_dir.to_path_buf();
-        let local_csp_vault_factory =
-            Box::new(move |logger: &ReplicaLogger, metrics, thread_pool_nidkg| {
-                Arc::new(
-                    ProdLocalCspVault::builder_in_dir(
-                        &key_store_path,
-                        metrics,
-                        new_logger!(logger),
-                    )
-                    .with_nidkg_thread_pool(thread_pool_nidkg)
-                    .build(),
-                )
-            });
+        let local_csp_vault_factory = Box::new(move |logger: &ReplicaLogger, metrics| {
+            Arc::new(LocalCspVault::new_in_dir(
+                &key_store_path,
+                metrics,
+                new_logger!(logger),
+            ))
+        });
         Self::new_internal(local_csp_vault_factory)
     }
 }
 
 impl<C: 'static + Send + Sync> TarpcCspVaultServerImplBuilder<C> {
     pub fn new_with_local_csp_vault(local_csp_vault: Arc<C>) -> Self {
-        let local_csp_vault_factory = Box::new(
-            move |_logger: &ReplicaLogger, _metrics, _thread_pool_nidkg| {
-                Arc::clone(&local_csp_vault)
-            },
-        );
+        let local_csp_vault_factory =
+            Box::new(move |_logger: &ReplicaLogger, _metrics| Arc::clone(&local_csp_vault));
         Self::new_internal(local_csp_vault_factory)
     }
 }
@@ -644,51 +632,26 @@ impl<C> TarpcCspVaultServerImplBuilder<C> {
     }
 }
 
-fn create_thread_pool(ident: &'static str) -> Arc<ThreadPool> {
-    let num_threads = {
-        let cores = std::thread::available_parallelism()
-            .expect("obtaining the number of available cores should never fail")
-            .get();
-
-        if cores >= 16 {
-            8
-        } else if cores >= 8 {
-            4
-        } else {
-            2
-        }
-    };
-
-    Arc::new(
-        ThreadPoolBuilder::new()
-            .thread_name(move |i| format!("ic-crypto-worker-{ident}-{i}"))
-            .num_threads(num_threads)
-            .build()
-            .expect("failed to instantiate a thread pool"),
-    )
-}
-
 impl<C: CspVault> TarpcCspVaultServerImplBuilder<C> {
     pub fn build(&self, listener: UnixListener) -> TarpcCspVaultServerImpl<C> {
         info!(&self.logger, "Starting new RPC CSP vault server");
-        // Created before the local vault so that the same pool can be shared:
-        // it bounds both the RPC-dispatch concurrency (see
-        // `execute_on_thread_pool`) and the inner `par_iter` calls inside
-        // `LocalCspVault`. With one shared pool, the vault's
-        // `ThreadPool::install` inside a job already running on this pool is
-        // effectively a no-op, avoiding the double-pool downsize that would
-        // otherwise occur.
-        let thread_pool_nidkg = create_thread_pool("nidkg");
-        let local_csp_vault: Arc<C> = (self.local_csp_vault_factory)(
-            &self.logger,
-            Arc::clone(&self.metrics),
-            Arc::clone(&thread_pool_nidkg),
-        );
+        let local_csp_vault: Arc<C> =
+            (self.local_csp_vault_factory)(&self.logger, Arc::clone(&self.metrics));
         TarpcCspVaultServerImpl {
             local_csp_vault,
             listener,
-            thread_pool_general: create_thread_pool("general"),
-            thread_pool_nidkg,
+            // defaults the number of threads to the number of CPUs
+            thread_pool: Arc::new(
+                ThreadPoolBuilder::new()
+                    .thread_name(|i| format!("ic-crypto-csp-{i}"))
+                    .num_threads(
+                        std::thread::available_parallelism()
+                            .expect("obtaining the number of available cores should never fail")
+                            .get(),
+                    )
+                    .build()
+                    .expect("failed to instantiate a thread pool"),
+            ),
             max_frame_length: self.max_frame_length,
             metrics: Arc::clone(&self.metrics),
             logger: new_logger!(&self.logger),
@@ -728,8 +691,7 @@ impl<C: CspVault + 'static> TarpcCspVaultServerImpl<C> {
                 )
             });
             let local_csp_vault = Arc::clone(&self.local_csp_vault);
-            let thread_pool_general = Arc::clone(&self.thread_pool_general);
-            let thread_pool_nidkg = Arc::clone(&self.thread_pool_nidkg);
+            let thread_pool = Arc::clone(&self.thread_pool);
             let codec = ObservableCodec::new(
                 Bincode::default(),
                 CspVaultObserver::new(new_logger!(&self.logger), Arc::clone(&self.metrics)),
@@ -739,8 +701,7 @@ impl<C: CspVault + 'static> TarpcCspVaultServerImpl<C> {
                 let transport = serde_transport::new(framed, codec);
                 let worker = TarpcCspVaultServerWorker {
                     local_csp_vault,
-                    thread_pool_general,
-                    thread_pool_nidkg,
+                    thread_pool,
                 };
                 let channel = BaseChannel::with_defaults(transport);
                 channel