diff --git a/jolt-core/src/poly/commitment/dory/commitment_scheme.rs b/jolt-core/src/poly/commitment/dory/commitment_scheme.rs index e4f85acd8..898f33c00 100644 --- a/jolt-core/src/poly/commitment/dory/commitment_scheme.rs +++ b/jolt-core/src/poly/commitment/dory/commitment_scheme.rs @@ -16,8 +16,7 @@ use crate::{ transcripts::Transcript, utils::{errors::ProofVerifyError, math::Math, small_scalar::SmallScalar}, }; -use ark_bn254::{G1Affine, G1Projective}; -use ark_ec::CurveGroup; +use ark_bn254::G1Projective; use ark_ff::Zero; use dory::primitives::{ arithmetic::{Field as DoryField, Group, PairingCurve}, @@ -100,6 +99,12 @@ impl CommitmentScheme for DoryCommitmentScheme { #[cfg(not(test))] DoryGlobals::init_prepared_cache(&setup.g1_vec, &setup.g2_vec); + // Unlike the prepared-point cache above, the affine G1 cache is safe to + // initialize in tests: it uses "replace if larger" semantics, so a small + // setup never overwrites a larger one. All setups share the same URS, so + // the first N entries are identical regardless of setup size. 
+ DoryGlobals::init_affine_g1_cache(&setup.g1_vec); + setup } @@ -314,16 +319,10 @@ impl StreamingCommitmentScheme for DoryCommitmentScheme { debug_assert_eq!(chunk.len(), DoryGlobals::get_num_columns()); let row_len = DoryGlobals::get_num_columns(); - let g1_slice = - unsafe { std::slice::from_raw_parts(setup.g1_vec.as_ptr(), setup.g1_vec.len()) }; - - let g1_bases: Vec<G1Affine> = g1_slice[..row_len] - .iter() - .map(|g| g.0.into_affine()) - .collect(); + let g1_bases = DoryGlobals::affine_g1_bases_or_init(&setup.g1_vec); let row_commitment = - ArkG1(T::msm(&g1_bases[..chunk.len()], chunk).expect("MSM calculation failed.")); + ArkG1(T::msm(&g1_bases[..row_len], chunk).expect("MSM calculation failed.")); vec![row_commitment] } @@ -339,13 +338,7 @@ impl StreamingCommitmentScheme for DoryCommitmentScheme { let K = onehot_k; let row_len = DoryGlobals::get_num_columns(); - let g1_slice = - unsafe { std::slice::from_raw_parts(setup.g1_vec.as_ptr(), setup.g1_vec.len()) }; - - let g1_bases: Vec<G1Affine> = g1_slice[..row_len] - .iter() - .map(|g| g.0.into_affine()) - .collect(); + let g1_bases = DoryGlobals::affine_g1_bases_or_init(&setup.g1_vec); let mut indices_per_k: Vec<Vec<usize>> = vec![Vec::new(); K]; for (col_index, k) in chunk.iter().enumerate() { if *k != 0 { indices_per_k[*k as usize - 1].push(col_index); } } @@ -354,7 +347,8 @@ impl StreamingCommitmentScheme for DoryCommitmentScheme { - let results = jolt_optimizations::batch_g1_additions_multi(&g1_bases, &indices_per_k); + let results = + jolt_optimizations::batch_g1_additions_multi(&g1_bases[..row_len], &indices_per_k); let mut row_commitments = vec![ArkG1(G1Projective::zero()); K]; for (k, result) in results.into_iter().enumerate() { diff --git a/jolt-core/src/poly/commitment/dory/dory_globals.rs b/jolt-core/src/poly/commitment/dory/dory_globals.rs index 8772532d7..cad43a8a0 100644 --- a/jolt-core/src/poly/commitment/dory/dory_globals.rs +++ b/jolt-core/src/poly/commitment/dory/dory_globals.rs @@ -2,10 +2,13 @@ use crate::utils::math::Math; use allocative::Allocative; +use ark_bn254::G1Affine; 
+use ark_ec::CurveGroup; use dory::backends::arkworks::{init_cache, ArkG1, ArkG2}; + use std::sync::{ atomic::{AtomicU8, Ordering}, - RwLock, + RwLock, RwLockReadGuard, }; #[cfg(test)] use std::{ @@ -162,6 +165,10 @@ static CURRENT_CONTEXT: AtomicU8 = AtomicU8::new(0); // Layout tracking: 0=CycleMajor, 1=AddressMajor static CURRENT_LAYOUT: AtomicU8 = AtomicU8::new(0); +// Cached affine G1 generators — avoids repeated projective-to-affine conversion +// in process_chunk/process_chunk_onehot during streaming commitment. +static AFFINE_G1_CACHE: RwLock<Vec<G1Affine>> = RwLock::new(Vec::new()); + /// Dory commitment context - determines which set of global parameters to use #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DoryContext { @@ -491,6 +498,9 @@ impl DoryGlobals { *UNTRUSTED_ADVICE_MAX_NUM_ROWS.write().unwrap() = None; *UNTRUSTED_ADVICE_NUM_COLUMNS.write().unwrap() = None; + // Reset affine G1 cache + AFFINE_G1_CACHE.write().unwrap().clear(); + CURRENT_CONTEXT.store(0, Ordering::SeqCst); } @@ -511,4 +521,48 @@ impl DoryGlobals { pub fn init_prepared_cache(g1_vec: &[ArkG1], g2_vec: &[ArkG2]) { init_cache(g1_vec, g2_vec); } + + /// Initialize the affine G1 generator cache from the prover setup. + /// + /// Converts projective G1 generators to affine form using batch normalization + /// (a single field inversion via Montgomery's trick + 2n multiplications) + /// and caches the result. Subsequent calls to `affine_g1_bases_or_init` + /// return the cached read guard without recomputation. + /// + /// The cache is replaced if the new set is larger than the existing one. 
+ pub fn init_affine_g1_cache(g1_vec: &[ArkG1]) { + let needed = g1_vec.len(); + { + let cache = AFFINE_G1_CACHE.read().unwrap(); + if cache.len() >= needed { + return; + } + } + let projective: Vec<_> = g1_vec.iter().map(|g| g.0).collect(); + let affine = ark_bn254::G1Projective::normalize_batch(&projective); + *AFFINE_G1_CACHE.write().unwrap() = affine; + } + + /// Return a read guard over the cached affine G1 bases, initializing + /// from `g1_vec` on first use. + /// + /// In production, `init_affine_g1_cache` is called once during `setup_prover` + /// so this just returns the pre-populated cache. In tests (or if the cache + /// was reset), it lazily computes the affine bases on first access. + /// + /// The read-then-write sequence has a benign TOCTOU race: two threads may + /// both observe an empty cache and both call `init_affine_g1_cache`. The + /// worst case is redundant work — correctness is unaffected because the + /// generators are deterministic and `init_affine_g1_cache` replaces the + /// cache atomically under a write lock. + pub fn affine_g1_bases_or_init(g1_vec: &[ArkG1]) -> RwLockReadGuard<'static, Vec<G1Affine>> { + { + let cache = AFFINE_G1_CACHE.read().unwrap(); + if cache.len() >= g1_vec.len() { + return cache; + } + } + Self::init_affine_g1_cache(g1_vec); + AFFINE_G1_CACHE.read().unwrap() + } + }