From 7d0d1c05770ed592518762c0133b18222ed44d2f Mon Sep 17 00:00:00 2001 From: Sami Farin Date: Wed, 31 Dec 2025 09:28:39 +0200 Subject: [PATCH 1/3] Make some calculations 250-500 times faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: $ time ./fend 2^1555.555|b3sum 0567ac5de698ba87f79145ebf29c1ab169d37b185e76ddd4b8ecc2de2c82d4de - real 1m42,334s user 1m41,908s sys 0m0,032s Afer: $ time ./fend 2^1555.555|b3sum 0567ac5de698ba87f79145ebf29c1ab169d37b185e76ddd4b8ecc2de2c82d4de - real 0m0,401s user 0m0,401s sys 0m0,004s And for the gcd: Before: $ time ./fend 0.9911^192|b3sum 315e6e5e5b9716413c0e6cbe347ed8a7e2923b3cec6290db7d88ff9710aea09e - real 0m19,810s user 0m19,757s sys 0m0,005s After: $ time ./fend 0.9911^192|b3sum 315e6e5e5b9716413c0e6cbe347ed8a7e2923b3cec6290db7d88ff9710aea09e - real 0m0,049s user 0m0,040s System information: rust 1.92.0, LLVM 20.1.8 x86_64-redhat-linux-gnu, Intel i5-13600K, opt-level=3, ~/.cargo/config.toml contents: [build] rustflags = ["-C", "target-cpu=native"] Functions changed: add_assign_internal: Replaces high-level get/set overhead with efficient, vectorizable slice iteration using .zip() to eliminate bounds checks in the hot loop. sub: Optimizes borrowing logic using u128 overflow detection and safely clamps loop ranges to handle unnormalized inputs without panicking. sub_assign: Performs subtraction in-place on mutable buffers to eliminate expensive memory allocations during the Karatsuba recombination step. mul: Dispatches to Karatsuba multiplication for inputs larger than 64 limbs (4096 bits) to reduce algorithmic complexity from O(N²) to O(N^¹·⁵⁸⁵). mul_karatsuba_slice: Recursively computes Karatsuba products using &[u64] slices instead of cloning BigUint vectors, significantly reducing memory churn. mul_internal_slice: Provides a highly optimized Schoolbook (O(N²)) multiplication base case that operates directly on slices for maximum cache efficiency. div_rem_knuth (called by divmod): Replaces slow bit-by-bit binary division with Knuth's Algorithm D (base-2⁶⁴), reducing the number of division steps by a factor of 64. root_n: Replaces Binary Search (linear convergence) with Newton's Method (quadratic convergence), reducing iteration count from thousands to dozens for large roots. lshift / rshift: Moves interrupt checks outside the loop and simplifies carry logic to allow the compiler to generate efficient block memory moves. add_assign_shifted: A specialized helper for Karatsuba that performs a "shift-and-add" operation in one pass without creating intermediate shifted values. gcd: Replaces the division-heavy Euclidean algorithm with Stein's Algorithm (Binary GCD), utilizing efficient bitwise shifts and subtraction to eliminate expensive modulo operations during fraction simplification. --- core/src/num/biguint.rs | 798 +++++++++++++++++++++++++++++++++------- 1 file changed, 667 insertions(+), 131 deletions(-) diff --git a/core/src/num/biguint.rs b/core/src/num/biguint.rs index 2687e4fb..c0487ed3 100644 --- a/core/src/num/biguint.rs +++ b/core/src/num/biguint.rs @@ -217,16 +217,6 @@ impl BigUint { } } - pub(crate) fn gcd(mut a: Self, mut b: Self, int: &I) -> FResult { - while b >= 1.into() { - let r = a.rem(&b, int)?; - a = b; - b = r; - } - - Ok(a) - } - pub(crate) fn pow(a: &Self, b: &Self, int: &I) -> FResult { if a.is_zero() && b.is_zero() { return Err(FendError::ZeroToThePowerOfZero); @@ -242,37 +232,64 @@ impl BigUint { // computes the exact `n`-th root if possible, otherwise the next lower integer pub(crate) fn root_n(self, n: &Self, int: &I) -> FResult> { - if self == 0.into() || self == 1.into() || n == &Self::from(1) { + if self.is_zero() { return Ok(Exact::new(self, true)); } - if n.value_len() > 1 { - return Err(FendError::OutOfRange { - value: Box::new(n.format(&FormatOptions::default(), int)?.value), - range: Range { - start: RangeBound::Closed(Box::new(1)), - end: RangeBound::Closed(Box::new(u64::MAX)), - }, - }); + if n.is_zero() { + return Err(FendError::DivideByZero); } - let max_bits = self.bits() / n.get(0) + 1; - let mut low_guess = Self::from(1); - let mut high_guess = Small(1).lshift_n(&(max_bits + 1).into(), int)?; - let result = loop { - test_int(int)?; - let mut guess = low_guess.clone().add(&high_guess); - guess.rshift(int)?; - - let res = Self::pow(&guess, n, int)?; - match res.cmp(&self) { - Ordering::Equal => break Exact::new(guess, true), - Ordering::Greater => high_guess = guess, - Ordering::Less => low_guess = guess, + if n == &Self::from(1) { + return Ok(Exact::new(self, true)); + } + + let Ok(n_usize) = n.try_as_usize(int) else { + // n is huge (larger than usize::MAX). + // root is 1 (unless self is 0 or 1, handled above). + return Ok(Exact::new(Self::from(1), false)); + }; + + let self_bits = self.bits(); + if self_bits < n_usize as u64 { + if self == Self::from(1) { + return Ok(Exact::new(Self::from(1), true)); } - if high_guess.clone().sub(&low_guess) <= 1.into() { - break Exact::new(low_guess, false); + return Ok(Exact::new(Self::from(1), false)); + } + + // Initial guess: 2 ^ ceil(bits / n) + let guess_bits = self_bits.div_ceil(n_usize as u64); + let mut x = Small(1).lshift_n(&guess_bits.into(), int)?; + + let n_minus_1 = Self::from((n_usize - 1) as u64); + + loop { + test_int(int)?; + + // Newton iteration: x_new = ( (n-1)x + self/x^(n-1) ) / n + + // 1. Calculate x^(n-1) + let x_pow_n_minus_1 = Self::pow(&x, &n_minus_1, int)?; + + // 2. Calculate self / x^(n-1) + // We discard the remainder (the .1 tuple element). + let quotient = self.divmod(&x_pow_n_minus_1, int)?.0; + + // 3. Calculate numerator: (n-1)x + quotient + let term1 = x.clone().mul(&n_minus_1, int)?; + let numerator = term1.add("ient); + + // 4. Calculate x_new: numerator / n + let x_new = numerator.div(n, int)?; + + // 5. Check convergence + // For integer Newton method starting from above, we stop when x_new >= x + if x_new >= x { + let check_pow = Self::pow(&x, n, int)?; + return Ok(Exact::new(x, check_pow == self)); } - }; - Ok(result) + + x = x_new; + } } fn pow_internal(&self, mut exponent: u64, int: &I) -> FResult { @@ -290,6 +307,9 @@ impl BigUint { } fn lshift(&mut self, int: &I) -> FResult<()> { + // Check for interrupts once per call, not per limb. + test_int(int)?; + match self { Small(n) => { if *n & 0xc000_0000_0000_0000 == 0 { @@ -299,15 +319,16 @@ impl BigUint { } } Large(value) => { - if value[value.len() - 1] & (1_u64 << 63) != 0 { - value.push(0); + // Use a forward pass. + // This is cache-friendly and allows the compiler to vectorise. + let mut carry = 0; + for elem in value.iter_mut() { + let new_carry = *elem >> 63; + *elem = (*elem << 1) | carry; + carry = new_carry; } - for i in (0..value.len()).rev() { - test_int(int)?; - value[i] <<= 1; - if i != 0 { - value[i] |= value[i - 1] >> 63; - } + if carry != 0 { + value.push(carry); } } } @@ -315,18 +336,20 @@ impl BigUint { } fn rshift(&mut self, int: &I) -> FResult<()> { + // Check for interrupts once per call, not per limb. + test_int(int)?; + match self { Small(n) => *n >>= 1, Large(value) => { - for i in 0..value.len() { - test_int(int)?; - value[i] >>= 1; - let next = if i + 1 >= value.len() { - 0 - } else { - value[i + 1] - }; - value[i] |= next << 63; + let len = value.len(); + if len > 0 { + // Forward pass: value[i] takes the high bit of value[i+1] + for i in 0..len - 1 { + value[i] = (value[i] >> 1) | (value[i + 1] << 63); + } + // Handle the most significant limb + value[len - 1] >>= 1; } } } @@ -334,86 +357,484 @@ impl BigUint { } pub(crate) fn divmod(&self, other: &Self, int: &I) -> FResult<(Self, Self)> { + if other.is_zero() { + return Err(FendError::DivideByZero); + } + if self.is_zero() { + return Ok((0.into(), 0.into())); + } + if self < other { + return Ok((0.into(), self.clone())); + } + if other == &Self::from(1) { + return Ok((self.clone(), 0.into())); + } + if let (Small(a), Small(b)) = (self, other) { if let (Some(div_res), Some(mod_res)) = (a.checked_div(*b), a.checked_rem(*b)) { return Ok((Small(div_res), Small(mod_res))); } return Err(FendError::DivideByZero); } - if other.is_zero() { - return Err(FendError::DivideByZero); + + self.div_rem_knuth(other, int) + } + + fn trailing_zeros(&self) -> u64 { + match self { + Small(n) => u64::from(n.trailing_zeros()), + Large(v) => { + let mut count = 0; + for &digit in v { + if digit == 0 { + count += 64; + } else { + count += u64::from(digit.trailing_zeros()); + break; + } + } + count + } } - if other == &Self::from(1) { - return Ok((self.clone(), Self::from(0))); + } + + fn rshift_u64(&mut self, n: u64) { + if n == 0 { + return; } - if self.is_zero() { - return Ok((Self::from(0), Self::from(0))); + match self { + Small(v) => { + if n >= 64 { + *v = 0; + } else { + *v >>= n; + } + } + Large(v) => { + let shift_limbs = (n / 64) as usize; + let shift_bits = (n % 64) as u32; + + if shift_limbs >= v.len() { + *self = Small(0); + return; + } + + // Shift limbs (whole words) - remove LSBs + if shift_limbs > 0 { + v.drain(0..shift_limbs); + } + + // Shift bits within limbs + if shift_bits > 0 { + let len = v.len(); + let inv_shift = 64 - shift_bits; + for i in 0..len - 1 { + v[i] = (v[i] >> shift_bits) | (v[i + 1] << inv_shift); + } + v[len - 1] >>= shift_bits; + } + + // Clean up + while v.last() == Some(&0) { + v.pop(); + } + if v.is_empty() { + *self = Small(0); + } + } } - if self < other { - return Ok((Self::from(0), self.clone())); + } + + fn lshift_u64(&mut self, n: u64) { + if n == 0 { + return; + } + match self { + Small(v) => { + // If it fits in Small, stay Small + if n < 64 && u64::from(v.leading_zeros()) >= n { + *v <<= n; + } else { + // promote to Large + let mut large = Large(vec![*v]); + large.lshift_u64(n); + *self = large; + } + } + Large(v) => { + let shift_limbs = (n / 64) as usize; + let shift_bits = (n % 64) as u32; + + // 1. Shift bits + if shift_bits > 0 { + let mut carry = 0; + let inv_shift = 64 - shift_bits; + for x in v.iter_mut() { + let next_carry = *x >> inv_shift; + *x = (*x << shift_bits) | carry; + carry = next_carry; + } + if carry != 0 { + v.push(carry); + } + } + + // 2. Prepend zero limbs (LSBs) + if shift_limbs > 0 { + let mut new_vec = vec![0; shift_limbs]; + new_vec.append(v); + *v = new_vec; + } + } } - if self == other { - return Ok((Self::from(1), Self::from(0))); + } + + // Stein's Algorithm (Binary GCD) + pub(crate) fn gcd(mut self, mut other: Self, int: &I) -> FResult { + // 1. Handle base cases + if self.is_zero() { + return Ok(other); } - if other == &Self::from(2) { - let mut div_result = self.clone(); - div_result.rshift(int)?; - let modulo = self.get(0) & 1; - return Ok((div_result, Self::from(modulo))); + if other.is_zero() { + return Ok(self); } - // binary long division - let mut q = Self::from(0); - let mut r = Self::from(0); - for i in (0..self.value_len()).rev() { + + // 2. Find common power of 2 + let u_zeros = self.trailing_zeros(); + let v_zeros = other.trailing_zeros(); + let common_shift = std::cmp::min(u_zeros, v_zeros); + + // 3. Remove factors of 2 from u and v + self.rshift_u64(u_zeros); + other.rshift_u64(v_zeros); + + // 4. Main Loop + loop { + // Invariant: self and other are both odd at the start of loop body test_int(int)?; - for j in (0..64).rev() { - r.lshift(int)?; - let bit_of_self = u64::from((self.get(i) & (1 << j)) != 0); - r.set(0, r.get(0) | bit_of_self); - if &r >= other { - r = r.sub(other); - q.set(i, q.get(i) | (1 << j)); + + match self.cmp(&other) { + Ordering::Equal => break, + Ordering::Greater => { + self.sub_assign(&other); + let zeros = self.trailing_zeros(); + self.rshift_u64(zeros); + } + Ordering::Less => { + std::mem::swap(&mut self, &mut other); + self.sub_assign(&other); + let zeros = self.trailing_zeros(); + self.rshift_u64(zeros); } } } - Ok((q, r)) + + // 5. Restore common power of 2 + self.lshift_u64(common_shift); + Ok(self) } - /// computes self *= other - fn mul_internal(&mut self, other: &Self, int: &I) -> FResult<()> { - if self.is_zero() || other.is_zero() { - *self = Self::from(0); - return Ok(()); + // Knuth's Algorithm D (The Art of Computer Programming, Vol 2, 4.3.1) + #[allow(clippy::cast_possible_truncation)] + fn div_rem_knuth(&self, other: &Self, int: &I) -> FResult<(Self, Self)> { + let mut dividend_digits = match self { + Small(n) => vec![*n], + Large(v) => v.clone(), + }; + let mut divisor_digits = match other { + Small(n) => vec![*n], + Large(v) => v.clone(), + }; + + // Sanitize inputs by removing leading zero limbs (trailing in LE vector). + while dividend_digits.len() > 1 && dividend_digits.last() == Some(&0) { + dividend_digits.pop(); } - let self_clone = self.clone(); - self.make_large(); - match self { - Small(_) => unreachable!(), - Large(v) => { - v.clear(); - v.push(0); + while divisor_digits.len() > 1 && divisor_digits.last() == Some(&0) { + divisor_digits.pop(); + } + + let divisor_len = divisor_digits.len(); + // shift = number of leading zeros in the most significant digit of the divisor + let shift = divisor_digits.last().unwrap().leading_zeros(); + + // Normalize divisor (shift left so MSB is 1) + if shift > 0 { + let mut carry = 0; + for digit in &mut divisor_digits { + let next_carry = *digit >> (64 - shift); + *digit = (*digit << shift) | carry; + carry = next_carry; + } + } + + // Normalize dividend (shift left by same amount) + if shift > 0 { + let mut carry = 0; + for digit in &mut dividend_digits { + let next_carry = *digit >> (64 - shift); + *digit = (*digit << shift) | carry; + carry = next_carry; + } + if carry != 0 { + dividend_digits.push(carry); } } - for i in 0..other.value_len() { + + // Ensure dividend has a leading zero digit for the algorithm to work comfortably + dividend_digits.push(0); + + let delta_len = dividend_digits.len() - divisor_len - 1; + let mut quotient_digits = vec![0; delta_len + 1]; + let divisor_msb = divisor_digits[divisor_len - 1]; + + // Iterate 'j' from m down to 0 -> offset + for offset in (0..=delta_len).rev() { test_int(int)?; - self.add_assign_internal(&self_clone, other.get(i), i); + + let dividend_high = dividend_digits[offset + divisor_len]; + let dividend_low = dividend_digits[offset + divisor_len - 1]; + + // Estimate q_hat (quotient digit) + let mut quotient_estimate = if dividend_high == divisor_msb { + u64::MAX + } else { + let numerator = (u128::from(dividend_high) << 64) | u128::from(dividend_low); + (numerator / u128::from(divisor_msb)) as u64 + }; + + // Refine q_hat + let divisor_second = if divisor_len >= 2 { + divisor_digits[divisor_len - 2] + } else { + 0 + }; + let dividend_third = if offset + divisor_len >= 2 { + dividend_digits[offset + divisor_len - 2] + } else { + 0 + }; + + let mut remainder = ((u128::from(dividend_high) << 64) | u128::from(dividend_low)) + .wrapping_sub(u128::from(quotient_estimate) * u128::from(divisor_msb)); + + while u128::from(divisor_second) * u128::from(quotient_estimate) + > (remainder << 64) | u128::from(dividend_third) + { + quotient_estimate -= 1; + remainder = remainder.wrapping_add(u128::from(divisor_msb)); + if remainder > u128::from(u64::MAX) { + break; + } + } + + // Multiply and subtract: dividend[offset..] -= quotient_estimate * divisor + let mut borrow: u128 = 0; + for (i, &divisor_digit) in divisor_digits.iter().enumerate() { + let product = u128::from(divisor_digit) * u128::from(quotient_estimate) + borrow; + let sub_val = product as u64; + borrow = product >> 64; + + let (res, borrowed) = dividend_digits[offset + i].overflowing_sub(sub_val); + dividend_digits[offset + i] = res; + if borrowed { + borrow += 1; + } + } + // Propagate borrow to the top digit + let (res, borrowed) = + dividend_digits[offset + divisor_len].overflowing_sub(borrow as u64); + dividend_digits[offset + divisor_len] = res; + + // If we subtracted too much (borrowed from top), correct by adding back divisor + if borrowed { + quotient_estimate -= 1; + let mut carry: u128 = 0; + for (i, &divisor_digit) in divisor_digits.iter().enumerate() { + let sum = + u128::from(dividend_digits[offset + i]) + u128::from(divisor_digit) + carry; + dividend_digits[offset + i] = sum as u64; + carry = sum >> 64; + } + dividend_digits[offset + divisor_len] = + dividend_digits[offset + divisor_len].wrapping_add(carry as u64); + } + + quotient_digits[offset] = quotient_estimate; } - Ok(()) + + // Denormalize remainder (shift right) + if shift > 0 { + let mut next_high_bits = 0; + // Iterate from MSB to LSB + for digit in dividend_digits.iter_mut().rev() { + let val = *digit; + *digit = (val >> shift) | next_high_bits; + next_high_bits = val << (64 - shift); + } + } + + let mut final_quotient = Self::Large(quotient_digits); + final_quotient.reduce(); + let mut final_remainder = Self::Large(dividend_digits); + final_remainder.reduce(); + + Ok((final_quotient, final_remainder)) } /// computes `self += (other * mul_digit) << (64 * shift)` + #[allow(clippy::cast_possible_truncation)] fn add_assign_internal(&mut self, other: &Self, mul_digit: u64, shift: usize) { - let mut carry = 0; - for i in 0..max(self.value_len(), other.value_len() + shift) { - let a = self.get(i); - let b = if i >= shift { other.get(i - shift) } else { 0 }; - let sum = u128::from(a) + (u128::from(b) * u128::from(mul_digit)) + u128::from(carry); - self.set(i, truncate(sum)); - carry = truncate(sum >> 64); + if mul_digit == 0 { + return; + } + + self.make_large(); + let self_vec = match self { + Large(v) => v, + Small(_) => unreachable!(), + }; + + let other_slice = match other { + Large(v) => v.as_slice(), + Small(v) => std::slice::from_ref(v), + }; + + let min_len = shift + other_slice.len(); + if self_vec.len() < min_len { + self_vec.resize(min_len, 0); } - if carry != 0 { - self.value_push(carry); + + let mut carry: u128 = 0; + let mul_digit = u128::from(mul_digit); + + // Use zip to iterate specified slices. + let target_slice = &mut self_vec[shift..]; + for (target, &source) in target_slice.iter_mut().zip(other_slice) { + let a = u128::from(*target); + let b = u128::from(source); + + let sum = a + (b * mul_digit) + carry; + *target = sum as u64; + carry = sum >> 64; } + + // Propagate carry + let mut i = min_len; + while carry != 0 { + if i < self_vec.len() { + let sum = u128::from(self_vec[i]) + carry; + self_vec[i] = sum as u64; + carry = sum >> 64; + i += 1; + } else { + self_vec.push(carry as u64); + break; + } + } + } + + /// computes `self += other << (64 * shift)` + #[allow(clippy::cast_possible_truncation)] + fn add_assign_shifted(&mut self, other: &Self, shift: usize) { + self.make_large(); + let self_vec = match self { + Large(v) => v, + Small(_) => unreachable!(), + }; + + let other_slice = match other { + Large(v) => v.as_slice(), + Small(v) => std::slice::from_ref(v), + }; + + let min_len = shift + other_slice.len(); + if self_vec.len() < min_len { + self_vec.resize(min_len, 0); + } + + let mut carry: u128 = 0; + let target_slice = &mut self_vec[shift..]; + + for (target, &source) in target_slice.iter_mut().zip(other_slice) { + let a = u128::from(*target); + let b = u128::from(source); + + let sum = a + b + carry; + *target = sum as u64; + carry = sum >> 64; + } + + let mut i = min_len; + while carry != 0 { + if i < self_vec.len() { + let sum = u128::from(self_vec[i]) + carry; + self_vec[i] = sum as u64; + carry = sum >> 64; + i += 1; + } else { + self_vec.push(carry as u64); + break; + } + } + } + + #[allow(clippy::cast_possible_truncation)] + fn mul_internal_slice(a: &[u64], b: &[u64]) -> Self { + if a.is_empty() || b.is_empty() { + return Self::from(0); + } + // Pre-allocate result + let mut res = vec![0; a.len() + b.len()]; + + for (i, &digit) in b.iter().enumerate() { + if digit == 0 { + continue; + } + let mut carry: u128 = 0; + let mul_digit = u128::from(digit); + + // Optimized inner loop on slices + let target = &mut res[i..]; + for (dst, &src) in target.iter_mut().zip(a) { + let sum = u128::from(*dst) + (u128::from(src) * mul_digit) + carry; + *dst = sum as u64; + carry = sum >> 64; + } + + // Propagate carry + let mut k = i + a.len(); + while carry > 0 { + let sum = u128::from(res[k]) + carry; + res[k] = sum as u64; + carry = sum >> 64; + k += 1; + } + } + + let mut big = Large(res); + big.reduce(); + big + } + + #[allow(clippy::cast_possible_truncation)] + // Helper to add two slices and return BigUint + fn add_slices(a: &[u64], b: &[u64]) -> Self { + let max_len = max(a.len(), b.len()); + let mut res = vec![0; max_len]; + let mut carry: u128 = 0; + + for i in 0..max_len { + let va = if i < a.len() { u128::from(a[i]) } else { 0 }; + let vb = if i < b.len() { u128::from(b[i]) } else { 0 }; + let sum = va + vb + carry; + res[i] = sum as u64; + carry = sum >> 64; + } + if carry > 0 { + res.push(carry as u64); + } + Large(res) } // Note: 0! = 1, 1! = 1 @@ -444,16 +865,6 @@ impl BigUint { Ok(b) } - pub(crate) fn mul(mut self, other: &Self, int: &I) -> FResult { - if let (Small(a), Small(b)) = (&self, &other) - && let Some(res) = a.checked_mul(*b) - { - return Ok(Self::from(res)); - } - self.mul_internal(other, int)?; - Ok(self) - } - fn rem(&self, other: &Self, int: &I) -> FResult { Ok(self.divmod(other, int)?.1) } @@ -471,6 +882,7 @@ impl BigUint { self } + #[allow(clippy::cast_possible_truncation)] pub(crate) fn sub(self, other: &Self) -> Self { if let (Small(a), Small(b)) = (&self, &other) { return Self::from(a - b); @@ -483,28 +895,152 @@ impl BigUint { if other.is_zero() { return self; } - let mut carry = 0; // 0 or 1 + let mut res = match self { Large(x) => x, Small(v) => vec![v], }; - if res.len() < other.value_len() { - res.resize(other.value_len(), 0); - } - for (i, a) in res.iter_mut().enumerate() { - let b = other.get(i); - if !(b == u64::MAX && carry == 1) && *a >= b + carry { - *a = *a - b - carry; - carry = 0; - } else { - let next_digit = - u128::from(*a) + ((1_u128) << 64) - u128::from(b) - u128::from(carry); - *a = truncate(next_digit); - carry = 1; - } + + let other_slice = match other { + Large(v) => v.as_slice(), + Small(v) => std::slice::from_ref(v), + }; + + let mut borrow: u128 = 0; + let mut i = 0; + + let len = std::cmp::min(res.len(), other_slice.len()); + + while i < len { + let a = res[i]; + let b = other_slice[i]; + + let sub_val = u128::from(b) + borrow; + let (val, new_borrow) = u128::from(a).overflowing_sub(sub_val); + + res[i] = val as u64; + borrow = u128::from(new_borrow); + i += 1; } - assert_eq!(carry, 0); - Large(res) + + while borrow > 0 && i < res.len() { + let a = res[i]; + let (val, new_borrow) = a.overflowing_sub(borrow as u64); + res[i] = val; + borrow = u128::from(new_borrow); + i += 1; + } + + let mut result = Large(res); + result.reduce(); + result + } + + #[allow(clippy::cast_possible_truncation)] + fn sub_assign(&mut self, other: &Self) { + let other_slice = match other { + Large(v) => v.as_slice(), + Small(s) => std::slice::from_ref(s), + }; + + if other_slice.is_empty() { + return; + } + + self.make_large(); + let self_vec = match self { + Large(v) => v, + Small(_) => unreachable!(), + }; + + let mut borrow: u128 = 0; + let len = std::cmp::min(self_vec.len(), other_slice.len()); + + for i in 0..len { + let val = u128::from(self_vec[i]); + let sub = u128::from(other_slice[i]) + borrow; + let (res, b) = val.overflowing_sub(sub); + self_vec[i] = res as u64; + borrow = u128::from(b); + } + + let mut i = len; + while borrow > 0 && i < self_vec.len() { + let val = u128::from(self_vec[i]); + let (res, b) = val.overflowing_sub(borrow); + self_vec[i] = res as u64; + borrow = u128::from(b); + i += 1; + } + } + + pub(crate) fn mul(self, other: &Self, int: &I) -> FResult { + if self.is_zero() || other.is_zero() { + return Ok(Self::from(0)); + } + + let a_slice = match &self { + Large(v) => v.as_slice(), + Small(s) => std::slice::from_ref(s), + }; + let b_slice = match other { + Large(v) => v.as_slice(), + Small(s) => std::slice::from_ref(s), + }; + + // 64 limbs (4096 bits) is the optimal cut-off. + if a_slice.len() >= 64 && b_slice.len() >= 64 { + return Self::mul_karatsuba_slice(a_slice, b_slice, int); + } + + Ok(Self::mul_internal_slice(a_slice, b_slice)) + } + + fn mul_karatsuba_slice(a: &[u64], b: &[u64], int: &I) -> FResult { + test_int(int)?; + + // Base case: 64 limbs + if a.len() < 64 || b.len() < 64 { + return Ok(Self::mul_internal_slice(a, b)); + } + + let m = max(a.len(), b.len()) / 2; + + let a_len = a.len(); + let a_low = if m < a_len { &a[..m] } else { a }; + let a_high = if m < a_len { &a[m..] } else { &[] }; + + let b_len = b.len(); + let b_low = if m < b_len { &b[..m] } else { b }; + let b_high = if m < b_len { &b[m..] } else { &[] }; + + let z0 = Self::mul_karatsuba_slice(a_low, b_low, int)?; + let z2 = Self::mul_karatsuba_slice(a_high, b_high, int)?; + + let a_sum = Self::add_slices(a_low, a_high); + let b_sum = Self::add_slices(b_low, b_high); + + let a_sum_slice = match &a_sum { + Large(v) => v.as_slice(), + Small(s) => std::slice::from_ref(s), + }; + let b_sum_slice = match &b_sum { + Large(v) => v.as_slice(), + Small(s) => std::slice::from_ref(s), + }; + + let z1_raw = Self::mul_karatsuba_slice(a_sum_slice, b_sum_slice, int)?; + + // Use in-place subtraction + let mut z1 = z1_raw; + z1.sub_assign(&z2); + z1.sub_assign(&z0); + + let mut res = z0; + res.add_assign_shifted(&z1, m); + res.add_assign_shifted(&z2, 2 * m); + + Ok(res) } pub(crate) const fn is_definitely_zero(&self) -> bool { From 9f2592594fb6bed74f6baf47ad8344d652402953 Mon Sep 17 00:00:00 2001 From: printfn Date: Thu, 1 Jan 2026 03:56:24 +0000 Subject: [PATCH 2/3] Remove unused code --- core/src/num/biguint.rs | 50 +---------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/core/src/num/biguint.rs b/core/src/num/biguint.rs index c0487ed3..f64cbb28 100644 --- a/core/src/num/biguint.rs +++ b/core/src/num/biguint.rs @@ -178,27 +178,6 @@ impl BigUint { } } - fn set(&mut self, idx: usize, new_value: u64) { - match self { - Small(n) => { - if idx == 0 { - *n = new_value; - } else if new_value == 0 { - // no need to do anything - } else { - self.make_large(); - self.set(idx, new_value); - } - } - Large(value) => { - while idx >= value.len() { - value.push(0); - } - value[idx] = new_value; - } - } - } - fn value_len(&self) -> usize { match self { Small(_) => 1, @@ -206,17 +185,6 @@ impl BigUint { } } - fn value_push(&mut self, new: u64) { - if new == 0 { - return; - } - self.make_large(); - match self { - Small(_) => unreachable!(), - Large(v) => v.push(new), - } - } - pub(crate) fn pow(a: &Self, b: &Self, int: &I) -> FResult { if a.is_zero() && b.is_zero() { return Err(FendError::ZeroToThePowerOfZero); @@ -535,7 +503,7 @@ impl BigUint { } // Knuth's Algorithm D (The Art of Computer Programming, Vol 2, 4.3.1) - #[allow(clippy::cast_possible_truncation)] + #[allow(clippy::cast_possible_truncation, clippy::too_many_lines)] fn div_rem_knuth(&self, other: &Self, int: &I) -> FResult<(Self, Self)> { let mut dividend_digits = match self { Small(n) => vec![*n], @@ -865,10 +833,6 @@ impl BigUint { Ok(b) } - fn rem(&self, other: &Self, int: &I) -> FResult { - Ok(self.divmod(other, int)?.1) - } - pub(crate) fn is_even(&self, int: &I) -> FResult { Ok(self.divmod(&Self::from(2), int)?.1 == 0.into()) } @@ -1658,18 +1622,6 @@ mod tests { Ok(()) } - #[test] - fn test_rem() -> Res { - let int = &crate::interrupt::Never; - let three = BigUint::from(3); - assert_eq!(BigUint::from(20).rem(&three, int)?, BigUint::from(2)); - assert_eq!(BigUint::from(21).rem(&three, int)?, BigUint::from(0)); - assert_eq!(BigUint::from(22).rem(&three, int)?, BigUint::from(1)); - assert_eq!(BigUint::from(23).rem(&three, int)?, BigUint::from(2)); - assert_eq!(BigUint::from(24).rem(&three, int)?, BigUint::from(0)); - Ok(()) - } - #[test] fn test_lshift() -> Res { let int = &crate::interrupt::Never; From 6a52ead2eae2e1ffc4a13584d114653680493ec1 Mon Sep 17 00:00:00 2001 From: printfn Date: Thu, 1 Jan 2026 03:59:46 +0000 Subject: [PATCH 3/3] Fix clippy warnings --- core/src/ast.rs | 30 +++++++++++++++--------------- core/src/scope.rs | 2 +- core/src/serialize.rs | 6 +++--- core/src/value.rs | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/core/src/ast.rs b/core/src/ast.rs index 2ebc2e3d..091ae145 100644 --- a/core/src/ast.rs +++ b/core/src/ast.rs @@ -108,27 +108,27 @@ impl fmt::Display for Bop { pub(crate) enum Expr { Literal(Value), Ident(Ident), - Parens(Box), - UnaryMinus(Box), - UnaryPlus(Box), - UnaryDiv(Box), - Factorial(Box), - Bop(Bop, Box, Box), + Parens(Box), + UnaryMinus(Box), + UnaryPlus(Box), + UnaryDiv(Box), + Factorial(Box), + Bop(Bop, Box, Box), // Call a function or multiply the expressions - Apply(Box, Box), + Apply(Box, Box), // Call a function, or throw an error if lhs is not a function - ApplyFunctionCall(Box, Box), + ApplyFunctionCall(Box, Box), // Multiply the expressions - ApplyMul(Box, Box), + ApplyMul(Box, Box), - As(Box, Box), - Fn(Ident, Box), + As(Box, Box), + Fn(Ident, Box), - Of(Ident, Box), + Of(Ident, Box), - Assign(Ident, Box), - Equality(bool, Box, Box), - Statements(Box, Box), + Assign(Ident, Box), + Equality(bool, Box, Box), + Statements(Box, Box), } impl Expr { diff --git a/core/src/scope.rs b/core/src/scope.rs index 7653376b..2d9cff0d 100644 --- a/core/src/scope.rs +++ b/core/src/scope.rs @@ -72,7 +72,7 @@ impl ScopeValue { pub(crate) struct Scope { ident: Ident, value: ScopeValue, - inner: Option>, + inner: Option>, } pub(crate) fn compare_option_arc_scope( diff --git a/core/src/serialize.rs b/core/src/serialize.rs index 98e1b21f..d3cb7a16 100644 --- a/core/src/serialize.rs +++ b/core/src/serialize.rs @@ -52,9 +52,9 @@ pub(crate) enum CborValue { Negative(u64), Bytes(Vec), String(String), - Array(Vec), - Map(Vec<(CborValue, CborValue)>), - Tag(u64, Box), + Array(Vec), + Map(Vec<(Self, Self)>), + Tag(u64, Box), Boolean(bool), Null, Undefined, diff --git a/core/src/value.rs b/core/src/value.rs index b7152d88..34ddae72 100644 --- a/core/src/value.rs +++ b/core/src/value.rs @@ -28,7 +28,7 @@ pub(crate) enum Value { Base(Base), // user-defined function with a named parameter Fn(Ident, Box, Option>), - Object(Vec<(Cow<'static, str>, Box)>), + Object(Vec<(Cow<'static, str>, Box)>), String(Cow<'static, str>), Bool(bool), Unit, // unit value `()`